diff --git a/policyengine_us_data/datasets/__init__.py b/policyengine_us_data/datasets/__init__.py index ffde8e4..8746183 100644 --- a/policyengine_us_data/datasets/__init__.py +++ b/policyengine_us_data/datasets/__init__.py @@ -18,4 +18,11 @@ from .puf import PUF_2015, PUF_2021, PUF_2024, IRS_PUF_2015 from .acs import ACS_2022 -DATASETS = [CPS_2022, PUF_2021, CPS_2024, EnhancedCPS_2024, ACS_2022] +DATASETS = [ + CPS_2022, + PUF_2021, + CPS_2024, + EnhancedCPS_2024, + ACS_2022, + Pooled_3_Year_CPS_2023, +] diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index ecdb2f7..fec5af9 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -226,6 +226,19 @@ def build_loss_matrix(dataset: type, time_period): raise ValueError(f"Missing values for {label}") targets_array.append(target) + # Rough estimate of the total negative household market income, from the IRS SOI PUF + + market_income = sim.calculate("household_market_income").values + loss_matrix["irs/negative_household_market_income_total"] = ( + market_income * (market_income < 0) + ) + targets_array.append(-138e9) + + loss_matrix["irs/negative_household_market_income_count"] = ( + market_income < 0 + ) + targets_array.append(3e6) + # Healthcare spending by age healthcare = pd.read_csv(STORAGE_FOLDER / "healthcare_spending.csv")