Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calibrate state populations (and under-5s) #121

Merged
merged 6 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
!eitc.csv
!spm_threshold_agi.csv
**/_build
!population_by_state.csv
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ docker:

documentation:
jb clean docs && jb build docs
python docs/add_plotly_to_book.py docs


data:
python policyengine_us_data/datasets/acs/acs.py
Expand Down
5 changes: 5 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
- bump: minor
changes:
added:
- Metric comparisons by dataset to the documentation.
- Calibration of state populations.
27 changes: 27 additions & 0 deletions docs/add_plotly_to_book.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import argparse
from pathlib import Path

# This command-line tool enables Plotly charts to show in the HTML files for
# the Jupyter Book documentation.
#
# Usage: python add_plotly_to_book.py <book_path>

# Script tag inserted at the start of <head> in every HTML file of the book.
REQUIRE_JS_TAG = (
    '<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/'
    'require.min.js"></script>'
)


def add_require_js(html: str) -> str:
    """Return ``html`` with the RequireJS script tag at the start of <head>.

    The tag is inserted directly after the first literal ``<head>`` only, so
    later occurrences of that text (for example inside inline scripts) are
    left alone. If the tag is already present anywhere in ``html``, or there
    is no ``<head>`` to anchor on, the input is returned unchanged; this makes
    repeated runs over the same build output safe.
    """
    if REQUIRE_JS_TAG in html:
        return html
    return html.replace("<head>", "<head>" + REQUIRE_JS_TAG, 1)


def main(argv=None) -> None:
    """Add the RequireJS script tag to every HTML file under the book path.

    Args:
        argv: Command-line arguments excluding the program name. ``None``
            (the default) makes argparse read them from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Enable Plotly charts in the HTML files of a Jupyter Book."
        )
    )
    parser.add_argument("book_path", help="Path to the Jupyter Book.")
    args = parser.parse_args(argv)

    book_folder = Path(args.book_path)

    for html_file in book_folder.glob("**/*.html"):
        # Read and write explicitly as UTF-8 so the result does not depend on
        # the platform's default locale encoding.
        with open(html_file, "r", encoding="utf-8") as f:
            html = f.read()

        updated = add_require_js(html)

        # Skip the write when nothing changed so untouched files keep their
        # contents and modification times.
        if updated != html:
            with open(html_file, "w", encoding="utf-8") as f:
                f.write(updated)


if __name__ == "__main__":
    main()
5 changes: 3 additions & 2 deletions docs/results.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@
},
{
"cell_type": "code",
"execution_count": 92,
"execution_count": 94,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -7270,7 +7270,8 @@
" title=\"Weight\",\n",
" type=\"log\",\n",
" ),\n",
")"
")\n",
"fig"
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion policyengine_us_data/datasets/acs/acs.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class ACS_2022(ACS):
time_period = 2022
file_path = STORAGE_FOLDER / "acs_2022.h5"
census_acs = CensusACS_2022
url = "release://PolicyEngine/policyengine-us-data/1.11.0/acs_2022.h5"
url = "release://PolicyEngine/policyengine-us-data/1.13.0/acs_2022.h5"


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ class CPS_2024(CPS):
label = "CPS 2024 (2022-based)"
file_path = STORAGE_FOLDER / "cps_2024.h5"
time_period = 2024
url = "release://policyengine/policyengine-us-data/1.11.0/cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.13.0/cps_2024.h5"


class PooledCPS(Dataset):
Expand Down Expand Up @@ -707,7 +707,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
CPS_2023,
]
time_period = 2023
url = "release://PolicyEngine/policyengine-us-data/1.11.0/pooled_3_year_cps_2023.h5"
url = "release://PolicyEngine/policyengine-us-data/1.13.0/pooled_3_year_cps_2023.h5"


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion policyengine_us_data/datasets/cps/enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class EnhancedCPS_2024(EnhancedCPS):
name = "enhanced_cps_2024"
label = "Enhanced CPS 2024"
file_path = STORAGE_FOLDER / "enhanced_cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.11.0/enhanced_cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.13.0/enhanced_cps_2024.h5"


if __name__ == "__main__":
Expand Down
53 changes: 53 additions & 0 deletions policyengine_us_data/storage/population_by_state.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
state,population,population_under_5
CA,38965193.00,2104120.00
TX,30503301.00,1921708.00
FL,22610726.00,1130536.00
NY,19571216.00,1037274.00
PA,12961683.00,661046.00
IL,12549689.00,665134.00
OH,11785935.00,660012.00
GA,11029227.00,639695.00
NC,10835491.00,606787.00
MI,10037261.00,531975.00
NJ,9290841.00,520287.00
VA,8715698.00,488079.00
WA,7812880.00,421896.00
AZ,7431344.00,393861.00
TN,7126489.00,413336.00
MA,7001399.00,343069.00
IN,6862199.00,404870.00
MO,6196156.00,353181.00
MD,6180253.00,352274.00
WI,5910955.00,313281.00
CO,5877610.00,311513.00
MN,5737915.00,327061.00
SC,5373555.00,290172.00
AL,5108468.00,291183.00
LA,4573749.00,278999.00
KY,4526154.00,262517.00
OR,4233358.00,203201.00
OK,4053824.00,243229.00
CT,3617176.00,180859.00
UT,3417734.00,232406.00
IA,3207004.00,186006.00
PR,3205691.00,96171.00
NV,3194176.00,172486.00
AR,3067732.00,180996.00
KS,2940546.00,176433.00
MS,2939690.00,173442.00
NM,2114371.00,107833.00
NE,1978379.00,124638.00
ID,1964726.00,113954.00
WV,1770071.00,86733.00
HI,1435138.00,77497.00
NH,1402054.00,63092.00
ME,1395722.00,61412.00
MT,1132812.00,57773.00
RI,1095962.00,52606.00
DE,1031890.00,54690.00
SD,919318.00,57917.00
ND,783926.00,49387.00
AK,733406.00,46205.00
DC,678972.00,38701.00
VT,647464.00,27193.00
WY,584057.00,30955.00
1 change: 1 addition & 0 deletions policyengine_us_data/storage/uprating_factors.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ alimony_expense,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.
alimony_income,1.0,1.255,1.322,1.357,1.446,1.504,1.535,1.567,1.576,1.595,1.622,1.655,1.689,1.723,1.779
american_opportunity_credit,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
amt_foreign_tax_credit,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
capital_gains_before_response,1.0,1.824,1.11,1.195,1.244,1.195,1.14,1.122,1.126,1.145,1.173,1.206,1.243,1.283,1.326
casualty_loss,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
cdcc_relevant_expenses,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
charitable_cash_donations,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
Expand Down
1 change: 1 addition & 0 deletions policyengine_us_data/storage/uprating_growth_factors.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ alimony_expense,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.
alimony_income,0,0.255,0.053,0.026,0.066,0.04,0.021,0.021,0.006,0.012,0.017,0.02,0.021,0.02,0.033
american_opportunity_credit,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
amt_foreign_tax_credit,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
capital_gains_before_response,0,0.824,-0.391,0.077,0.041,-0.039,-0.046,-0.016,0.004,0.017,0.024,0.028,0.031,0.032,0.034
casualty_loss,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
cdcc_relevant_expenses,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
charitable_cash_donations,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
Expand Down
18 changes: 18 additions & 0 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,24 @@ def build_loss_matrix(dataset: type, time_period):
)
targets_array.append(row["count"])

# Population by state and population under 5 by state

state_population = pd.read_csv(STORAGE_FOLDER / "population_by_state.csv")

for _, row in state_population.iterrows():
in_state = sim.calculate("state_code", map_to="person") == row["state"]
label = f"census/population_by_state/{row['state']}"
loss_matrix[label] = sim.map_result(in_state, "person", "household")
targets_array.append(row["population"])

under_5 = sim.calculate("age").values < 5
in_state_under_5 = in_state * under_5
label = f"census/population_under_5_by_state/{row['state']}"
loss_matrix[label] = sim.map_result(
in_state_under_5, "person", "household"
)
targets_array.append(row["population_under_5"])

if any(loss_matrix.isna().sum() > 0):
raise ValueError("Some targets are missing from the loss matrix")

Expand Down