Skip to content

Commit

Permalink
Calibrate state populations (and under-5s) (#121)
Browse files Browse the repository at this point in the history
* Add dataset comparisons to docs

* Add inequality to book

* Add population by state to calibration

* Update uprating factors

* Update versioning and dataset URLs
  • Loading branch information
nikhilwoodruff authored Nov 19, 2024
1 parent 2f0e8cf commit 786b18e
Show file tree
Hide file tree
Showing 12 changed files with 115 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
!eitc.csv
!spm_threshold_agi.csv
**/_build
!population_by_state.csv
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ docker:

documentation:
jb clean docs && jb build docs
python docs/add_plotly_to_book.py docs


data:
python policyengine_us_data/datasets/acs/acs.py
Expand Down
5 changes: 5 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
- bump: minor
changes:
added:
- Metric comparisons by dataset to the documentation.
- Calibration of state populations.
27 changes: 27 additions & 0 deletions docs/add_plotly_to_book.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import argparse
from pathlib import Path

# Command-line tool that enables Plotly charts to show in the HTML files of
# the Jupyter Book documentation.
#
# It finds every HTML file in the Jupyter Book and adds a script tag directly
# after the opening <head> tag of each file, with the contents:
# <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>

REQUIRE_JS_TAG = (
    '<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/'
    '2.3.4/require.min.js"></script>'
)


def add_plotly_to_book(book_path):
    """Insert the require.js script tag into every HTML file under a folder.

    Args:
        book_path: Path (str or Path) to the Jupyter Book folder. It is
            searched recursively for ``*.html`` files.

    Returns:
        The list of HTML file paths that were rewritten, in sorted order.
        Files that already contain the tag, or that have no ``<head>`` tag,
        are left untouched and are not listed.
    """
    modified = []
    for html_file in sorted(Path(book_path).glob("**/*.html")):
        # Read and write explicitly as UTF-8 so the result does not depend on
        # the platform's default locale encoding.
        html = html_file.read_text(encoding="utf-8")

        # Skip files that were already processed, so running the tool twice
        # does not inject the script tag twice.
        if REQUIRE_JS_TAG in html:
            continue

        # Only touch the first <head>: any later literal "<head>" (e.g. inside
        # an inline script or code sample) is page content, not the tag.
        patched = html.replace("<head>", "<head>" + REQUIRE_JS_TAG, 1)
        if patched != html:
            html_file.write_text(patched, encoding="utf-8")
            modified.append(html_file)
    return modified


def main():
    """Parse the command line and patch the book found at ``book_path``."""
    parser = argparse.ArgumentParser(
        description=(
            "Enable Plotly charts in the HTML files of a Jupyter Book by "
            "adding require.js to each page."
        )
    )
    parser.add_argument("book_path", help="Path to the Jupyter Book.")
    args = parser.parse_args()
    add_plotly_to_book(args.book_path)


if __name__ == "__main__":
    main()
5 changes: 3 additions & 2 deletions docs/results.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@
},
{
"cell_type": "code",
"execution_count": 92,
"execution_count": 94,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -7270,7 +7270,8 @@
" title=\"Weight\",\n",
" type=\"log\",\n",
" ),\n",
")"
")\n",
"fig"
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion policyengine_us_data/datasets/acs/acs.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class ACS_2022(ACS):
time_period = 2022
file_path = STORAGE_FOLDER / "acs_2022.h5"
census_acs = CensusACS_2022
url = "release://PolicyEngine/policyengine-us-data/1.11.0/acs_2022.h5"
url = "release://PolicyEngine/policyengine-us-data/1.13.0/acs_2022.h5"


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ class CPS_2024(CPS):
label = "CPS 2024 (2022-based)"
file_path = STORAGE_FOLDER / "cps_2024.h5"
time_period = 2024
url = "release://policyengine/policyengine-us-data/1.11.0/cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.13.0/cps_2024.h5"


class PooledCPS(Dataset):
Expand Down Expand Up @@ -707,7 +707,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
CPS_2023,
]
time_period = 2023
url = "release://PolicyEngine/policyengine-us-data/1.11.0/pooled_3_year_cps_2023.h5"
url = "release://PolicyEngine/policyengine-us-data/1.13.0/pooled_3_year_cps_2023.h5"


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion policyengine_us_data/datasets/cps/enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class EnhancedCPS_2024(EnhancedCPS):
name = "enhanced_cps_2024"
label = "Enhanced CPS 2024"
file_path = STORAGE_FOLDER / "enhanced_cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.11.0/enhanced_cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.13.0/enhanced_cps_2024.h5"


if __name__ == "__main__":
Expand Down
53 changes: 53 additions & 0 deletions policyengine_us_data/storage/population_by_state.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
state,population,population_under_5
CA,38965193.00,2104120.00
TX,30503301.00,1921708.00
FL,22610726.00,1130536.00
NY,19571216.00,1037274.00
PA,12961683.00,661046.00
IL,12549689.00,665134.00
OH,11785935.00,660012.00
GA,11029227.00,639695.00
NC,10835491.00,606787.00
MI,10037261.00,531975.00
NJ,9290841.00,520287.00
VA,8715698.00,488079.00
WA,7812880.00,421896.00
AZ,7431344.00,393861.00
TN,7126489.00,413336.00
MA,7001399.00,343069.00
IN,6862199.00,404870.00
MO,6196156.00,353181.00
MD,6180253.00,352274.00
WI,5910955.00,313281.00
CO,5877610.00,311513.00
MN,5737915.00,327061.00
SC,5373555.00,290172.00
AL,5108468.00,291183.00
LA,4573749.00,278999.00
KY,4526154.00,262517.00
OR,4233358.00,203201.00
OK,4053824.00,243229.00
CT,3617176.00,180859.00
UT,3417734.00,232406.00
IA,3207004.00,186006.00
PR,3205691.00,96171.00
NV,3194176.00,172486.00
AR,3067732.00,180996.00
KS,2940546.00,176433.00
MS,2939690.00,173442.00
NM,2114371.00,107833.00
NE,1978379.00,124638.00
ID,1964726.00,113954.00
WV,1770071.00,86733.00
HI,1435138.00,77497.00
NH,1402054.00,63092.00
ME,1395722.00,61412.00
MT,1132812.00,57773.00
RI,1095962.00,52606.00
DE,1031890.00,54690.00
SD,919318.00,57917.00
ND,783926.00,49387.00
AK,733406.00,46205.00
DC,678972.00,38701.00
VT,647464.00,27193.00
WY,584057.00,30955.00
1 change: 1 addition & 0 deletions policyengine_us_data/storage/uprating_factors.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ alimony_expense,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.
alimony_income,1.0,1.255,1.322,1.357,1.446,1.504,1.535,1.567,1.576,1.595,1.622,1.655,1.689,1.723,1.779
american_opportunity_credit,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
amt_foreign_tax_credit,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
capital_gains_before_response,1.0,1.824,1.11,1.195,1.244,1.195,1.14,1.122,1.126,1.145,1.173,1.206,1.243,1.283,1.326
casualty_loss,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
cdcc_relevant_expenses,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
charitable_cash_donations,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
Expand Down
1 change: 1 addition & 0 deletions policyengine_us_data/storage/uprating_growth_factors.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ alimony_expense,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.
alimony_income,0,0.255,0.053,0.026,0.066,0.04,0.021,0.021,0.006,0.012,0.017,0.02,0.021,0.02,0.033
american_opportunity_credit,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
amt_foreign_tax_credit,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
capital_gains_before_response,0,0.824,-0.391,0.077,0.041,-0.039,-0.046,-0.016,0.004,0.017,0.024,0.028,0.031,0.032,0.034
casualty_loss,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
cdcc_relevant_expenses,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
charitable_cash_donations,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033
Expand Down
18 changes: 18 additions & 0 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,24 @@ def build_loss_matrix(dataset: type, time_period):
)
targets_array.append(row["count"])

# Population by state and population under 5 by state

state_population = pd.read_csv(STORAGE_FOLDER / "population_by_state.csv")

for _, row in state_population.iterrows():
in_state = sim.calculate("state_code", map_to="person") == row["state"]
label = f"census/population_by_state/{row['state']}"
loss_matrix[label] = sim.map_result(in_state, "person", "household")
targets_array.append(row["population"])

under_5 = sim.calculate("age").values < 5
in_state_under_5 = in_state * under_5
label = f"census/population_under_5_by_state/{row['state']}"
loss_matrix[label] = sim.map_result(
in_state_under_5, "person", "household"
)
targets_array.append(row["population_under_5"])

if any(loss_matrix.isna().sum() > 0):
raise ValueError("Some targets are missing from the loss matrix")

Expand Down

0 comments on commit 786b18e

Please sign in to comment.