Skip to content

Commit

Permalink
Add uprating tools
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilwoodruff committed Aug 19, 2024
1 parent 737a792 commit d91726f
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 5 deletions.
25 changes: 25 additions & 0 deletions policyengine_us_data/datasets/cps/policyengine_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import yaml
from typing import Type
from policyengine_us_data.utils.uprating import create_policyengine_uprating_factors_table


class CPS(Dataset):
Expand All @@ -22,6 +23,24 @@ def generate(self):
Technical documentation and codebook here: https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar21.pdf
"""

if self.raw_cps is None:
# Extrapolate from CPS 2022

cps_2022 = CPS_2022(require=True)
print("Creating uprating factors table...")
uprating = create_policyengine_uprating_factors_table()
arrays = cps_2022.load_dataset()
for variable in uprating:
if variable in arrays:
current_index = uprating[uprating.Variable == variable][self.time_period].values[0]
start_index = uprating[uprating.Variable == variable][2021].values[0]
growth = current_index / start_index
print(f"Uprating {variable} by {growth-1:.1%}")
arrays[variable] = arrays[variable] * growth

self.save_dataset(arrays)
return

raw_data = self.raw_cps(require=True).load()
cps = h5py.File(self.file_path, mode="w")

Expand Down Expand Up @@ -536,3 +555,9 @@ class CPS_2022(CPS):
previous_year_raw_cps = CensusCPS_2021
file_path = STORAGE_FOLDER / "cps_2022.h5"
time_period = 2022

class CPS_2024(CPS):
    """CPS dataset for the 2024 time period, saved under ``cps_2024.h5``.

    NOTE(review): no ``raw_cps`` is assigned on this class; presumably the
    base class leaves it as ``None`` so that ``generate`` takes the
    "extrapolate from CPS 2022" branch. Confirm against the ``CPS``
    definition.
    """

    time_period = 2024
    file_path = STORAGE_FOLDER / "cps_2024.h5"
    label = "CPS 2024"
    name = "cps_2024"
21 changes: 21 additions & 0 deletions policyengine_us_data/datasets/puf/policyengine_puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .uprate_puf import uprate_puf
from survey_enhance import Imputation
from .irs_puf import IRS_PUF_2015
from policyengine_us_data.utils.uprating import create_policyengine_uprating_factors_table

rng = np.random.default_rng(seed=64)

Expand Down Expand Up @@ -290,6 +291,20 @@ def generate(self):

if self.time_period == 2021:
puf = uprate_puf(puf, 2015, self.time_period)
elif self.time_period >= 2021:
puf_2021 = PUF_2021(require=True)
print("Creating uprating factors table...")
uprating = create_policyengine_uprating_factors_table()
arrays = puf_2021.load_dataset()
for variable in uprating:
if variable in arrays:
current_index = uprating[uprating.Variable == variable][self.time_period].values[0]
start_index = uprating[uprating.Variable == variable][2021].values[0]
growth = current_index / start_index
print(f"Uprating {variable} by {growth-1:.1%}")
arrays[variable] = arrays[variable] * growth
self.save_dataset(arrays)
return

puf = puf[puf.MARS != 0] # Remove aggregate records

Expand Down Expand Up @@ -476,3 +491,9 @@ class PUF_2021(PUF):
name = "puf_2021"
time_period = 2021
file_path = STORAGE_FOLDER / "pe_puf_2021.h5"

class PUF_2024(PUF):
    """PUF dataset for the 2024 time period, saved under ``pe_puf_2024.h5``.

    NOTE(review): with ``time_period`` above 2021, ``generate`` appears to
    build this dataset by uprating the arrays loaded from ``PUF_2021``.
    Confirm against the full ``PUF.generate`` body.
    """

    name = "puf_2024"
    label = "PUF 2024"
    file_path = STORAGE_FOLDER / "pe_puf_2024.h5"
    time_period = 2024
27 changes: 22 additions & 5 deletions policyengine_us_data/utils/uprating.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from policyengine_core.model_api import Variable
from policyengine_us_data.data_storage import STORAGE_FOLDER
import pandas as pd

START_YEAR = 2020
Expand All @@ -13,21 +13,38 @@ def create_policyengine_uprating_factors_table():
years = []
index_values = []

population_size = system.parameters.get_child("calibration.gov.census.populations.total")

for variable in system.variables.values():
if variable.uprating is not None:
parameter = system.parameters.get_child(variable.uprating)
start_value = parameter(START_YEAR)
for year in range(START_YEAR, END_YEAR):
for year in range(START_YEAR, END_YEAR + 1):
population_growth = population_size(year) / population_size(START_YEAR)
variable_names.append(variable.name)
years.append(year)
index_values.append(round(parameter(year) / start_value, 3))
growth = parameter(year) / start_value
if "_weight" not in variable.name:
per_capita_growth = growth / population_growth
else:
per_capita_growth = growth
index_values.append(round(per_capita_growth, 3))

df["Variable"] = variable_names
df["Year"] = years
df["Value"] = index_values

# Convert so that there is a column for each year
df = df.pivot(index="Variable", columns="Year", values="Value")
df = df.sort_values("Variable")
df.to_csv(STORAGE_FOLDER / "uprating_factors.csv")

return df.sort_values("Variable")

# Create a table with growth factors by year

df_growth = df.copy()
for year in range(END_YEAR, START_YEAR, -1):
df_growth[year] = df_growth[year] / df_growth[year - 1] - 1
df_growth[START_YEAR] = 0

df_growth.to_csv(STORAGE_FOLDER / "uprating_growth_factors.csv")
return df

0 comments on commit d91726f

Please sign in to comment.