Add uprating tools

PolicyEngine · Aug 19, 2024 · d91726f · d91726f
1 parent 737a792
commit d91726f
Show file tree

Hide file tree

Showing 3 changed files with 68 additions and 5 deletions.
diff --git a/policyengine_us_data/datasets/cps/policyengine_cps.py b/policyengine_us_data/datasets/cps/policyengine_cps.py
@@ -8,6 +8,7 @@
 import os
 import yaml
 from typing import Type
+from policyengine_us_data.utils.uprating import create_policyengine_uprating_factors_table
 
 
 class CPS(Dataset):
@@ -22,6 +23,24 @@ def generate(self):
         Technical documentation and codebook here: https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar21.pdf
         """
 
+        if self.raw_cps is None:
+            # Extrapolate from CPS 2022
+
+            cps_2022 = CPS_2022(require=True)
+            print("Creating uprating factors table...")
+            uprating = create_policyengine_uprating_factors_table()
+            arrays = cps_2022.load_dataset()
+            for variable in uprating:
+                if variable in arrays:
+                    current_index = uprating[uprating.Variable == variable][self.time_period].values[0]
+                    start_index = uprating[uprating.Variable == variable][2021].values[0]
+                    growth = current_index / start_index
+                    print(f"Uprating {variable} by {growth-1:.1%}")
+                    arrays[variable] = arrays[variable] * growth
+
+            self.save_dataset(arrays)
+            return
+
         raw_data = self.raw_cps(require=True).load()
         cps = h5py.File(self.file_path, mode="w")
 
@@ -536,3 +555,9 @@ class CPS_2022(CPS):
     previous_year_raw_cps = CensusCPS_2021
     file_path = STORAGE_FOLDER / "cps_2022.h5"
     time_period = 2022
+
+class CPS_2024(CPS):  # CPS subclass for 2024; declares no raw_cps of its own (presumably inherits None, which sends generate() down its extrapolate-from-CPS_2022 branch — TODO confirm)
+    name = "cps_2024"
+    label = "CPS 2024"
+    file_path = STORAGE_FOLDER / "cps_2024.h5"  # location of this dataset's .h5 file inside STORAGE_FOLDER
+    time_period = 2024  # generate() reads self.time_period as the column key into the uprating factors table
diff --git a/policyengine_us_data/datasets/puf/policyengine_puf.py b/policyengine_us_data/datasets/puf/policyengine_puf.py
@@ -7,6 +7,7 @@
 from .uprate_puf import uprate_puf
 from survey_enhance import Imputation
 from .irs_puf import IRS_PUF_2015
+from policyengine_us_data.utils.uprating import create_policyengine_uprating_factors_table
 
 rng = np.random.default_rng(seed=64)
 
@@ -290,6 +291,20 @@ def generate(self):
 
         if self.time_period == 2021:
             puf = uprate_puf(puf, 2015, self.time_period)
+        elif self.time_period >= 2021:
+            puf_2021 = PUF_2021(require=True)
+            print("Creating uprating factors table...")
+            uprating = create_policyengine_uprating_factors_table()
+            arrays = puf_2021.load_dataset()
+            for variable in uprating:
+                if variable in arrays:
+                    current_index = uprating[uprating.Variable == variable][self.time_period].values[0]
+                    start_index = uprating[uprating.Variable == variable][2021].values[0]
+                    growth = current_index / start_index
+                    print(f"Uprating {variable} by {growth-1:.1%}")
+                    arrays[variable] = arrays[variable] * growth
+            self.save_dataset(arrays)
+            return
 
         puf = puf[puf.MARS != 0] # Remove aggregate records
 
@@ -476,3 +491,9 @@ class PUF_2021(PUF):
     name = "puf_2021"
     time_period = 2021
     file_path = STORAGE_FOLDER / "pe_puf_2021.h5"
+
+class PUF_2024(PUF):  # PUF subclass for 2024
+    label = "PUF 2024"
+    name = "puf_2024"
+    time_period = 2024  # not 2021, so the generate() in this file takes its `elif self.time_period >= 2021` branch and scales the PUF_2021 arrays
+    file_path = STORAGE_FOLDER / "pe_puf_2024.h5"  # location of this dataset's .h5 file inside STORAGE_FOLDER
diff --git a/policyengine_us_data/utils/uprating.py b/policyengine_us_data/utils/uprating.py
@@ -1,4 +1,4 @@
-from policyengine_core.model_api import Variable
+from policyengine_us_data.data_storage import STORAGE_FOLDER
 import pandas as pd
 
 START_YEAR = 2020
@@ -13,21 +13,38 @@ def create_policyengine_uprating_factors_table():
     years = []
     index_values = []
 
+    population_size = system.parameters.get_child("calibration.gov.census.populations.total")
+
     for variable in system.variables.values():
         if variable.uprating is not None:
             parameter = system.parameters.get_child(variable.uprating)
             start_value = parameter(START_YEAR)
-            for year in range(START_YEAR, END_YEAR):
+            for year in range(START_YEAR, END_YEAR + 1):
+                population_growth = population_size(year) / population_size(START_YEAR)
                 variable_names.append(variable.name)
                 years.append(year)
-                index_values.append(round(parameter(year) / start_value, 3))
+                growth = parameter(year) / start_value
+                if "_weight" not in variable.name:
+                    per_capita_growth = growth / population_growth
+                else:
+                    per_capita_growth = growth
+                index_values.append(round(per_capita_growth, 3))
 
     df["Variable"] = variable_names
     df["Year"] = years
     df["Value"] = index_values
 
     # Convert so that there is a column for each year
     df = df.pivot(index="Variable", columns="Year", values="Value")
+    df = df.sort_values("Variable")
+    df.to_csv(STORAGE_FOLDER / "uprating_factors.csv")
 
-    return df.sort_values("Variable")
-
+    # Create a table with growth factors by year
+
+    df_growth = df.copy()
+    for year in range(END_YEAR, START_YEAR, -1):
+        df_growth[year] = df_growth[year] / df_growth[year - 1] - 1
+    df_growth[START_YEAR] = 0
+
+    df_growth.to_csv(STORAGE_FOLDER / "uprating_growth_factors.csv")
+    return df