Skip to content

Commit

Permalink
Add IRS PUF
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilwoodruff committed Aug 14, 2024
1 parent 0c76194 commit a4fff74
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 3 deletions.
36 changes: 36 additions & 0 deletions policyengine_us_data/irs_puf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from policyengine_core.data import Dataset
from policyengine_us_data.data_storage import STORAGE_FOLDER
from pathlib import Path


class IRS_PUF(Dataset):
    """Dataset containing IRS PUF tables.

    Subclasses supply ``puf_file_path`` and ``puf_demographics_file_path``
    (alongside ``file_path``, which ``generate`` writes to). ``generate``
    copies both CSV files into a single HDF5 store.
    """

    # Location of the raw PUF CSV; a leading "~" is expanded in ``generate``.
    puf_file_path: Path
    # Location of the PUF demographics CSV; "~" is expanded in ``generate``.
    puf_demographics_file_path: Path
    data_format = Dataset.TABLES

    @staticmethod
    def _require_csv(path, description: str) -> Path:
        """Return ``path`` expanded and resolved, raising if it is missing.

        Args:
            path: A string or ``Path`` pointing at an input CSV.
            description: Human-readable name used at the start of the
                error message (e.g. ``"PUF file"``).

        Raises:
            FileNotFoundError: If nothing exists at the resolved path.
        """
        resolved = Path(path).expanduser().resolve()
        if not resolved.exists():
            raise FileNotFoundError(
                f"{description} not found at {resolved}. Either put it "
                f"there, or change {Path(__file__)} to point to a "
                "different path."
            )
        return resolved

    def generate(self):
        """Build the HDF5 store at ``self.file_path`` from the two CSVs.

        The store receives two tables: ``"puf"`` (from ``puf_file_path``)
        and ``"puf_demographics"`` (from ``puf_demographics_file_path``).

        Raises:
            FileNotFoundError: If either input CSV does not exist.
        """
        import pandas as pd

        puf_file_path = self._require_csv(self.puf_file_path, "PUF file")
        puf_demographics_file_path = self._require_csv(
            self.puf_demographics_file_path, "PUF demographics file"
        )

        # Parse both CSVs before opening the store: mode="w" replaces any
        # existing file as soon as it is opened, so a failure while reading
        # must not leave an empty or half-written store behind.
        tables = {
            "puf": pd.read_csv(puf_file_path),
            "puf_demographics": pd.read_csv(puf_demographics_file_path),
        }

        with pd.HDFStore(self.file_path, mode="w") as storage:
            for key, table in tables.items():
                storage.put(key, table)


class IRS_PUF_2015(IRS_PUF):
    """IRS PUF dataset for the 2015 time period.

    The raw CSVs are expected in the user's ``~/Downloads`` folder; the
    generated store is written under ``STORAGE_FOLDER``.
    """

    # Identification of this dataset.
    time_period = 2015
    name = "irs_puf_2015"
    label = "IRS PUF (2015)"

    # Where the generated HDF5 store is written.
    file_path = STORAGE_FOLDER / "irs_puf_2015.h5"

    # Raw input CSVs (user-supplied; "~" is expanded by IRS_PUF.generate).
    puf_file_path = "~/Downloads/puf_2015.csv"
    puf_demographics_file_path = "~/Downloads/demographics_2015.csv"
4 changes: 1 addition & 3 deletions policyengine_us_data/policyengine_cps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from policyengine_core.data import Dataset
from policyengine_us.data.storage import STORAGE_FOLDER
from policyengine_us_data.data_storage import STORAGE_FOLDER
import h5py
from policyengine_us.data.datasets.cps.raw_cps import (
RawCPS_2018,
Expand All @@ -9,8 +9,6 @@
RawCPS_2022,
RawCPS,
)
from policyengine_us.data.datasets.cps.uprated_cps import UpratedCPS
from policyengine_us.data.storage import STORAGE_FOLDER
from pandas import DataFrame, Series
import numpy as np
import pandas as pd
Expand Down
12 changes: 12 additions & 0 deletions policyengine_us_data/tests/test_datasets/test_irs_puf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pytest


@pytest.mark.parametrize("year", [2015])
def test_irs_puf_generates(year: int):
    """Instantiate the IRS PUF dataset for ``year`` with ``require=True``.

    The test passes if construction completes without raising.
    NOTE(review): presumably ``require=True`` makes the dataset build or
    fetch its file when missing; confirm against ``Dataset``.
    """
    # Imported lazily so that collecting the test does not import the package.
    from policyengine_us_data.irs_puf import IRS_PUF_2015

    datasets = {2015: IRS_PUF_2015}
    dataset_class = datasets[year]

    dataset_class(require=True)
16 changes: 16 additions & 0 deletions policyengine_us_data/tests/test_datasets/test_policyengine_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,19 @@ def test_policyengine_cps_generates(year: int):
}

dataset_by_year[year](require=True)

@pytest.mark.parametrize("year", [2022])
def test_policyengine_cps_loads(year: int):
    """Check that the CPS dataset for ``year`` loads into a simulation.

    A ``Microsimulation`` is built on the dataset class, and the calculated
    ``household_net_income`` must contain no missing values.
    """
    # Imported lazily so that collecting the test does not import the packages.
    from policyengine_us_data.policyengine_cps import CPS_2022
    from policyengine_us import Microsimulation

    available = {2022: CPS_2022}

    # The dataset class itself (not an instance) is handed to the simulation.
    simulation = Microsimulation(dataset=available[year])
    net_income = simulation.calculate("household_net_income")

    assert not net_income.isna().any()

0 comments on commit a4fff74

Please sign in to comment.