Skip to content

Commit

Permalink
Merge pull request PSLmodels#165 from PSLmodels/cosmetic-taxcalc_data…
Browse files Browse the repository at this point in the history
…set-changes

Cosmetic changes to taxcalc_dataset.py code
  • Loading branch information
martinholmer authored Aug 22, 2024
2 parents 7dc5728 + 2748b3e commit 9c2189d
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions tax_microdata_benchmarking/datasets/taxcalc_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ def create_tc_dataset(pe_dataset: Type, year: int) -> pd.DataFrame:

def pe(variable):
    """Return a tax-unit-level numpy array for a PolicyEngine variable.

    Person-level variables are weighted by ``is_non_dep`` and summed
    within each tax unit; variables of any other entity are calculated
    with ``map_to="tax_unit"``.
    """
    entity_key = system.variables[variable].entity.key
    if entity_key != "person":
        # non-person variable: let the simulation map it to tax units
        return np.array(pe_sim.calculate(variable, map_to="tax_unit"))
    # person variable: sum over nondependents
    person_values = pe_sim.calculate(variable).values
    return np.array(tax_unit.sum(person_values * is_non_dep))

# specify tcname-to-pename dictionary
# specify tcname-to-pename dictionary for simple one-to-one variables
vnames = {
"RECID": "household_id",
"S006": "tax_unit_weight",
Expand Down Expand Up @@ -113,7 +113,7 @@ def pe(variable):
"mcaid_ben", # Medicaid benefits, assume none
"other_ben", # Other benefits, assume none
]
# specify Tax-Calculator array variable dictionary
# specify Tax-Calculator array variable dictionary and use it to create df
var = {}
for tcname, pename in vnames.items():
var[tcname] = pe(pename)
Expand All @@ -140,7 +140,7 @@ def pe(variable):
.values
)
var["EIC"] = np.minimum(pe("eitc_child_count"), 3)
ones = np.ones_like(var["RECID"], dtype=int)
ones = np.ones_like(zeros, dtype=int)
var["FLPDYR"] = ones * year
if "puf" in pe_dataset.__name__.lower():
var["data_source"] = ones
Expand All @@ -154,6 +154,7 @@ def pe(variable):
)
df = pd.DataFrame(var)

# specify person-to-tax_unit mapping function
map_to_tax_unit = lambda arr: pe_sim.map_result(arr, "person", "tax_unit")

# specify df head/spouse variables
Expand Down Expand Up @@ -201,7 +202,7 @@ def pe(variable):
renames[variable] = variable.upper()
elif variable.lower() in tc_variable_metadata["read"]:
renames[variable] = variable.lower()
df = df.rename(columns=renames)
df.rename(columns=renames, inplace=True)

return df

Expand Down

0 comments on commit 9c2189d

Please sign in to comment.