Skip to content

Commit

Permalink
Add more variables to var dictionary
Browse files: browse the repository at this point in the history
  • Loading branch information
martinholmer committed Aug 22, 2024
1 parent 00f2794 commit e1b7e76
Showing 1 changed file with 27 additions and 20 deletions.
47 changes: 27 additions & 20 deletions tax_microdata_benchmarking/datasets/taxcalc_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -117,16 +117,41 @@ def pe(variable):
var = {}
for tcname, pename in vnames.items():
var[tcname] = pe(pename)
zeros = np.zeros_like(var["RECID"])
zeros = np.zeros_like(var["RECID"], dtype=int)
for tcname in zero_names:
var[tcname] = zeros

var["E00600"] = pe("non_qualified_dividend_income") + pe(
"qualified_dividend_income"
)
var["E01500"] = pe("tax_exempt_pension_income") + pe(
"taxable_pension_income"
)
var["MARS"] = (
pd.Series(pe("filing_status"))
.map(
{
"SINGLE": 1,
"JOINT": 2,
"SEPARATE": 3,
"HEAD_OF_HOUSEHOLD": 4,
"SURVIVING_SPOUSE": 5,
}
)
.values
)
var["EIC"] = np.minimum(pe("eitc_child_count"), 3)
ones = np.ones_like(var["RECID"], dtype=int)
var["FLPDYR"] = ones * year
if "puf" in pe_dataset.__name__.lower():
var["data_source"] = ones
else:
var["data_source"] = zeros
var["e02000"] = (
pe("rental_income")
+ pe("partnership_s_corp_income")
+ pe("estate_income")
+ pe("farm_rent_income")
)

df = pd.DataFrame(var)

Expand Down Expand Up @@ -179,8 +204,6 @@ def pe(variable):
df["E03150"] = pe("traditional_ira_contributions")
df["E24515"] = pe("unrecaptured_section_1250_gain")
df["E27200"] = pe("farm_rent_income")
"""

df["MARS"] = (
pd.Series(pe("filing_status"))
.map(
Expand All @@ -194,25 +217,16 @@ def pe(variable):
)
.values
)

"""
df["RECID"] = pe("household_id")
df["S006"] = pe("tax_unit_weight")
"""

"""
df["a_lineno"] = 0 # TD-specific (CPS matched person ID)
df["agi_bin"] = 0 # TD-specific (AGI bin)
df["h_seq"] = 0 # TD-specific (CPS matched household ID)
df["ffpos"] = 0 # TD-specific (CPS matched family ID)
df["fips"] = 0 # No FIPS data
df["DSI"] = 0 # Claimed as dependent on another return, assume not
"""

df["EIC"] = np.minimum(pe("eitc_child_count"), 3)
df["FLPDYR"] = year

"""
df["MIDR"] = 0 # Separately filing spouse itemizes, assume not
df["PT_SSTB_income"] = (
0 # Business income is from specified service trade assume not
Expand All @@ -226,22 +240,15 @@ def pe(variable):
df["mcare_ben"] = 0 # Medicare benefits, assume none
df["mcaid_ben"] = 0 # Medicaid benefits, assume none
df["other_ben"] = 0 # Other benefits, assume none
"""

"""
df["PT_binc_w2_wages"] = pe("w2_wages_from_qualified_business")
df["PT_ubia_property"] = 0
"""

df["data_source"] = 1 if "puf" in pe_dataset.__name__.lower() else 0
df["e02000"] = (
pe("rental_income")
+ pe("partnership_s_corp_income")
+ pe("estate_income")
+ pe("farm_rent_income")
)

"""
df["e20400"] = pe("misc_deduction")
df["e07300"] = pe("foreign_tax_credit")
df["e62900"] = pe("amt_foreign_tax_credit")
Expand Down

0 comments on commit e1b7e76

Please sign in to comment.