diff --git a/Makefile b/Makefile
index 6a4da61..f91ff99 100644
--- a/Makefile
+++ b/Makefile
@@ -8,4 +8,7 @@ install:
 	pip install -e .[dev]
 
 docker:
-	docker buildx build --platform linux/amd64 . -t policyengine-us-data:latest
\ No newline at end of file
+	docker buildx build --platform linux/amd64 . -t policyengine-us-data:latest
+
+documentation:
+	streamlit run docs/Home.py
diff --git a/docs/Home.py b/docs/Home.py
new file mode 100644
index 0000000..f2fcc18
--- /dev/null
+++ b/docs/Home.py
@@ -0,0 +1,3 @@
+import streamlit as st
+
+st.title("PolicyEngine US Data")
diff --git a/docs/pages/Benchmarks.py b/docs/pages/Benchmarks.py
new file mode 100644
index 0000000..361e170
--- /dev/null
+++ b/docs/pages/Benchmarks.py
@@ -0,0 +1,72 @@
+import streamlit as st
+
+st.set_page_config(layout="wide")
+
+st.title("Benchmarks")
+
+from policyengine_us_data.datasets import CPS_2024, PUF_2024, EnhancedCPS_2024
+from policyengine_us_data.utils import build_loss_matrix
+from policyengine_us import Microsimulation
+import pandas as pd
+import plotly.express as px
+
+
+@st.cache_data
+def compare_datasets():
+    comparison_combined = pd.DataFrame()
+    for dataset in [CPS_2024, PUF_2024, EnhancedCPS_2024]:
+        sim = Microsimulation(dataset=dataset)
+        weights = sim.calculate("household_weight").values
+        loss_matrix, targets_array = build_loss_matrix(dataset, 2024)
+        target_names = loss_matrix.columns
+        estimates = weights @ loss_matrix.values
+        comparison = pd.DataFrame(
+            {
+                "Target": target_names,
+                "Estimate": estimates,
+                "Actual": targets_array,
+            }
+        )
+        comparison["Error"] = comparison["Estimate"] - comparison["Actual"]
+        comparison["Abs. Error"] = comparison["Error"].abs()
+        comparison["Abs. Error %"] = (
+            comparison["Abs. Error"] / comparison["Actual"]
+        )
+        comparison["Dataset"] = dataset.label
+        comparison_combined = pd.concat([comparison_combined, comparison])
+
+    return comparison_combined
+
+
+df = compare_datasets()
+
+mean_relative_error_by_dataset = (
+    df.groupby("Dataset")["Abs. Error %"].mean().reset_index()
+)
+
+st.write(mean_relative_error_by_dataset)
+
+metric = st.selectbox(
+    "Metric", ["Estimate", "Error", "Abs. Error", "Abs. Error %"]
+)
+target = st.selectbox("Target", df["Target"].unique())
+
+fig = px.bar(
+    df[df["Target"] == target],
+    x="Dataset",
+    y=metric,
+    title=f"{metric} for {target}",
+)
+
+if metric == "Estimate":
+    # Add a dashed line at the target
+    fig.add_shape(
+        type="line",
+        x0=-0.5,
+        x1=2.5,
+        y0=df.loc[df["Target"] == target, "Actual"].values[0],
+        y1=df.loc[df["Target"] == target, "Actual"].values[0],
+        line=dict(dash="dash"),
+    )
+
+st.plotly_chart(fig, use_container_width=True)
diff --git a/policyengine_us_data/data_storage/uprating_factors.csv b/policyengine_us_data/data_storage/uprating_factors.csv
index b55182a..b3f188b 100644
--- a/policyengine_us_data/data_storage/uprating_factors.csv
+++ b/policyengine_us_data/data_storage/uprating_factors.csv
@@ -15,7 +15,7 @@ early_withdrawal_penalty,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467
 educator_expense,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
 employment_income,1.0,1.069,1.149,1.211,1.264,1.306,1.348,1.39,1.438,1.486,1.536,1.587,1.639,1.693,1.748
 employment_income_before_lsr,1.0,1.069,1.149,1.211,1.264,1.306,1.348,1.39,1.438,1.486,1.536,1.587,1.639,1.693,1.748
-employment_income_last_year,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
+employment_income_last_year,1.0,1.069,1.149,1.211,1.264,1.306,1.348,1.39,1.438,1.486,1.536,1.587,1.639,1.693,1.748
 energy_efficient_home_improvement_credit,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
 estate_income,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
 excess_withheld_payroll_tax,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
@@ -42,6 +42,7 @@ non_sch_d_capital_gains,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,
 other_credits,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
 partnership_s_corp_income,1.0,0.997,1.542,1.581,1.685,1.753,1.789,1.827,1.837,1.859,1.891,1.929,1.969,2.009,2.074
 person_weight,1.0,1.003,1.007,1.016,1.027,1.039,1.049,1.056,1.061,1.066,1.071,1.076,1.081,1.086,1.09
+population,1.0,1.0027545812166367,1.0065863897282326,1.0155402789988688,1.0271017184625957,1.0389212123758114,1.0486882732256506,1.0560668301011513,1.061272928587932,1.0663860074475715,1.0714000654138023,1.0763030999540903,1.0810831085359012,1.0857250879935667,1.0902200364276862
 pre_tax_contributions,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
 prior_year_minimum_tax_credit,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
 qualified_dividend_income,1.0,1.2,1.269,1.283,1.325,1.376,1.414,1.445,1.483,1.533,1.624,1.714,1.801,1.885,1.966
@@ -58,7 +59,7 @@ self_employed_pension_contribution_ald,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.3
 self_employed_pension_contributions,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
 self_employment_income,1.0,1.255,1.322,1.357,1.446,1.504,1.535,1.567,1.576,1.595,1.622,1.655,1.689,1.723,1.779
 self_employment_income_before_lsr,1.0,1.255,1.322,1.357,1.446,1.504,1.535,1.567,1.576,1.595,1.622,1.655,1.689,1.723,1.779
-self_employment_income_last_year,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
+self_employment_income_last_year,1.0,1.255,1.322,1.357,1.446,1.504,1.535,1.567,1.576,1.595,1.622,1.655,1.689,1.723,1.779
 short_term_capital_gains,1.0,0.997,1.59,1.711,1.781,1.711,1.633,1.607,1.612,1.639,1.68,1.727,1.781,1.838,1.898
 snap_reported,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718
 social_security,1.0,1.276,1.355,1.55,1.718,1.841,1.937,2.031,2.143,2.268,2.398,2.519,2.654,2.805,2.951
diff --git a/policyengine_us_data/data_storage/uprating_growth_factors.csv b/policyengine_us_data/data_storage/uprating_growth_factors.csv
index e2d1f93..f06776b 100644
--- a/policyengine_us_data/data_storage/uprating_growth_factors.csv
+++ b/policyengine_us_data/data_storage/uprating_growth_factors.csv
@@ -15,7 +15,7 @@ early_withdrawal_penalty,0,0.16599999999999993,-0.015437392795883409,0.058362369
 educator_expense,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
 employment_income,0,0.06899999999999995,0.07483629560336769,0.05395996518711921,0.04376548307184147,0.03322784810126578,0.03215926493108734,0.03115727002967339,0.03453237410071952,0.033379694019471495,0.03364737550471064,0.033203125,0.032766225582860686,0.032946918852959195,0.03248670998227987
 employment_income_before_lsr,0,0.06899999999999995,0.07483629560336769,0.05395996518711921,0.04376548307184147,0.03322784810126578,0.03215926493108734,0.03115727002967339,0.03453237410071952,0.033379694019471495,0.03364737550471064,0.033203125,0.032766225582860686,0.032946918852959195,0.03248670998227987
-employment_income_last_year,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
+employment_income_last_year,0,0.06899999999999995,0.07483629560336769,0.05395996518711921,0.04376548307184147,0.03322784810126578,0.03215926493108734,0.03115727002967339,0.03453237410071952,0.033379694019471495,0.03364737550471064,0.033203125,0.032766225582860686,0.032946918852959195,0.03248670998227987
 energy_efficient_home_improvement_credit,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
 estate_income,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
 excess_withheld_payroll_tax,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
@@ -42,6 +42,7 @@ non_sch_d_capital_gains,0,0.16599999999999993,-0.015437392795883409,0.0583623693
 other_credits,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
 partnership_s_corp_income,0,-0.0030000000000000027,0.546639919759278,0.02529182879377423,0.0657811511701456,0.04035608308605321,0.02053622361665708,0.02124091671324768,0.005473453749315738,0.011976047904191711,0.017213555675094083,0.0200951877313591,0.02073613271124941,0.02031488065007614,0.03235440517670485
 person_weight,0,0.0029999999999998916,0.003988035892322994,0.008937437934458892,0.010826771653543288,0.011684518013632017,0.009624639076034613,0.006673021925643674,0.004734848484848397,0.004712535344015167,0.004690431519699612,0.004668534080298992,0.004646840148698761,0.0046253469010177906,0.0036832412523020164
+population,0,0.0027545812166367423,0.0038212824786565402,0.008895301349200357,0.011384520833702672,0.011507617698184314,0.009401156443330061,0.007035986826480878,0.0049297055246797505,0.00481787363260322,0.004701916502291681,0.004576287325868789,0.004441136127931511,0.004293822945723447,0.004140042892834206
 pre_tax_contributions,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
 prior_year_minimum_tax_credit,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
 qualified_dividend_income,0,0.19999999999999996,0.057499999999999885,0.011032308904649346,0.03273577552611062,0.03849056603773571,0.02761627906976738,0.02192362093352207,0.02629757785467124,0.033715441672285795,0.05936073059360747,0.05541871921182251,0.05075845974329063,0.046640755136035494,0.04297082228116711
@@ -58,7 +59,7 @@ self_employed_pension_contribution_ald,0,0.16599999999999993,-0.0154373927958834
 self_employed_pension_contributions,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
 self_employment_income,0,0.2549999999999999,0.05338645418326715,0.02647503782148264,0.06558585114222537,0.04011065006915637,0.020611702127659504,0.02084690553745938,0.00574345883854499,0.012055837563451632,0.01692789968652053,0.020345252774352618,0.020543806646525775,0.02013025458851403,0.032501450957632017
 self_employment_income_before_lsr,0,0.2549999999999999,0.05338645418326715,0.02647503782148264,0.06558585114222537,0.04011065006915637,0.020611702127659504,0.02084690553745938,0.00574345883854499,0.012055837563451632,0.01692789968652053,0.020345252774352618,0.020543806646525775,0.02013025458851403,0.032501450957632017
-self_employment_income_last_year,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
+self_employment_income_last_year,0,0.2549999999999999,0.05338645418326715,0.02647503782148264,0.06558585114222537,0.04011065006915637,0.020611702127659504,0.02084690553745938,0.00574345883854499,0.012055837563451632,0.01692789968652053,0.020345252774352618,0.020543806646525775,0.02013025458851403,0.032501450957632017
 short_term_capital_gains,0,-0.0030000000000000027,0.5947843530591777,0.0761006289308177,0.040911747516072294,-0.03930376193149909,-0.0455873758036236,-0.015921616656460524,0.0031113876789048422,0.01674937965260548,0.02501525320317266,0.0279761904761906,0.03126809496236227,0.03200449185850651,0.03264417845484213
 snap_reported,0,0.16599999999999993,-0.015437392795883409,0.058362369337979336,0.05349794238683114,0.02968750000000009,0.02427921092564489,0.028888888888888742,0.02807775377969768,0.027310924369748024,0.03135650988411709,0.03172504957039002,0.03203074951953888,0.0322780881440099,0.03307276007215876
 social_security,0,0.276,0.06191222570532906,0.1439114391143912,0.10838709677419356,0.07159487776484275,0.05214557305812062,0.04852865255549821,0.05514524864598713,0.058329444703686395,0.057319223985890844,0.050458715596330306,0.05359269551409285,0.056895252449133515,0.05204991087344024
diff --git a/policyengine_us_data/datasets/cps/__init__.py b/policyengine_us_data/datasets/cps/__init__.py
index 213a613..2411ca4 100644
--- a/policyengine_us_data/datasets/cps/__init__.py
+++ b/policyengine_us_data/datasets/cps/__init__.py
@@ -1,2 +1,3 @@
 from .cps import *
 from .extended_cps import *
+from .enhanced_cps import *
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 5d87f29..7a79d3e 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -562,6 +562,6 @@ class CPS_2022(CPS):
 
 class CPS_2024(CPS):
     name = "cps_2024"
-    label = "CPS 2024"
+    label = "CPS 2024 (2022-based)"
     file_path = STORAGE_FOLDER / "cps_2024.h5"
     time_period = 2024
diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
index 572824c..f960612 100644
--- a/policyengine_us_data/datasets/cps/enhanced_cps.py
+++ b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -1,5 +1,208 @@
 from policyengine_core.data import Dataset
+import pandas as pd
+from policyengine_us_data.utils import (
+    pe_to_soi,
+    get_soi,
+    build_loss_matrix,
+    fmt,
+)
+import numpy as np
+from typing import Type
+from policyengine_us_data.data_storage import STORAGE_FOLDER
+from policyengine_us_data.datasets.cps import ExtendedCPS_2024
+import torch
+
+
+def build_loss_matrix(dataset: type, time_period):
+    loss_matrix = pd.DataFrame()
+    df = pe_to_soi(dataset, time_period)
+    agi = df["adjusted_gross_income"].values
+    filer = df["is_tax_filer"].values
+    soi_subset = get_soi(time_period)
+    targets_array = []
+    agi_level_targeted_variables = [
+        "adjusted_gross_income",
+        "count",
+        "employment_income",
+        "business_net_profits",
+        "capital_gains_gross",
+        "ordinary_dividends",
+        "partnership_and_s_corp_income",
+        "qualified_dividends",
+        "taxable_interest_income",
+        "total_pension_income",
+        "total_social_security",
+    ]
+    aggregate_level_targeted_variables = [
+        "business_net_losses",
+        "capital_gains_distributions",
+        "capital_gains_losses",
+        "estate_income",
+        "estate_losses",
+        "exempt_interest",
+        "ira_distributions",
+        "partnership_and_s_corp_losses",
+        "rent_and_royalty_net_income",
+        "rent_and_royalty_net_losses",
+        "taxable_pension_income",
+        "taxable_social_security",
+        "unemployment_compensation",
+    ]
+    aggregate_level_targeted_variables = [
+        variable
+        for variable in aggregate_level_targeted_variables
+        if variable in df.columns
+    ]
+    soi_subset = soi_subset[
+        soi_subset.Variable.isin(agi_level_targeted_variables)
+        & (
+            (soi_subset["AGI lower bound"] != -np.inf)
+            | (soi_subset["AGI upper bound"] != np.inf)
+        )
+        | (
+            soi_subset.Variable.isin(aggregate_level_targeted_variables)
+            & (soi_subset["AGI lower bound"] == -np.inf)
+            & (soi_subset["AGI upper bound"] == np.inf)
+        )
+    ]
+    for _, row in soi_subset.iterrows():
+        if row["Taxable only"]:
+            continue  # exclude "taxable returns" statistics
+
+        mask = (
+            (agi >= row["AGI lower bound"])
+            * (agi < row["AGI upper bound"])
+            * filer
+        ) > 0
+
+        if row["Filing status"] == "Single":
+            mask *= df["filing_status"].values == "SINGLE"
+        elif row["Filing status"] == "Married Filing Jointly/Surviving Spouse":
+            mask *= df["filing_status"].values == "JOINT"
+        elif row["Filing status"] == "Head of Household":
+            mask *= df["filing_status"].values == "HEAD_OF_HOUSEHOLD"
+        elif row["Filing status"] == "Married Filing Separately":
+            mask *= df["filing_status"].values == "SEPARATE"
+
+        values = df[row["Variable"]].values
+
+        if row["Count"]:
+            values = (values > 0).astype(float)
+
+        agi_range_label = (
+            f"{fmt(row['AGI lower bound'])}-{fmt(row['AGI upper bound'])}"
+        )
+        taxable_label = (
+            "taxable" if row["Taxable only"] else "all" + " returns"
+        )
+        filing_status_label = row["Filing status"]
+
+        variable_label = row["Variable"].replace("_", " ")
+
+        if row["Count"] and not row["Variable"] == "count":
+            label = (
+                f"{variable_label}/count/AGI in "
+                f"{agi_range_label}/{taxable_label}/{filing_status_label}"
+            )
+        elif row["Variable"] == "count":
+            label = (
+                f"{variable_label}/count/AGI in "
+                f"{agi_range_label}/{taxable_label}/{filing_status_label}"
+            )
+        else:
+            label = (
+                f"{variable_label}/total/AGI in "
+                f"{agi_range_label}/{taxable_label}/{filing_status_label}"
+            )
+
+        if label not in loss_matrix.columns:
+            loss_matrix[label] = mask * values
+            targets_array.append(row["Value"])
+
+    # Convert tax-unit level df to household-level df
+
+    from policyengine_us import Microsimulation
+
+    sim = Microsimulation(dataset=dataset)
+    hh_id = sim.calculate("household_id", map_to="person")
+    tax_unit_hh_id = sim.map_result(
+        hh_id, "person", "tax_unit", how="value_from_first_person"
+    )
+
+    loss_matrix = loss_matrix.groupby(tax_unit_hh_id).sum()
+
+    return loss_matrix.values, np.array(targets_array)
+
+
+def reweight(  # Adjust weights so that weights @ loss_matrix moves towards targets_array.
+    original_weights,
+    loss_matrix,
+    targets_array,
+):
+    loss_matrix = torch.tensor(loss_matrix, dtype=torch.float32)
+    targets_array = torch.tensor(targets_array, dtype=torch.float32)
+
+    # TODO: replace this with a call to the python reweight.py package.
+    def loss(weights):  # mean squared relative error across all targets
+        estimate = weights @ loss_matrix
+        rel_error = ((estimate - targets_array) / targets_array) ** 2  # NOTE(review): a zero target divides by zero
+        return rel_error.mean()
+
+    weights = torch.tensor(  # the optimised parameter is log(weight); exp() is applied before each use
+        np.log(original_weights), requires_grad=True, dtype=torch.float32
+    )
+    optimizer = torch.optim.Adam([weights], lr=1e-2)
+    from tqdm import trange
+
+    iterator = trange(1_000)  # fixed 1,000 Adam steps; there is no convergence check
+    for i in iterator:
+        optimizer.zero_grad()
+        l = loss(torch.exp(weights))
+        l.backward()
+        iterator.set_postfix({"loss": l.item()})  # report the current loss on the progress bar
+        optimizer.step()
+
+    return torch.exp(weights).detach().numpy()  # optimised weights as a NumPy array
 
 
 class EnhancedCPS(Dataset):
-    pass
+    data_format = Dataset.FLAT_FILE
+    input_dataset: Type[Dataset]  # dataset class that is loaded and reweighted
+    start_year: int  # first year (inclusive) that weights are computed for
+    end_year: int  # last year (inclusive) that weights are computed for
+
+    def generate(self):  # Reweight input_dataset for each year in the range and save the result.
+        df = self.input_dataset(require=True).load()
+        from policyengine_us import Microsimulation
+
+        sim = Microsimulation(dataset=self.input_dataset)
+        original_weights = sim.calculate("household_weight")
+        original_weights = original_weights.values + np.random.normal(
+            10, 1, len(original_weights)
+        )  # NOTE(review): unseeded noise, so runs differ; presumably keeps log(weight) finite for zero weights - confirm
+        for year in range(self.start_year, self.end_year + 1):
+            print(f"Enhancing CPS for {year}")
+            loss_matrix, targets_array = build_loss_matrix(
+                self.input_dataset, year
+            )
+            optimised_weights = reweight(
+                original_weights, loss_matrix, targets_array
+            )
+            df[f"household_weight__{year}"] = sim.map_result(  # one weight column per year
+                optimised_weights, "household", "person"
+            )
+
+        self.save_dataset(df)
+
+
+class EnhancedCPS_2024(EnhancedCPS):  # EnhancedCPS configured for the single year 2024
+    input_dataset = ExtendedCPS_2024  # source dataset whose household weights are reweighted
+    start_year = 2024
+    end_year = 2024  # same as start_year, so only 2024 weights are produced
+    name = "enhanced_cps_2024"
+    label = "Enhanced CPS 2024"
+    file_path = STORAGE_FOLDER / "enhanced_cps_2024.csv"
+
+
+if __name__ == "__main__":
+    EnhancedCPS_2024().generate()
diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py
index f84ee7b..13003b0 100644
--- a/policyengine_us_data/datasets/cps/extended_cps.py
+++ b/policyengine_us_data/datasets/cps/extended_cps.py
@@ -65,8 +65,6 @@
     "w2_wages_from_qualified_business",
 ]
 
-IMPUTED_VARIABLES = ["employment_income"]
-
 
 class ExtendedCPS(Dataset):
     cps: Type[CPS]
@@ -95,21 +93,39 @@ def generate(self):
 
         model = Imputation()
 
-        model.train(X_train, y_train, verbose=True)
+        model.train(X_train, y_train, verbose=True, num_trees=10)
 
         X = cps_sim.calculate_dataframe(INPUTS)
         y = model.predict(X, verbose=True)
 
         original_dataset = cps_sim.to_input_dataframe()
-        original_dataset["employment_income"] = (
-            original_dataset.employment_income_before_lsr
-        )
-        original_dataset["self_employment_income"] = (
-            original_dataset.self_employment_income_before_lsr
-        )
+        renames = {
+            f"employment_income_before_lsr__{self.time_period}": f"employment_income__{self.time_period}",
+            f"self_employment_income_before_lsr__{self.time_period}": f"self_employment_income__{self.time_period}",
+        }
+        for a, b in renames.items():
+            original_dataset[b] = original_dataset[a]
+            del original_dataset[a]
         imputed_dataset = original_dataset.copy().reset_index()
 
-        imputed_dataset[IMPUTED_VARIABLES] = y
+        for variable in IMPUTED_VARIABLES:
+            imputed_dataset[f"{variable}__{self.time_period}"] = y[variable]
+
+        ENTITIES = ("person", "tax_unit", "family", "spm_unit", "household")
+        for entity in ENTITIES:
+            for id_name in [
+                f"{entity}_id__{self.time_period}",
+                f"person_{entity}_id__{self.time_period}",
+            ]:
+                if "person_person" in id_name:
+                    continue
+                original_ids = original_dataset[id_name].values
+                new_ids = original_ids + original_ids.max()
+                imputed_dataset[id_name] = new_ids
+
+        for variable in imputed_dataset.columns:
+            if "_weight" in variable:
+                imputed_dataset[variable] = 0
         original_dataset["data_source"] = "cps"
         imputed_dataset["data_source"] = "puf_imputed"
         combined = pd.concat([original_dataset, imputed_dataset])
@@ -123,3 +139,4 @@ class ExtendedCPS_2024(ExtendedCPS):
     name = "extended_cps_2024"
     label = "Extended CPS (2024)"
     file_path = STORAGE_FOLDER / "extended_cps_2024.csv"
+    time_period = 2024
diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py
index f86d95b..5f17f7e 100644
--- a/policyengine_us_data/datasets/puf/puf.py
+++ b/policyengine_us_data/datasets/puf/puf.py
@@ -501,7 +501,7 @@ class PUF_2021(PUF):
 
 
 class PUF_2024(PUF):
-    label = "PUF 2024"
+    label = "PUF 2024 (2015-based)"
     name = "puf_2024"
     time_period = 2024
     file_path = STORAGE_FOLDER / "pe_puf_2024.h5"
diff --git a/policyengine_us_data/utils/__init__.py b/policyengine_us_data/utils/__init__.py
index e69de29..1ccbd39 100644
--- a/policyengine_us_data/utils/__init__.py
+++ b/policyengine_us_data/utils/__init__.py
@@ -0,0 +1,4 @@
+from .github import *
+from .soi import *
+from .uprating import *
+from .loss import *
diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py
new file mode 100644
index 0000000..baf8f99
--- /dev/null
+++ b/policyengine_us_data/utils/loss.py
@@ -0,0 +1,138 @@
+import pandas as pd
+from .soi import pe_to_soi, get_soi
+import numpy as np
+
+
+def fmt(x):
+    if x == -np.inf:
+        return "-inf"
+    if x == np.inf:
+        return "inf"
+    if x < 1e3:
+        return f"{x:.0f}"
+    if x < 1e6:
+        return f"{x/1e3:.0f}k"
+    if x < 1e9:
+        return f"{x/1e6:.0f}m"
+    return f"{x/1e9:.1f}bn"
+
+
+def build_loss_matrix(dataset: type, time_period):
+    loss_matrix = pd.DataFrame()
+    df = pe_to_soi(dataset, time_period)
+    agi = df["adjusted_gross_income"].values
+    filer = df["is_tax_filer"].values
+    soi_subset = get_soi(time_period)
+    targets_array = []
+    agi_level_targeted_variables = [
+        "adjusted_gross_income",
+        "count",
+        "employment_income",
+        "business_net_profits",
+        "capital_gains_gross",
+        "ordinary_dividends",
+        "partnership_and_s_corp_income",
+        "qualified_dividends",
+        "taxable_interest_income",
+        "total_pension_income",
+        "total_social_security",
+    ]
+    aggregate_level_targeted_variables = [
+        "business_net_losses",
+        "capital_gains_distributions",
+        "capital_gains_losses",
+        "estate_income",
+        "estate_losses",
+        "exempt_interest",
+        "ira_distributions",
+        "partnership_and_s_corp_losses",
+        "rent_and_royalty_net_income",
+        "rent_and_royalty_net_losses",
+        "taxable_pension_income",
+        "taxable_social_security",
+        "unemployment_compensation",
+    ]
+    aggregate_level_targeted_variables = [
+        variable
+        for variable in aggregate_level_targeted_variables
+        if variable in df.columns
+    ]
+    soi_subset = soi_subset[
+        soi_subset.Variable.isin(agi_level_targeted_variables)
+        & (
+            (soi_subset["AGI lower bound"] != -np.inf)
+            | (soi_subset["AGI upper bound"] != np.inf)
+        )
+        | (
+            soi_subset.Variable.isin(aggregate_level_targeted_variables)
+            & (soi_subset["AGI lower bound"] == -np.inf)
+            & (soi_subset["AGI upper bound"] == np.inf)
+        )
+    ]
+    for _, row in soi_subset.iterrows():
+        if row["Taxable only"]:
+            continue  # exclude "taxable returns" statistics
+
+        mask = (
+            (agi >= row["AGI lower bound"])
+            * (agi < row["AGI upper bound"])
+            * filer
+        ) > 0
+
+        if row["Filing status"] == "Single":
+            mask *= df["filing_status"].values == "SINGLE"
+        elif row["Filing status"] == "Married Filing Jointly/Surviving Spouse":
+            mask *= df["filing_status"].values == "JOINT"
+        elif row["Filing status"] == "Head of Household":
+            mask *= df["filing_status"].values == "HEAD_OF_HOUSEHOLD"
+        elif row["Filing status"] == "Married Filing Separately":
+            mask *= df["filing_status"].values == "SEPARATE"
+
+        values = df[row["Variable"]].values
+
+        if row["Count"]:
+            values = (values > 0).astype(float)
+
+        agi_range_label = (
+            f"{fmt(row['AGI lower bound'])}-{fmt(row['AGI upper bound'])}"
+        )
+        taxable_label = (
+            "taxable" if row["Taxable only"] else "all" + " returns"
+        )
+        filing_status_label = row["Filing status"]
+
+        variable_label = row["Variable"].replace("_", " ")
+
+        if row["Count"] and not row["Variable"] == "count":
+            label = (
+                f"{variable_label}/count/AGI in "
+                f"{agi_range_label}/{taxable_label}/{filing_status_label}"
+            )
+        elif row["Variable"] == "count":
+            label = (
+                f"{variable_label}/count/AGI in "
+                f"{agi_range_label}/{taxable_label}/{filing_status_label}"
+            )
+        else:
+            label = (
+                f"{variable_label}/total/AGI in "
+                f"{agi_range_label}/{taxable_label}/{filing_status_label}"
+            )
+
+        if label not in loss_matrix.columns:
+            loss_matrix[label] = mask * values
+            targets_array.append(row["Value"])
+
+    # Convert tax-unit level df to household-level df
+
+    from policyengine_us import Microsimulation
+
+    sim = Microsimulation(dataset=dataset)
+    hh_id = sim.calculate("household_id", map_to="person")
+    tax_unit_hh_id = sim.map_result(
+        hh_id, "person", "tax_unit", how="value_from_first_person"
+    )
+
+    loss_matrix = loss_matrix.groupby(tax_unit_hh_id).sum()
+
+    return loss_matrix, np.array(targets_array)
diff --git a/policyengine_us_data/utils/soi.py b/policyengine_us_data/utils/soi.py
new file mode 100644
index 0000000..61eb0c0
--- /dev/null
+++ b/policyengine_us_data/utils/soi.py
@@ -0,0 +1,183 @@
+import pandas as pd
+import numpy as np
+from .uprating import create_policyengine_uprating_factors_table
+from policyengine_us_data.data_storage import STORAGE_FOLDER
+
+
+def pe_to_soi(pe_dataset, year):
+    """Tabulate a PolicyEngine dataset as tax-unit rows with SOI column names.
+
+    ``year`` is accepted but never read: each variable is calculated with
+    no explicit period. Signed sources are split into a positive part and
+    a negated negative part (the ``*_losses`` columns).
+    """
+    from policyengine_us import Microsimulation
+
+    sim = Microsimulation(dataset=pe_dataset)
+
+    # Every calculated column is mapped to tax-unit level.
+    def value(variable):
+        return np.array(sim.calculate(variable, map_to="tax_unit"))
+
+    def gain(variable):
+        return value(variable) * (value(variable) > 0)
+
+    def loss(variable):
+        return -value(variable) * (value(variable) < 0)
+
+    soi = pd.DataFrame()
+    soi["adjusted_gross_income"] = value("adjusted_gross_income")
+    soi["exemption"] = value("exemptions")
+    soi["itemded"] = value("itemized_taxable_income_deductions")
+    soi["income_tax_after_credits"] = value("income_tax")
+    soi["total_income_tax"] = value("income_tax_before_credits")
+    soi["taxable_income"] = value("taxable_income")
+    soi["business_net_profits"] = gain("self_employment_income")
+    soi["business_net_losses"] = loss("self_employment_income")
+    soi["capital_gains_distributions"] = value("non_sch_d_capital_gains")
+    soi["capital_gains_gross"] = gain("loss_limited_net_capital_gains")
+    soi["capital_gains_losses"] = loss("loss_limited_net_capital_gains")
+    soi["estate_income"] = gain("estate_income")
+    soi["estate_losses"] = loss("estate_income")
+    soi["exempt_interest"] = value("tax_exempt_interest_income")
+    soi["ira_distributions"] = value("taxable_ira_distributions")
+    soi["count_of_exemptions"] = value("exemptions_count")
+    soi["ordinary_dividends"] = (
+        value("non_qualified_dividend_income")
+        + value("qualified_dividend_income")
+    )
+    soi["partnership_and_s_corp_income"] = gain("partnership_s_corp_income")
+    soi["partnership_and_s_corp_losses"] = loss("partnership_s_corp_income")
+    soi["total_pension_income"] = value("pension_income")
+    soi["taxable_pension_income"] = value("taxable_pension_income")
+    soi["qualified_dividends"] = value("qualified_dividend_income")
+    soi["rent_and_royalty_net_income"] = gain("rental_income")
+    soi["rent_and_royalty_net_losses"] = loss("rental_income")
+    soi["total_social_security"] = value("social_security")
+    soi["taxable_social_security"] = value("taxable_social_security")
+    soi["income_tax_before_credits"] = value("income_tax_before_credits")
+    soi["taxable_interest_income"] = value("taxable_interest_income")
+    soi["unemployment_compensation"] = value(
+        "taxable_unemployment_compensation"
+    )
+    soi["employment_income"] = value("irs_employment_income")
+    soi["qualified_business_income_deduction"] = value(
+        "qualified_business_income_deduction"
+    )
+    soi["charitable_contributions_deduction"] = value("charitable_deduction")
+    soi["interest_paid_deductions"] = value("interest_deduction")
+    soi["medical_expense_deductions_uncapped"] = value(
+        "medical_expense_deduction"
+    )
+    soi["state_and_local_tax_deductions"] = value("salt_deduction")
+    soi["itemized_state_income_and_sales_tax_deductions"] = value(
+        "state_and_local_sales_or_income_tax"
+    )
+    soi["itemized_real_estate_tax_deductions"] = value("real_estate_taxes")
+    soi["is_tax_filer"] = value("tax_unit_is_filer")
+    soi["count"] = 1
+    soi["filing_status"] = value("filing_status")
+    soi["weight"] = value("household_weight")
+    soi["household_id"] = value("household_id")
+
+    return soi
+
+
+def puf_to_soi(puf, year):
+    """Rename columns of ``puf`` to SOI-style names, one row per record.
+
+    ``year`` is accepted but never read. Net amounts are split into a
+    gains column and a sign-flipped losses column.
+    """
+    net_cap_gains = puf["E01000"]
+    net_s_corp = puf["E26270"]
+    net_business = puf["E00900"]
+
+    soi = pd.DataFrame()
+    soi["adjusted_gross_income"] = puf["E00100"]
+    soi["total_income_tax"] = puf["E06500"]
+    soi["employment_income"] = puf["E00200"]
+    soi["capital_gains_distributions"] = puf["E01100"]
+    soi["capital_gains_gross"] = net_cap_gains * (net_cap_gains > 0)
+    soi["capital_gains_losses"] = -net_cap_gains * (net_cap_gains < 0)
+    soi["estate_income"] = puf["E26390"]
+    soi["estate_losses"] = puf["E26400"]
+    soi["exempt_interest"] = puf["E00400"]
+    soi["ira_distributions"] = puf["E01400"]
+    soi["count_of_exemptions"] = puf["XTOT"]
+    soi["ordinary_dividends"] = puf["E00600"]
+    soi["partnership_and_s_corp_income"] = net_s_corp * (net_s_corp > 0)
+    soi["partnership_and_s_corp_losses"] = -net_s_corp * (net_s_corp < 0)
+    soi["total_pension_income"] = puf["E01500"]
+    soi["taxable_pension_income"] = puf["E01700"]
+    soi["qualified_dividends"] = puf["E00650"]
+    soi["rent_and_royalty_net_income"] = puf["E25850"]
+    soi["rent_and_royalty_net_losses"] = puf["E25860"]
+    soi["total_social_security"] = puf["E02400"]
+    soi["taxable_social_security"] = puf["E02500"]
+    soi["income_tax_before_credits"] = puf["E06500"]
+    soi["taxable_interest_income"] = puf["E00300"]
+    soi["unemployment_compensation"] = puf["E02300"]
+    soi["charitable_contributions_deduction"] = puf["E19700"]
+    soi["interest_paid_deductions"] = puf["E19200"]
+    soi["medical_expense_deductions_uncapped"] = puf["E17500"]
+    soi["itemized_state_income_and_sales_tax_deductions"] = puf["E18400"]
+    soi["itemized_real_estate_tax_deductions"] = puf["E18500"]
+    soi["state_and_local_tax_deductions"] = puf["E18400"] + puf["E18500"]
+    soi["income_tax_after_credits"] = puf["E08800"]
+    soi["business_net_profits"] = net_business * (net_business > 0)
+    soi["business_net_losses"] = -net_business * (net_business < 0)
+    soi["taxable_income"] = puf["E04800"]
+    soi["is_tax_filer"] = True
+    soi["count"] = 1
+    # MARS codes 0-4 in order; 0 (the aggregate record) is assumed single.
+    statuses = ["SINGLE", "SINGLE", "JOINT", "SEPARATE", "HEAD_OF_HOUSEHOLD"]
+    soi["filing_status"] = puf["MARS"].map(dict(enumerate(statuses)))
+    soi["weight"] = puf["S006"] / 100
+
+    return soi
+
+
+def get_soi(year: int) -> pd.DataFrame:
+    """Return the latest-year rows of ``soi.csv`` with ``Value`` uprated.
+
+    Each variable grows to ``year`` with its ``uprating_map`` row; unmapped
+    names use a same-named row if present, else stay unscaled.
+    """
+    uprating = create_policyengine_uprating_factors_table()
+    uprating_map = {
+        "adjusted_gross_income": "adjusted_gross_income",
+        "count": "population",
+        "employment_income": "employment_income",
+        "business_net_profits": "self_employment_income",
+        "capital_gains_gross": "long_term_capital_gains",
+        "ordinary_dividends": "non_qualified_dividend_income",
+        "partnership_and_s_corp_income": "partnership_s_corp_income",
+        "qualified_dividends": "qualified_dividend_income",
+        "taxable_interest_income": "taxable_interest_income",
+        "total_pension_income": "pension_income",
+        "total_social_security": "social_security",
+        "business_net_losses": "self_employment_income",
+        "capital_gains_distributions": "long_term_capital_gains",
+        "capital_gains_losses": "long_term_capital_gains",
+        "estate_income": "estate_income",
+        "estate_losses": "estate_income",
+        "exempt_interest": "tax_exempt_interest_income",
+        "ira_distributions": "taxable_ira_distributions",
+        "partnership_and_s_corp_losses": "partnership_s_corp_income",
+        "rent_and_royalty_net_income": "rental_income",
+        "rent_and_royalty_net_losses": "rental_income",
+        "taxable_pension_income": "taxable_pension_income",
+        "taxable_social_security": "taxable_social_security",
+        "unemployment_compensation": "unemployment_compensation",
+    }
+    soi = pd.read_csv(STORAGE_FOLDER / "soi.csv")
+    base_year = soi.Year.max()
+    # Copy so the in-place scaling never writes into a slice of the CSV.
+    soi = soi[soi.Year == base_year].copy()
+    for variable in soi.Variable.unique():
+        key = uprating_map.get(variable, variable)
+        if key in uprating.index:
+            growth = uprating.loc[key, year] / uprating.loc[key, base_year]
+            soi.loc[soi.Variable == variable, "Value"] *= growth
+    return soi
diff --git a/policyengine_us_data/utils/uprating.py b/policyengine_us_data/utils/uprating.py
index a2d2843..5c5be1c 100644
--- a/policyengine_us_data/utils/uprating.py
+++ b/policyengine_us_data/utils/uprating.py
@@ -35,6 +35,15 @@ def create_policyengine_uprating_factors_table():
                     per_capita_growth = growth
                 index_values.append(round(per_capita_growth, 3))
 
+    # Add population growth
+
+    for year in range(START_YEAR, END_YEAR + 1):
+        variable_names.append("population")
+        years.append(year)
+        index_values.append(
+            population_size(year) / population_size(START_YEAR)
+        )
+
     df["Variable"] = variable_names
     df["Year"] = years
     df["Value"] = index_values