Skip to content

Commit

Permalink
Fix Uprate statistical targets for constituencies #47
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilwoodruff committed Dec 3, 2024
1 parent 08853c4 commit 7daadf7
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 4 deletions.
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ test:
pytest

install:
pip install policyengine-uk==2.1.1
pip install -e ".[dev]"
pip install -e ".[dev]" --config-settings editable_mode=compat

download:
python policyengine_uk_data/storage/download_private_prerequisites.py
Expand Down
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
added:
- Target uprating for constituencies.
1 change: 1 addition & 0 deletions policyengine_uk_data/datasets/frs/frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,4 +861,5 @@ def impute_brmas(dataset, frs):


if __name__ == "__main__":
FRS_2020_21().generate()
FRS_2022_23().generate()
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'policyengine_uk_data.datasets.frs.local_areas'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/r_/j9kk4vmd3tj29ljn52_76m4h0000gn/T/ipykernel_94907/242637587.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtqdm\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mh5py\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m from policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import (\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mtransform_2010_to_2024\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m )\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'policyengine_uk_data.datasets.frs.local_areas'"
]
}
],
"source": [
"import torch\n",
"from policyengine_uk import Microsimulation\n",
"import pandas as pd\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"import h5py\n",
"from policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import (\n",
" transform_2010_to_2024,\n",
")\n",
"\n",
"# Fill in missing constituencies with average column values\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import (\n",
" create_constituency_target_matrix,\n",
" create_national_target_matrix,\n",
")\n",
"from pathlib import Path\n",
"from policyengine_uk_data.storage import STORAGE_FOLDER"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@


def create_constituency_target_matrix(
dataset: str = "enhanced_frs_2022_23", time_period: int = 2025, reform=None
dataset: str = "enhanced_frs_2022_23",
time_period: int = 2025,
reform=None,
uprate: bool = True,
):
ages = pd.read_csv(FOLDER / "targets" / "age.csv")
incomes = pd.read_csv(FOLDER / "targets" / "total_income.csv")
Expand Down Expand Up @@ -90,4 +93,67 @@ def create_constituency_target_matrix(
& (employment_incomes.employment_income_upper_bound == upper_bound)
].employment_income_amount.values

if uprate:
y = uprate_targets(y, time_period)

return matrix, y


def _weighted_target_totals(sim, dataset: str, year: int):
    """Return ``(columns, household_weight @ matrix)`` for one year.

    The target matrix is built without uprating so that this helper can be
    used from inside ``uprate_targets`` without recursing back into it.
    """
    matrix, _ = create_constituency_target_matrix(dataset, year, uprate=False)
    weights = sim.calculate("household_weight", year)
    return matrix.columns, weights @ matrix


def uprate_targets(y: pd.DataFrame, target_year: int = 2025) -> pd.DataFrame:
    """Scale constituency targets from their source year to ``target_year``.

    For each source year, the relative change in the weighted totals of the
    ``frs_2020_21`` target matrix between that year and ``target_year`` is
    used as the uprating factor for the columns attributed to that year.

    Args:
        y: Constituency targets, one column per target (same column order as
            the matrix returned by ``create_constituency_target_matrix``).
        target_year: Year the targets should be expressed in.

    Returns:
        ``y`` multiplied column-wise by ``1 + uprating``.
    """
    dataset = "frs_2020_21"
    # (source year, column prefix uprated from that year).
    # Age targets are from 2020, taxable income targets from 2021 and
    # employment income targets from 2023.
    # NOTE(review): the 2021 and 2023 entries select the same "hmrc/" prefix,
    # so those columns receive the SUM of both relative changes. This
    # reproduces the existing behaviour; confirm whether the 2023 entry was
    # meant to select only the employment income columns.
    base_years = (
        (2020, "age/"),
        (2021, "hmrc/"),
        (2023, "hmrc/"),
    )

    # Use PolicyEngine uprating factors.
    sim = Microsimulation(dataset=dataset)
    final_columns, final_totals = _weighted_target_totals(
        sim, dataset, target_year
    )

    uprating = np.zeros(len(final_columns), dtype=float)
    for base_year, prefix in base_years:
        columns, base_totals = _weighted_target_totals(
            sim, dataset, base_year
        )
        rel_change = np.asarray(final_totals / base_totals - 1)
        selected = np.array(
            [col.startswith(prefix) for col in columns], dtype=bool
        )
        uprating[selected] += rel_change[selected]

    return y * (1 + uprating)
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Data

* Age is from [the ONS](https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.ons.gov.uk/file%3Furi%3D/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/parliamentaryconstituencymidyearpopulationestimates/mid2020sape23dt7/sape23dt7mid2020parliconsyoaestimatesunformatted.xlsx&ved=2ahUKEwifosm3x9GIAxXxQkEAHU_LB70QFnoECBgQAQ&usg=AOvVaw0-MdplttsD8klJR6M3WID8) and has single-year age counts for each political constituency (2010) in the UK.
* Age is from [the ONS](https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.ons.gov.uk/file%3Furi%3D/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/parliamentaryconstituencymidyearpopulationestimates/mid2020sape23dt7/sape23dt7mid2020parliconsyoaestimatesunformatted.xlsx&ved=2ahUKEwifosm3x9GIAxXxQkEAHU_LB70QFnoECBgQAQ&usg=AOvVaw0-MdplttsD8klJR6M3WID8) and has single-year age counts for each parliamentary constituency (2010 boundaries) in the UK. The data is the mid-2020 population estimate.
* Employment incomes are from Nomis, and are from 2023.
* HMRC total income is from 2021.

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def zip_folder(folder):
FOLDER = Path(__file__).parent

FILES = [
"frs_2020_21.zip",
"frs_2022_23.zip",
"lcfs_2021_22.zip",
"was_2006_20.zip",
Expand Down

0 comments on commit 7daadf7

Please sign in to comment.