diff --git a/Makefile b/Makefile index eeb09ab..cd88a43 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,7 @@ test: pytest install: - pip install policyengine-uk==2.1.1 - pip install -e ".[dev]" + pip install -e ".[dev]" --config-settings editable_mode=compat download: python policyengine_uk_data/storage/download_private_prerequisites.py diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..e01f5ab 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - Target uprating for constituencies. diff --git a/policyengine_uk_data/datasets/frs/frs.py b/policyengine_uk_data/datasets/frs/frs.py index 12d507a..c725736 100644 --- a/policyengine_uk_data/datasets/frs/frs.py +++ b/policyengine_uk_data/datasets/frs/frs.py @@ -861,4 +861,5 @@ def impute_brmas(dataset, frs): if __name__ == "__main__": + FRS_2020_21().generate() FRS_2022_23().generate() diff --git a/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb b/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb new file mode 100644 index 0000000..5493055 --- /dev/null +++ b/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb @@ -0,0 +1,72 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'policyengine_uk_data.datasets.frs.local_areas'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/r_/j9kk4vmd3tj29ljn52_76m4h0000gn/T/ipykernel_94907/242637587.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtqdm\u001b[0m \u001b[0;32mimport\u001b[0m 
\u001b[0mtqdm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mh5py\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m from policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import (\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mtransform_2010_to_2024\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m )\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'policyengine_uk_data.datasets.frs.local_areas'" + ] + } + ], + "source": [ + "import torch\n", + "from policyengine_uk import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "import h5py\n", + "from policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import (\n", + " transform_2010_to_2024,\n", + ")\n", + "\n", + "# Fill in missing constituencies with average column values\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import (\n", + " create_constituency_target_matrix,\n", + " create_national_target_matrix,\n", + ")\n", + "from pathlib import Path\n", + "from policyengine_uk_data.storage import STORAGE_FOLDER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py 
def uprate_targets(y: pd.DataFrame, target_year: int = 2025) -> pd.DataFrame:
    """Uprate constituency targets from their source years to ``target_year``.

    For every source year, the uprating factor of a target-matrix column is
    the relative change in its household-weighted total between that source
    year and ``target_year``, computed on the ``frs_2020_21`` dataset.
    Columns starting with ``age/`` are uprated from 2020; columns starting
    with ``hmrc/`` are uprated from 2021 and from 2023 (the two factors are
    summed). Columns matching no prefix keep a factor of zero.

    Args:
        y: Target values. Assumed to have one column per target-matrix
            column, in the same order, since the per-column factors are
            broadcast across it.
        target_year: Year the targets are uprated to.

    Returns:
        ``y`` multiplied column-wise by one plus the summed uprating factors.
    """
    dataset = "frs_2020_21"
    sim = Microsimulation(dataset=dataset)

    def weighted_totals(year: int):
        # uprate=False stops create_constituency_target_matrix from calling
        # back into uprate_targets.
        matrix, _ = create_constituency_target_matrix(
            dataset, year, uprate=False
        )
        weights = sim.calculate("household_weight", year)
        return matrix.columns, np.asarray(weights @ matrix, dtype=float)

    # The target-year totals are shared by every source year, so compute
    # them once.
    _, final_totals = weighted_totals(target_year)

    # NOTE(review): 2021 and 2023 both select the "hmrc/" prefix, so those
    # columns receive the sum of two factors and no separate employment
    # income group is uprated from 2023. The original comment ("taxable
    # income targets from 2021, employment income targets from 2023")
    # suggests distinct prefixes were intended -- confirm against the column
    # names built in create_constituency_target_matrix.
    source_years = ((2020, "age/"), (2021, "hmrc/"), (2023, "hmrc/"))

    uprating = np.zeros(len(final_totals), dtype=float)
    for year, prefix in source_years:
        columns, base_totals = weighted_totals(year)
        with np.errstate(divide="ignore", invalid="ignore"):
            rel_change = final_totals / base_totals - 1
        selected = np.array(
            [col.startswith(prefix) for col in columns], dtype=bool
        )
        # A zero base-year total yields inf/NaN; leave such targets
        # un-uprated instead of poisoning them.
        selected &= np.isfinite(rel_change)
        uprating[selected] += rel_change[selected]

    return y * (1 + uprating)
e5890e1..1e6f948 100644 --- a/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/README.md +++ b/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/README.md @@ -1,3 +1,6 @@ # Data -* Age is from [the ONS](https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.ons.gov.uk/file%3Furi%3D/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/parliamentaryconstituencymidyearpopulationestimates/mid2020sape23dt7/sape23dt7mid2020parliconsyoaestimatesunformatted.xlsx&ved=2ahUKEwifosm3x9GIAxXxQkEAHU_LB70QFnoECBgQAQ&usg=AOvVaw0-MdplttsD8klJR6M3WID8) and has single-year age counts for each political constituency (2010) in the UK. +* Age is from [the ONS](https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/parliamentaryconstituencymidyearpopulationestimates/mid2020sape23dt7/sape23dt7mid2020parliconsyoaestimatesunformatted.xlsx) and has single-year age counts for each parliamentary constituency (2010 boundaries) in the UK. The data is from 2020. +* Employment incomes are from Nomis and refer to 2023. +* HMRC total income is from 2021. + diff --git a/policyengine_uk_data/storage/upload_private_prerequisites.py b/policyengine_uk_data/storage/upload_private_prerequisites.py index bc5bcac..b821c93 100644 --- a/policyengine_uk_data/storage/upload_private_prerequisites.py +++ b/policyengine_uk_data/storage/upload_private_prerequisites.py @@ -13,6 +13,7 @@ def zip_folder(folder): FOLDER = Path(__file__).parent FILES = [ + "frs_2020_21.zip", "frs_2022_23.zip", "lcfs_2021_22.zip", "was_2006_20.zip",