diff --git a/CHANGELOG.md b/CHANGELOG.md index 468da82..74cc466 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.10.0] - 2024-12-03 11:21:54 + +### Added + +- Target uprating for constituencies. + ## [1.9.2] - 2024-11-30 13:23:17 ### Fixed @@ -103,6 +109,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 +[1.10.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.9.2...1.10.0 [1.9.2]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.9.1...1.9.2 [1.9.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.9.0...1.9.1 [1.9.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.8.0...1.9.0 diff --git a/Makefile b/Makefile index eeb09ab..2a097d7 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ test: pytest install: - pip install policyengine-uk==2.1.1 - pip install -e ".[dev]" + pip install policyengine-uk + pip install -e ".[dev]" --config-settings editable_mode=compat download: python policyengine_uk_data/storage/download_private_prerequisites.py diff --git a/changelog.yaml b/changelog.yaml index c33312f..4f30e03 100644 --- a/changelog.yaml +++ b/changelog.yaml @@ -85,3 +85,8 @@ fixed: - Constituency weights are in A-Z order. date: 2024-11-30 13:23:17 +- bump: minor + changes: + added: + - Target uprating for constituencies. 
+ date: 2024-12-03 11:21:54 diff --git a/policyengine_uk_data/datasets/frs/dwp_frs.py b/policyengine_uk_data/datasets/frs/dwp_frs.py index cc3f3cf..dc975a5 100644 --- a/policyengine_uk_data/datasets/frs/dwp_frs.py +++ b/policyengine_uk_data/datasets/frs/dwp_frs.py @@ -109,4 +109,5 @@ class DWP_FRS_2022_23(DWP_FRS): if __name__ == "__main__": + DWP_FRS_2020_21().generate() DWP_FRS_2022_23().generate() diff --git a/policyengine_uk_data/datasets/frs/frs.py b/policyengine_uk_data/datasets/frs/frs.py index 12d507a..c725736 100644 --- a/policyengine_uk_data/datasets/frs/frs.py +++ b/policyengine_uk_data/datasets/frs/frs.py @@ -861,4 +861,5 @@ def impute_brmas(dataset, frs): if __name__ == "__main__": + FRS_2020_21().generate() FRS_2022_23().generate() diff --git a/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb b/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb new file mode 100644 index 0000000..5493055 --- /dev/null +++ b/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb @@ -0,0 +1,72 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'policyengine_uk_data.datasets.frs.local_areas'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/r_/j9kk4vmd3tj29ljn52_76m4h0000gn/T/ipykernel_94907/242637587.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtqdm\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mh5py\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m from 
policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import (\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mtransform_2010_to_2024\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m )\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'policyengine_uk_data.datasets.frs.local_areas'" + ] + } + ], + "source": [ + "import torch\n", + "from policyengine_uk import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "import h5py\n", + "from policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import (\n", + " transform_2010_to_2024,\n", + ")\n", + "\n", + "# Fill in missing constituencies with average column values\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import (\n", + " create_constituency_target_matrix,\n", + " create_national_target_matrix,\n", + ")\n", + "from pathlib import Path\n", + "from policyengine_uk_data.storage import STORAGE_FOLDER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py b/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py index 0169498..40b8c09 100644 --- a/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py +++ 
def uprate_targets(y: pd.DataFrame, target_year: int = 2025) -> pd.DataFrame:
    """Scale constituency targets from their source year to ``target_year``.

    The raw targets are observed in different years: age targets in 2020,
    HMRC total (taxable) income targets in 2021 and employment income targets
    in 2023. Each target column is multiplied by the growth, between its
    source year and ``target_year``, of the household-weighted national total
    of the matching column in the ``frs_2020_21`` simulation.

    Args:
        y: Target values with one column per target, in the same column order
            as the matrix returned by ``create_constituency_target_matrix``.
        target_year: Year the targets should be expressed in.

    Returns:
        ``y`` with every column multiplied by ``1 + relative change``. Columns
        that match none of the known prefixes are returned unchanged.
    """
    dataset = "frs_2020_21"
    sim = Microsimulation(dataset=dataset)

    def national_totals(year: int):
        """Return (column names, weighted national total per target column)."""
        # uprate=False is essential: with uprating on, the matrix builder
        # calls back into this function and would recurse forever.
        matrix, _ = create_constituency_target_matrix(
            dataset, year, uprate=False
        )
        weights = sim.calculate("household_weight", year)
        return list(matrix.columns), weights @ matrix

    # The target-year totals are shared by every base year, so build them once.
    columns, totals_final = national_totals(target_year)

    def growth_since(base_year: int) -> np.ndarray:
        """Relative change of each national total from base_year to target."""
        _, totals_base = national_totals(base_year)
        return np.asarray(totals_final / totals_base - 1, dtype=float)

    is_age = np.array([col.startswith("age/") for col in columns], dtype=bool)
    is_hmrc = np.array(
        [col.startswith("hmrc/") for col in columns], dtype=bool
    )
    # NOTE(review): assumes employment-income target columns contain
    # "employment_income" in their name -- confirm against the column names
    # built in create_constituency_target_matrix. If none match, every hmrc/
    # column is simply uprated from 2021.
    is_employment = np.array(
        ["employment_income" in col for col in columns], dtype=bool
    )

    # Each column must belong to at most one base year. Previously the 2021
    # and 2023 masks were both `startswith("hmrc/")`, so every HMRC column
    # received the sum of both growth rates (i.e. it was uprated twice).
    base_year_masks = {
        2020: is_age,
        2021: is_hmrc & ~is_employment,
        2023: is_hmrc & is_employment,
    }

    uprating = np.zeros(len(columns), dtype=float)
    for base_year, mask in base_year_masks.items():
        uprating[mask] = growth_since(base_year)[mask]

    return y * (1 + uprating)
+* Age is from [the ONS](https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.ons.gov.uk/file%3Furi%3D/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/parliamentaryconstituencymidyearpopulationestimates/mid2020sape23dt7/sape23dt7mid2020parliconsyoaestimatesunformatted.xlsx&ved=2ahUKEwifosm3x9GIAxXxQkEAHU_LB70QFnoECBgQAQ&usg=AOvVaw0-MdplttsD8klJR6M3WID8) and has single-year age counts for each political constituency (2010) in the UK. The data is from 2020. +* Employment incomes are from Nomis, and are from 2023. +* HMRC total income is from 2021. + diff --git a/policyengine_uk_data/storage/download_private_prerequisites.py b/policyengine_uk_data/storage/download_private_prerequisites.py index ef81526..2094a64 100644 --- a/policyengine_uk_data/storage/download_private_prerequisites.py +++ b/policyengine_uk_data/storage/download_private_prerequisites.py @@ -12,6 +12,7 @@ def extract_zipped_folder(folder): FOLDER = Path(__file__).parent FILES = [ + "frs_2020_21.zip", "frs_2022_23.zip", "lcfs_2021_22.zip", "was_2006_20.zip", diff --git a/policyengine_uk_data/storage/upload_private_prerequisites.py b/policyengine_uk_data/storage/upload_private_prerequisites.py index bc5bcac..b821c93 100644 --- a/policyengine_uk_data/storage/upload_private_prerequisites.py +++ b/policyengine_uk_data/storage/upload_private_prerequisites.py @@ -13,6 +13,7 @@ def zip_folder(folder): FOLDER = Path(__file__).parent FILES = [ + "frs_2020_21.zip", "frs_2022_23.zip", "lcfs_2021_22.zip", "was_2006_20.zip", diff --git a/pyproject.toml b/pyproject.toml index 8fd95ad..4381956 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "policyengine_uk_data" -version = "1.9.2" +version = "1.10.0" description = "A package to create representative microdata for the UK." readme = "README.md" authors = [