From 7daadf7f67e894cad8e1fb149cc8e01c147d61f3 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 3 Dec 2024 11:21:51 +0000 Subject: [PATCH 1/5] Fix Uprate statistical targets for constituencies #47 --- Makefile | 3 +- changelog_entry.yaml | 4 ++ policyengine_uk_data/datasets/frs/frs.py | 1 + .../local_areas/constituencies/ageing.ipynb | 72 +++++++++++++++++++ .../frs/local_areas/constituencies/loss.py | 68 +++++++++++++++++- .../constituencies/targets/README.md | 5 +- .../storage/upload_private_prerequisites.py | 1 + 7 files changed, 150 insertions(+), 4 deletions(-) create mode 100644 policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb diff --git a/Makefile b/Makefile index eeb09ab..cd88a43 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,7 @@ test: pytest install: - pip install policyengine-uk==2.1.1 - pip install -e ".[dev]" + pip install -e ".[dev]" --config-settings editable_mode=compat download: python policyengine_uk_data/storage/download_private_prerequisites.py diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..e01f5ab 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - Target uprating for constituencies. diff --git a/policyengine_uk_data/datasets/frs/frs.py b/policyengine_uk_data/datasets/frs/frs.py index 12d507a..c725736 100644 --- a/policyengine_uk_data/datasets/frs/frs.py +++ b/policyengine_uk_data/datasets/frs/frs.py @@ -861,4 +861,5 @@ def impute_brmas(dataset, frs): if __name__ == "__main__": + FRS_2020_21().generate() FRS_2022_23().generate() diff --git a/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb b/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb new file mode 100644 index 0000000..5493055 --- /dev/null +++ b/policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb @@ -0,0 +1,72 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'policyengine_uk_data.datasets.frs.local_areas'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/r_/j9kk4vmd3tj29ljn52_76m4h0000gn/T/ipykernel_94907/242637587.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtqdm\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mh5py\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m from policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import (\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mtransform_2010_to_2024\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m )\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'policyengine_uk_data.datasets.frs.local_areas'" + ] + } + ], + "source": [ + "import torch\n", + "from policyengine_uk import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "import h5py\n", + "from policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import (\n", + " transform_2010_to_2024,\n", + ")\n", + "\n", + "# Fill in missing constituencies with average column values\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import (\n", + " create_constituency_target_matrix,\n", + " create_national_target_matrix,\n", + ")\n", + "from pathlib import Path\n", + "from policyengine_uk_data.storage import STORAGE_FOLDER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py b/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py index 0169498..40b8c09 100644 --- a/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py +++ b/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py @@ -16,7 +16,10 @@ def create_constituency_target_matrix( - dataset: str = "enhanced_frs_2022_23", time_period: int = 2025, reform=None + dataset: str = "enhanced_frs_2022_23", + time_period: int = 2025, + reform=None, + uprate: bool = True, ): ages = pd.read_csv(FOLDER / "targets" / "age.csv") incomes = pd.read_csv(FOLDER / "targets" / "total_income.csv") @@ -90,4 +93,67 @@ def create_constituency_target_matrix( & (employment_incomes.employment_income_upper_bound == upper_bound) ].employment_income_amount.values + if uprate: + y = uprate_targets(y, time_period) + return matrix, y + + +def uprate_targets(y: pd.DataFrame, target_year: int = 2025) -> pd.DataFrame: + # Uprate age targets from 2020, taxable income targets from 2021, employment income targets from 2023. + # Use PolicyEngine uprating factors. + sim = Microsimulation(dataset="frs_2020_21") + matrix_20, y_20 = create_constituency_target_matrix( + "frs_2020_21", 2020, uprate=False + ) + matrix_21, y_21 = create_constituency_target_matrix( + "frs_2020_21", 2021, uprate=False + ) + matrix_23, y_23 = create_constituency_target_matrix( + "frs_2020_21", 2023, uprate=False + ) + matrix_final, y_final = create_constituency_target_matrix( + "frs_2020_21", target_year, uprate=False + ) + weights_20 = sim.calculate("household_weight", 2020) + weights_21 = sim.calculate("household_weight", 2021) + weights_23 = sim.calculate("household_weight", 2023) + weights_final = sim.calculate("household_weight", target_year) + + rel_change_20_final = (weights_final @ matrix_final) / ( + weights_20 @ matrix_20 + ) - 1 + is_uprated_from_2020 = [ + col.startswith("age/") for col in matrix_20.columns + ] + uprating_from_2020 = np.zeros_like(matrix_20.columns, dtype=float) + uprating_from_2020[is_uprated_from_2020] = rel_change_20_final[ + is_uprated_from_2020 + ] + + rel_change_21_final = (weights_final @ matrix_final) / ( + weights_21 @ matrix_21 + ) - 1 + is_uprated_from_2021 = [ + col.startswith("hmrc/") for col in matrix_21.columns + ] + uprating_from_2021 = np.zeros_like(matrix_21.columns, dtype=float) + uprating_from_2021[is_uprated_from_2021] = rel_change_21_final[ + is_uprated_from_2021 + ] + + rel_change_23_final = (weights_final @ matrix_final) / ( + weights_23 @ matrix_23 + ) - 1 + is_uprated_from_2023 = [ + col.startswith("hmrc/") for col in matrix_23.columns + ] + uprating_from_2023 = np.zeros_like(matrix_23.columns, dtype=float) + uprating_from_2023[is_uprated_from_2023] = rel_change_23_final[ + is_uprated_from_2023 + ] + + uprating = uprating_from_2020 + uprating_from_2021 + uprating_from_2023 + y = y * (1 + uprating) + + return y diff --git a/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/README.md b/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/README.md index e5890e1..1e6f948 100644 --- a/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/README.md +++ b/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/README.md @@ -1,3 +1,6 @@ # Data -* Age is from [the ONS](https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.ons.gov.uk/file%3Furi%3D/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/parliamentaryconstituencymidyearpopulationestimates/mid2020sape23dt7/sape23dt7mid2020parliconsyoaestimatesunformatted.xlsx&ved=2ahUKEwifosm3x9GIAxXxQkEAHU_LB70QFnoECBgQAQ&usg=AOvVaw0-MdplttsD8klJR6M3WID8) and has single-year age counts for each political constituency (2010) in the UK. +* Age is from [the ONS](https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.ons.gov.uk/file%3Furi%3D/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/parliamentaryconstituencymidyearpopulationestimates/mid2020sape23dt7/sape23dt7mid2020parliconsyoaestimatesunformatted.xlsx&ved=2ahUKEwifosm3x9GIAxXxQkEAHU_LB70QFnoECBgQAQ&usg=AOvVaw0-MdplttsD8klJR6M3WID8) and has single-year age counts for each political constituency (2010) in the UK. The data is from 2020. +* Employment incomes are from Nomis, and are from 2023. +* HMRC total income is from 2021. + diff --git a/policyengine_uk_data/storage/upload_private_prerequisites.py b/policyengine_uk_data/storage/upload_private_prerequisites.py index bc5bcac..b821c93 100644 --- a/policyengine_uk_data/storage/upload_private_prerequisites.py +++ b/policyengine_uk_data/storage/upload_private_prerequisites.py @@ -13,6 +13,7 @@ def zip_folder(folder): FOLDER = Path(__file__).parent FILES = [ + "frs_2020_21.zip", "frs_2022_23.zip", "lcfs_2021_22.zip", "was_2006_20.zip", From 5cc350097e35126d3e9ef3cb2b198806b0cc7d30 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 3 Dec 2024 11:22:03 +0000 Subject: [PATCH 2/5] Versioning --- CHANGELOG.md | 7 +++++++ changelog.yaml | 5 +++++ changelog_entry.yaml | 4 ---- pyproject.toml | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 468da82..74cc466 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.10.0] - 2024-12-03 11:21:54 + +### Added + +- Target uprating for constituencies. + ## [1.9.2] - 2024-11-30 13:23:17 ### Fixed @@ -103,6 +109,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 +[1.10.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.9.2...1.10.0 [1.9.2]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.9.1...1.9.2 [1.9.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.9.0...1.9.1 [1.9.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.8.0...1.9.0 diff --git a/changelog.yaml b/changelog.yaml index c33312f..4f30e03 100644 --- a/changelog.yaml +++ b/changelog.yaml @@ -85,3 +85,8 @@ fixed: - Constituency weights are in A-Z order. date: 2024-11-30 13:23:17 +- bump: minor + changes: + added: + - Target uprating for constituencies. + date: 2024-12-03 11:21:54 diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e01f5ab..e69de29 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -1,4 +0,0 @@ -- bump: minor - changes: - added: - - Target uprating for constituencies. diff --git a/pyproject.toml b/pyproject.toml index 8fd95ad..4381956 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "policyengine_uk_data" -version = "1.9.2" +version = "1.10.0" description = "A package to create representative microdata for the UK." readme = "README.md" authors = [ From f2218b05148070144f0af88f46b1e9f1f08c470c Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 3 Dec 2024 11:30:30 +0000 Subject: [PATCH 3/5] Add frs to generation script --- policyengine_uk_data/datasets/frs/dwp_frs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/policyengine_uk_data/datasets/frs/dwp_frs.py b/policyengine_uk_data/datasets/frs/dwp_frs.py index cc3f3cf..dc975a5 100644 --- a/policyengine_uk_data/datasets/frs/dwp_frs.py +++ b/policyengine_uk_data/datasets/frs/dwp_frs.py @@ -109,4 +109,5 @@ class DWP_FRS_2022_23(DWP_FRS): if __name__ == "__main__": + DWP_FRS_2020_21().generate() DWP_FRS_2022_23().generate() From 98984c1d4dbe2375caeb00a4eafbf63354a9bf96 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 3 Dec 2024 11:35:57 +0000 Subject: [PATCH 4/5] Add missing data download --- policyengine_uk_data/storage/download_private_prerequisites.py | 1 + 1 file changed, 1 insertion(+) diff --git a/policyengine_uk_data/storage/download_private_prerequisites.py b/policyengine_uk_data/storage/download_private_prerequisites.py index ef81526..2094a64 100644 --- a/policyengine_uk_data/storage/download_private_prerequisites.py +++ b/policyengine_uk_data/storage/download_private_prerequisites.py @@ -12,6 +12,7 @@ def extract_zipped_folder(folder): FOLDER = Path(__file__).parent FILES = [ + "frs_2020_21.zip", "frs_2022_23.zip", "lcfs_2021_22.zip", "was_2006_20.zip", From 4f37d238024a1c1ad7d12de58f4a487393c025cd Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 3 Dec 2024 11:52:49 +0000 Subject: [PATCH 5/5] Re-add missing PE-UK dep --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index cd88a43..2a097d7 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ test: pytest install: + pip install policyengine-uk pip install -e ".[dev]" --config-settings editable_mode=compat download: