diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index 7f2e498..3effa65 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -30,6 +30,8 @@ jobs: test: name: Build and test runs-on: ubuntu-latest + env: + HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -47,7 +49,19 @@ jobs: HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} - name: Build datasets run: make data + env: + DATA_LITE: true - name: Run tests run: pytest - name: Test documentation builds run: make documentation + + - name: Check documentation build + run: | + for notebook in $(find docs/_build/jupyter_execute -name "*.ipynb"); do + if grep -q '"output_type": "error"' "$notebook"; then + echo "Error found in $notebook" + cat "$notebook" + exit 1 + fi + done diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index 2468588..94ae553 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -26,6 +26,8 @@ jobs: test: name: Build and test runs-on: ubuntu-latest + env: + HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/Makefile b/Makefile index f7b2c55..7aa5911 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ test: install: pip install policyengine-uk - pip install policyengine + pip install "policyengine>=2.4" pip install -e ".[dev]" --config-settings editable_mode=compat download: @@ -22,7 +22,7 @@ docker: documentation: jb clean docs && jb build docs - python docs/add_plotly_to_book.py docs/book + python docs/add_plotly_to_book.py docs data: python policyengine_uk_data/datasets/frs/dwp_frs.py diff --git a/docs/_toc.yml b/docs/_toc.yml index 5c0c742..9f3e365 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -8,3 +8,4 @@ chapters: - file: validation/constituencies.ipynb - file: validation/local_authorities.ipynb - file: pension_contributions.ipynb +- 
file: constituency_methodology.ipynb \ No newline at end of file diff --git a/docs/constituency_methodology.ipynb b/docs/constituency_methodology.ipynb new file mode 100644 index 0000000..e34448e --- /dev/null +++ b/docs/constituency_methodology.ipynb @@ -0,0 +1,5697 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Constituency methodology" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "When policy changes in the UK - taxes, benefits, or public spending - it affects places and people differently. PolicyEngine UK builds tools to analyze incomes, jobs, and population patterns in each constituency. This documentation explains how we create a microsimulation model that works at the constituency level. The system combines workplace surveys of jobs and earnings, HMRC tax records, and population statistics. We map data between 2010 and 2024 constituency boundaries, estimate income distributions, and optimize geographic weights.\n", + "\n", + "This guide shows how to use PolicyEngine UK for constituency analysis. We start with data collection, transform it for modeling, and build tools to examine policies. The guide provides examples and code to implement these methods. Users can measure changes in household budgets, track employment, and understand economic patterns across different constituencies. This document starts with data collection from workplace surveys, tax records, and population counts, then explains how we convert this data into usable forms through income brackets and boundary mapping. It concludes with technical details about accuracy measurement and calibration, plus example code for analysis and visualization." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data\n", + "\n", + "In this section, we describe three main data sources that form the foundation of our constituency-level analysis: earnings and jobs data from NOMIS ASHE, income statistics from HMRC, and population age distributions from the House of Commons Library.\n", + "\n", + "### Earnings and jobs data\n", + "\n", + "We extract data from the NOMIS Annual Survey of Hours and Earnings (ASHE) workplace analysis dataset, available from the [NOMIS website](https://www.nomisweb.co.uk/datasets/ashe), which contains the number of jobs and earnings percentiles for all UK parliamentary constituencies. This dataset is stored as [`nomis_earning_jobs_data.xlsx`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/nomis_earning_jobs_data.xlsx). To download the data, follow the variable selection process shown in the image below:\n", + "\n", + "![](pictures/nomis_screenshot1.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Income data\n", + "\n", + "Income data for UK parliamentary constituencies is obtained from [HMRC](https://www.gov.uk/government/statistics/income-and-tax-by-parliamentary-constituency-confidence-intervals). 
This dataset provides detailed information about income and tax by Parliamentary constituency with confidence intervals, and is stored as [`total_income.csv`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv), including two key variables:\n", + "\n", + "- [`total_income_count`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv#L1): the total number of taxpayers in each constituency\n", + "\n", + "- [`total_income_amount`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv#L1): the total amount of income for all taxpayers in each constituency\n", + "\n", + "We use these measures to identify similar constituencies when employment distribution data is missing. Our approach assumes that constituencies with similar income patterns (measured by both taxpayer counts and total income) will have similar earnings distributions. The following table shows the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "\n", + "\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
codenametotal_income_counttotal_income_amount
\n", + "\n", + "
\n", + "Loading ITables v2.2.3 from the init_notebook_mode cell...\n", + "(need help?)
\n", + "\n" + ], + "text/plain": [ + " code name total_income_count \\\n", + "0 E14000530 Aldershot 56000.0 \n", + "1 E14000531 Aldridge-Brownhills 40000.0 \n", + "2 E14000532 Altrincham and Sale West 53000.0 \n", + "3 E14000533 Amber Valley 46000.0 \n", + "4 E14000534 Arundel and South Downs 56000.0 \n", + ".. ... ... ... \n", + "645 W07000076 Caerphilly 38000.0 \n", + "646 W07000077 Islwyn 34000.0 \n", + "647 W07000078 Vale of Glamorgan 51000.0 \n", + "648 W07000079 Cardiff West 45000.0 \n", + "649 W07000080 Cardiff South and Penarth 50000.0 \n", + "\n", + " total_income_amount \n", + "0 1.999200e+09 \n", + "1 1.312000e+09 \n", + "2 3.180000e+09 \n", + "3 1.389200e+09 \n", + "4 2.665600e+09 \n", + ".. ... \n", + "645 1.193200e+09 \n", + "646 9.656000e+08 \n", + "647 1.810500e+09 \n", + "648 1.647000e+09 \n", + "649 1.640000e+09 \n", + "\n", + "[650 rows x 4 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "from itables import init_notebook_mode, show\n", + "import itables.options as opt\n", + "opt.maxBytes = \"1MB\"\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Population data by age\n", + "\n", + "Population data by age groups for UK parliamentary constituencies can be downloaded from the [House of Commons Library data dashboard](https://commonslibrary.parliament.uk/constituency-statistics-population-by-age/). The dataset provides detailed age breakdowns for each UK constituency, containing population counts for every age from 0 to 90+ years old across all parliamentary constituencies in England, Wales, Northern Ireland, and Scotland. 
The data is stored as [`age.csv`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/age.csv). The following table shows the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "\n", + "\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
codenameall0123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990+
\n", + "\n", + "
\n", + "Loading ITables v2.2.3 from the init_notebook_mode cell...\n", + "(need help?)
\n", + "\n" + ], + "text/plain": [ + " code name all 0 1 2 \\\n", + "0 E14000530 Aldershot 105168.0 1313.0 1401.0 1436.0 \n", + "1 E14000531 Aldridge-Brownhills 77683.0 783.0 789.0 840.0 \n", + "2 E14000532 Altrincham and Sale West 102444.0 943.0 1058.0 1130.0 \n", + "3 E14000533 Amber Valley 92277.0 815.0 902.0 932.0 \n", + "4 E14000534 Arundel and South Downs 102673.0 789.0 779.0 903.0 \n", + ".. ... ... ... ... ... ... \n", + "645 W07000076 Caerphilly 88586.0 886.0 906.0 979.0 \n", + "646 W07000077 Islwyn 76917.0 689.0 732.0 838.0 \n", + "647 W07000078 Vale of Glamorgan 105094.0 976.0 1050.0 1165.0 \n", + "648 W07000079 Cardiff West 94951.0 1116.0 1111.0 1132.0 \n", + "649 W07000080 Cardiff South and Penarth 118913.0 1282.0 1277.0 1392.0 \n", + "\n", + " 3 4 5 6 ... 81 82 83 84 85 \\\n", + "0 1294.0 1347.0 1491.0 1323.0 ... 513.0 455.0 449.0 362.0 317.0 \n", + "1 784.0 822.0 908.0 897.0 ... 634.0 638.0 568.0 461.0 412.0 \n", + "2 1198.0 1390.0 1287.0 1416.0 ... 610.0 557.0 511.0 455.0 436.0 \n", + "3 1008.0 957.0 964.0 939.0 ... 562.0 515.0 466.0 409.0 350.0 \n", + "4 938.0 984.0 1097.0 1052.0 ... 916.0 892.0 737.0 690.0 590.0 \n", + ".. ... ... ... ... ... ... ... ... ... ... \n", + "645 1029.0 1099.0 1053.0 1070.0 ... 491.0 477.0 387.0 363.0 334.0 \n", + "646 789.0 836.0 872.0 852.0 ... 463.0 405.0 351.0 338.0 312.0 \n", + "647 1261.0 1192.0 1271.0 1217.0 ... 656.0 592.0 531.0 447.0 389.0 \n", + "648 1165.0 1236.0 1274.0 1284.0 ... 436.0 422.0 353.0 359.0 273.0 \n", + "649 1406.0 1571.0 1511.0 1489.0 ... 516.0 513.0 405.0 421.0 355.0 \n", + "\n", + " 86 87 88 89 90+ \n", + "0 322.0 230.0 186.0 179.0 802.0 \n", + "1 348.0 333.0 319.0 253.0 922.0 \n", + "2 376.0 346.0 311.0 292.0 1252.0 \n", + "3 327.0 318.0 237.0 191.0 897.0 \n", + "4 552.0 496.0 436.0 427.0 1562.0 \n", + ".. ... ... ... ... ... 
\n", + "645 314.0 247.0 200.0 200.0 548.0 \n", + "646 304.0 200.0 201.0 161.0 606.0 \n", + "647 390.0 325.0 275.0 247.0 870.0 \n", + "648 294.0 250.0 218.0 178.0 695.0 \n", + "649 345.0 294.0 289.0 227.0 923.0 \n", + "\n", + "[650 rows x 94 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "from itables import init_notebook_mode, show\n", + "import itables.options as opt\n", + "opt.maxBytes = \"1MB\"\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/age.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preprocessing\n", + "\n", + "In this section, we detail two key preprocessing steps necessary for our constituency-level analysis: converting earnings percentiles into practical income brackets, and mapping between different constituency boundary definitions (2010 to 2024). \n", + "\n", + "### Convert earning percentiles to brackets\n", + "\n", + "To analyze earnings data effectively, we convert earning percentiles into earning brackets through the following process:\n", + "\n", + "1. First, we estimate the full distribution of earnings by:\n", + " - Using known percentile data (10th to 90th) from the ASHE dataset\n", + " - Extending this to estimate the 90th-99th percentiles using ratios derived from [this government statistics report](https://www.gov.uk/government/statistics/percentile-points-from-1-to-99-for-total-income-before-and-after-tax#:~:text=Details,in%20the%20Background%20Quality%20Report)\n", + "\n", + "2. 
This estimation allows us to map earnings data into brackets that align with policy thresholds.\n", + "\n", + "The following code and visualization demonstrate this process using an example constituency:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```{code-block} python\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Sample data for Darlington\n", + "income_data = {\n", + " 'parliamentary constituency 2010': ['Darlington'],\n", + " 'constituency_code': ['E14000658'],\n", + " 'Number of jobs': ['31000'],\n", + " '10 percentile': [13298.0],\n", + " '20 percentile': [16723.0],\n", + " '30 percentile': [20778.0],\n", + " '40 percentile': [23407.0],\n", + " '50 percentile': [27158.0],\n", + " '60 percentile': [30471.0],\n", + " '70 percentile': [33812.0],\n", + " '80 percentile': [40717.0],\n", + " '90 percentile': [55762.0],\n", + " '91 percentile': [58878.0],\n", + " '92 percentile': [62394.4],\n", + " '93 percentile': [66722.3],\n", + " '94 percentile': [71952.0],\n", + " '95 percentile': [78804.5],\n", + " '96 percentile': [87640.7],\n", + " '97 percentile': [100083.5],\n", + " '98 percentile': [123526.5],\n", + " '100 percentile': [179429.0]\n", + "}\n", + "\n", + "income_sample = pd.DataFrame(income_data)\n", + "\n", + "# Excel Data Method\n", + "def load_real_data():\n", + " # Read Excel data\n", + " income_real = pd.read_excel(\"nomis_earning_jobs_data.xlsx\", skiprows=7)\n", + " income_real.columns = income_real.iloc[0]\n", + " income_real = income_real.drop(index=0).reset_index(drop=True)\n", + " \n", + " # Select and rename columns\n", + " columns_to_keep = [\n", + " 'parliamentary constituency 2010',\n", + " 'constituency_code',\n", + " 'Number of jobs',\n", + " 'Median',\n", + " '10 percentile',\n", + " '20 percentile',\n", + " '30 percentile',\n", + " '40 percentile',\n", + " '60 percentile',\n", + " '70 percentile',\n", + " '80 percentile',\n", + " '90 
percentile'\n", + " ]\n", + " income_real = income_real[columns_to_keep]\n", + " income_real = income_real.rename(columns={'Median': '50 percentile'})\n", + " return income_real\n", + "\n", + "# Plotting function\n", + "def plot_constituency_distribution(income_df, constituency_name, detailed=True):\n", + " constituency_data = income_df[income_df['parliamentary constituency 2010'] == constituency_name].iloc[0]\n", + " \n", + " percentiles = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 91, 92, 93, 94, 95, 96, 97, 98, 100]\n", + " income_values = [\n", + " 0,\n", + " constituency_data['10 percentile'],\n", + " constituency_data['20 percentile'],\n", + " constituency_data['30 percentile'],\n", + " constituency_data['40 percentile'],\n", + " constituency_data['50 percentile'],\n", + " constituency_data['60 percentile'],\n", + " constituency_data['70 percentile'],\n", + " constituency_data['80 percentile'],\n", + " constituency_data['90 percentile'],\n", + " constituency_data['91 percentile'],\n", + " constituency_data['92 percentile'],\n", + " constituency_data['93 percentile'],\n", + " constituency_data['94 percentile'],\n", + " constituency_data['95 percentile'],\n", + " constituency_data['96 percentile'],\n", + " constituency_data['97 percentile'],\n", + " constituency_data['98 percentile'],\n", + " constituency_data['100 percentile']\n", + " ]\n", + " \n", + " valid_data = [(p, v) for p, v in zip(percentiles, income_values) if pd.notna(v)]\n", + " filtered_percentiles, filtered_income = zip(*valid_data)\n", + " \n", + " plt.figure(figsize=(8, 6))\n", + " plt.plot(filtered_percentiles, filtered_income, marker='o')\n", + " plt.xlabel('Percentiles')\n", + " plt.ylabel('Income')\n", + " plt.title(f'Income Distribution for {constituency_name}')\n", + " plt.grid(True)\n", + " plt.show()\n", + "\n", + "# Plot sample data (Darlington with detailed percentiles)\n", + "plot_constituency_distribution(income_sample, 'Darlington', detailed=True) \n", + "```" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "![](pictures/earning_dist.png)\n", + "\n", + "After estimating the full earnings distribution, we convert the data into income brackets. We calculate the number of jobs and total earnings for each constituency and income bracket based on the estimated earnings distribution. When we encounter constituencies with missing data, we estimate their earnings distribution pattern using data from constituencies with similar total number of taxpayers and total income levels. \n", + "\n", + "The Python script [`create_employment_incomes.py`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/create_employment_incomes.py) generates [`employment_income.csv`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv) containing number of jobs ([`employment_income_count`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv#L1)) and total earnings ([`employment_income_amount`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv#L1)) for each constituency and income bracket. The following table shows employment and income across different brackets for constituencies:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "\n", + "\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
codenameemployment_income_lower_boundemployment_income_upper_boundemployment_income_countemployment_income_amount
\n", + "\n", + "
\n", + "Loading ITables v2.2.3 from the init_notebook_mode cell...\n", + "(need help?)
\n", + "\n" + ], + "text/plain": [ + " code name employment_income_lower_bound \\\n", + "0 E14000530 Aldershot 200000 \n", + "1 E14000530 Aldershot 500000 \n", + "2 E14000530 Aldershot 300000 \n", + "3 E14000530 Aldershot 0 \n", + "4 E14000530 Aldershot 12570 \n", + "... ... ... ... \n", + "8445 W07000080 Cardiff South and Penarth 100000 \n", + "8446 W07000080 Cardiff South and Penarth 150000 \n", + "8447 W07000080 Cardiff South and Penarth 200000 \n", + "8448 W07000080 Cardiff South and Penarth 0 \n", + "8449 W07000080 Cardiff South and Penarth 500000 \n", + "\n", + " employment_income_upper_bound employment_income_count \\\n", + "0 300000.0 660.099464 \n", + "1 inf 0.000000 \n", + "2 500000.0 0.000000 \n", + "3 12570.0 1073.632697 \n", + "4 15000.0 623.501391 \n", + "... ... ... \n", + "8445 150000.0 2510.857214 \n", + "8446 200000.0 1920.943459 \n", + "8447 300000.0 0.000000 \n", + "8448 12570.0 700.837422 \n", + "8449 inf 0.000000 \n", + "\n", + " employment_income_amount \n", + "0 1.650249e+08 \n", + "1 0.000000e+00 \n", + "2 0.000000e+00 \n", + "3 6.747782e+06 \n", + "4 8.594967e+06 \n", + "... ... 
\n", + "8445 3.138572e+08 \n", + "8446 3.361651e+08 \n", + "8447 0.000000e+00 \n", + "8448 4.404763e+06 \n", + "8449 0.000000e+00 \n", + "\n", + "[8450 rows x 6 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "from itables import init_notebook_mode, show\n", + "import itables.options as opt\n", + "opt.maxBytes = \"1MB\"\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mapping constituencies from 2010 to 2024\n", + "\n", + "PolicyEngine uses HMRC income data, which is published on 2010 constituency boundaries. To align it with the 2024 constituency boundary definitions, we follow these steps:\n", + "\n", + "1. Download the mapping data from the [ONS website](https://www.data.gov.uk/dataset/20c4ffe5-7d86-419f-808e-da98e46f4f52/westminster-pcon-may-2010-to-westminster-pcon-july-2024-lookup-in-the-uk-v2) that contains the official lookup table between 2010 and 2024 Westminster Parliamentary Constituencies.\n", + "\n", + "2. Create a mapping matrix (650 x 650) which maps each constituency from 2010 to the corresponding constituency in 2024 using the [`construct_mapping_matrix.py`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/mapping_2010_to_2024/construct_mapping_matrix.py) script. This is a many-to-many mapping, as 2010 constituencies can be split across multiple 2024 constituencies, and 2024 constituencies can contain parts of multiple 2010 constituencies. The matrix structure has rows representing 2010 constituencies and columns representing 2024 constituencies.\n", + "\n", + "3. 
For each row in the matrix (representing a 2010 constituency), normalize the weights so they sum to 1. This ensures that when we redistribute data from 2010 boundaries to 2024 boundaries, we maintain the correct proportions.\n", + "\n", + "The following table represents this mapping matrix:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "\n", + "\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + 
"\n", + "
Unnamed: 0E14001063E14001064E14001065E14001066E14001067E14001294E14001366E14001599E14001068E14001140E14001069E14001570E14001070E14001352E14001071E14001360E14001600E14001072E14001090E14001073E14001189E14001074E14001075E14001076E14001077E14001078E14001392E14001403E14001079E14001375E14001080E14001196E14001506E14001081E14001434E14001082E14001162E14001083E14001137E14001084E14001359E14001384E14001085E14001421E14001559E14001285E14001397E14001086E14001525E14001087E14001127E14001088E14001274E14001330E14001533E14001089E14001229E14001414E14001091E14001092E14001096E14001097E14001093E14001094E14001099E14001100E14001095E14001098E14001101E14001382E14001102E14001450E14001103E14001145E14001459E14001104E14001105E14001106E14001244E14001567E14001107E14001183E14001108E14001166E14001109E14001391E14001110E14001111E14001112E14001329E14001113E14001114E14001343E14001288E14001364E14001115E14001116E14001363E14001429N05000012N05000006N05000007N05000010N05000008N05000018N05000009N05000015N05000011N05000017N05000016S14000060S14000061S14000063S14000070S14000065S14000066S14000067S14000107S14000062S14000091S14000108S14000069S14000109S14000072S14000097S14000073S14000074S14000075S14000071S14000076S14000086S14000077S14000092S14000104S14000078S14000096S14000021S14000080S14000079S14000082S14000081S14000027S14000064S14000083S14000084S14000085S14000087S14000088S14000089S14000106S14000101S14000090S14000100S14000093S14000094S14000098S14000110S14000099S14000068S14000095S14000045S14000048S14000103S14000105S14000051S14000102S14000111W07000112W07000082W07000094W07000111W07000098W07000097W07000103W07000108W07000081W07000089W07000091W07000090W07000107W07000109W07000101W07000104W07000105W07000083W07000096W07000095W07000102W07000093W07000100W07000087W07000085W07000099W07000106W07000084W07000086W07000092W07000088W07000110
\n", + "\n", + "
\n", + "Loading ITables v2.2.3 from the init_notebook_mode cell...\n", + "(need help?)
\n", + "\n" + ], + "text/plain": [ + " Unnamed: 0 E14001063 E14001064 E14001065 E14001066 E14001067 \\\n", + "0 E14000530 0.5 0.0 0 0.0 0.000000 \n", + "1 E14000531 0.0 0.5 0 0.0 0.000000 \n", + "2 E14000532 0.0 0.0 1 0.0 0.000000 \n", + "3 E14000533 0.0 0.0 0 0.5 0.000000 \n", + "4 E14000534 0.0 0.0 0 0.0 0.333333 \n", + ".. ... ... ... ... ... ... \n", + "645 W07000076 0.0 0.0 0 0.0 0.000000 \n", + "646 W07000077 0.0 0.0 0 0.0 0.000000 \n", + "647 W07000078 0.0 0.0 0 0.0 0.000000 \n", + "648 W07000079 0.0 0.0 0 0.0 0.000000 \n", + "649 W07000080 0.0 0.0 0 0.0 0.000000 \n", + "\n", + " E14001294 E14001366 E14001599 E14001068 ... W07000100 W07000087 \\\n", + "0 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "4 0.5 0.5 0.5 0.0 ... 0.0 0.0 \n", + ".. ... ... ... ... ... ... ... \n", + "645 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "646 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "647 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "648 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "649 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "\n", + " W07000085 W07000099 W07000106 W07000084 W07000086 W07000092 \\\n", + "0 0.0 0.0 0.0 0.00 0.0 0.000000 \n", + "1 0.0 0.0 0.0 0.00 0.0 0.000000 \n", + "2 0.0 0.0 0.0 0.00 0.0 0.000000 \n", + "3 0.0 0.0 0.0 0.00 0.0 0.000000 \n", + "4 0.0 0.0 0.0 0.00 0.0 0.000000 \n", + ".. ... ... ... ... ... ... \n", + "645 0.0 0.0 0.0 0.25 0.0 0.000000 \n", + "646 0.0 0.0 0.0 0.25 0.0 0.000000 \n", + "647 0.0 0.0 0.0 0.00 0.0 0.000000 \n", + "648 0.0 0.0 0.0 0.00 0.0 0.333333 \n", + "649 0.0 0.0 0.0 0.00 0.0 0.000000 \n", + "\n", + " W07000088 W07000110 \n", + "0 0.0 0 \n", + "1 0.0 0 \n", + "2 0.0 0 \n", + "3 0.0 0 \n", + "4 0.0 0 \n", + ".. ... ... 
\n", + "645 0.5 0 \n", + "646 0.5 0 \n", + "647 0.0 1 \n", + "648 0.0 0 \n", + "649 0.0 0 \n", + "\n", + "[650 rows x 651 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "from itables import init_notebook_mode, show\n", + "import itables.options as opt\n", + "opt.maxBytes = \"1MB\"\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/mapping_2010_to_2024/mapping_matrix.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Methodology\n", + "\n", + "This section describes our approach to creating accurate constituency-level estimates through three key components: a loss function for evaluating accuracy, a calibration process for optimizing weights, and the mathematical framework behind the optimization. To see how well this methodology performs in practice, you can check our detailed [validation results page](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/docs/validation/constituencies.ipynb) comparing our estimates against actual data at both constituency and national levels.\n", + "\n", + "### Loss function\n", + "\n", + "The file [`loss.py`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py) defines a function [`create_constituency_target_matrix`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py#L18) that creates target matrices for comparing simulated data against actual constituency-level data. The function works as follows:\n", + "\n", + "1. 
Takes three main input parameters: dataset (defaults to [`enhanced_frs_2022_23`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py#L19)), time_period (defaults to 2025), and an optional reform parameter for policy changes.\n", + "\n", + "2. Reads three files containing real data: [`age.csv`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/age.csv), [`total_income.csv`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv), and [`employment_income.csv`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv).\n", + "\n", + "3. Creates a PolicyEngine Microsimulation object using the specified dataset and reform parameters.\n", + "\n", + "4. Creates two main matrices: [`matrix`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py#L33) for simulated values from PolicyEngine, and [`y`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py#L34) for actual target values from both HMRC (income data) and ONS (age data).\n", + "\n", + "5. Calculates total income metrics at the national level, computing both total amounts and counts of people with income.\n", + "\n", + "6. Processes age distributions by creating 10-year age bands from 0 to 80, calculating how many people fall into each band.\n", + "\n", + "7. 
Processes both counts and amounts for different income bands between £12,570 and £70,000, excluding people under 16 for employment income.\n", + "\n", + "8. Maps individual-level results to household level using the [`sim.map_result()`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py#L88) function.\n", + "\n", + "9. The function returns both the simulated matrix and the target matrix [`(matrix, y)`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py#L34), which can be used for comparing the simulation results against actual data." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calibration function\n", + "\n", + "The file [`calibrate.py`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py) defines a main [`calibrate()`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py#L25) function that performs weight calibration for constituency-level analysis.\n", + "\n", + "1. 
It imports necessary functions and matrices from other files including [`create_constituency_target_matrix`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py#L16), [`create_national_target_matrix`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py#L17) from [`loss.py`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py), and [`transform_2010_to_2024`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/transform_constituencies.py#L7) for constituency boundary transformations.\n", + "\n", + "2. Sets up initial matrices using the [`create_constituency_target_matrix`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py#L26) and [`create_national_target_matrix`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py#L28) functions for both constituency and national level data.\n", + "\n", + "3. Creates a Microsimulation object using the [`enhanced_frs_2022_23`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py#L32) dataset.\n", + "\n", + "4. Initializes weights for 650 constituencies x 100180 households, starting with the log of household weights divided by constituency count.\n", + "\n", + "5. 
Converts all the matrices and weights into PyTorch tensors to enable optimization.\n", + "\n", + "6. Defines a loss function that calculates and combines both constituency-level and national-level mean squared errors into a single loss value.\n", + "\n", + "7. Uses the Adam optimizer with a learning rate of 0.1 to minimize the loss over 512 epochs.\n", + "\n", + "8. Every 100 epochs during optimization, it updates the weights using the mapping matrix from 2010 to 2024 constituencies and saves the current weights to a `weights.h5` file.\n", + "\n", + "9. Includes an [`update_weights()`](https://github.com/PolicyEngine/policyengine-uk-data/blob/7c782c4839a024c729350a3ff2c76922bf21b0d4/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py#L95) function that applies the constituency mapping matrix to transform the weights between different boundary definitions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Optimization mathematics\n", + "\n", + "In this part, we explain the mathematics behind the calibration process that we discussed above. The optimization uses a two-part loss function that balances constituency-level and national-level accuracy, combining both local and national targets into a single optimization problem. The mathematical formulation can be expressed as follows:\n", + "\n", + "For the constituency-level component, we have:\n", + "- A set of households ($j$) with known characteristics ($\\text{metrics}_j$) like income, age, etc.\n", + "- A set of constituencies ($i$) with known target values ($y_c$) from official statistics\n", + "- Weights in log space ($w_{ij}$) that we need to optimize for each household in each constituency\n", + "\n", + "Using these components, we calculate predicted constituency-level statistics. For each constituency metric (e.g. 
total income), the predicted value is:\n", + "\n", + "$$ \\text{pred}_c = \\sum_j (\\exp(w_{ij}) \\times \\text{metrics}_j) $$\n", + "\n", + "where $\\text{metrics}_j$ represents the household-level characteristics for that specific metric (e.g. household income). We exponentiate the log-space weights so that the effective household weights stay positive.\n", + "\n", + "To measure how well our predictions match the real constituency data, we calculate the constituency mean squared error:\n", + "\n", + "$$ \\text{MSE}_c = \\text{mean}((\\text{pred}_c / (1 + y_c) - 1)^2) $$\n", + "\n", + "where $y_c$ are the actual target values from official statistics for each constituency. We use relative error (dividing by $1 + y_c$) to make errors comparable across different scales of metrics.\n", + "\n", + "For the national component, we need to ensure our constituency-level adjustments don't distort national-level statistics. We aggregate each household's weights across all constituencies and apply the result to that household's national-level metrics:\n", + "\n", + "$$ \\text{pred}_n = \\sum_j \\left( \\sum_i \\exp(w_{ij}) \\right) \\times \\text{metrics}_{\\text{national}, j} $$\n", + "\n", + "with corresponding mean squared error to measure deviation from national targets:\n", + "\n", + "$$ \\text{MSE}_n = \\text{mean}((\\text{pred}_n / (1 + y_n) - 1)^2) $$\n", + "\n", + "The total loss combines both constituency and national errors:\n", + "\n", + "$$ L = \\text{MSE}_c + \\text{MSE}_n $$\n", + "\n", + "We initialize the weights using the original household weights from the survey data:\n", + "\n", + "$$ w_{\\text{initial}} = \\ln(\\text{household}_{\\text{weight}}/650) $$\n", + "\n", + "where 650 is the number of constituencies. These weights are then iteratively optimized using the Adam (Adaptive Moment Estimation) optimizer with a learning rate of 0.1. 
The optimization process runs for 512 epochs, with the weights being updated in each iteration. Ignoring Adam's momentum and per-parameter step scaling, each update has the form of a gradient-descent step:\n", + "\n", + "$$ w_{t+1} = w_t - 0.1 \\times \\nabla L(w_t) $$\n", + "\n", + "This formulation ensures that the optimized weights maintain both local consistency at the constituency level and global accuracy for national-level statistics. The Adam optimizer adaptively adjusts the weights to minimize both constituency-level and national-level errors simultaneously, providing efficient convergence through adaptive learning rates and momentum. The resulting optimized weights allow us to accurately reweight household survey data to match both constituency-level and national statistics, yielding accurate estimates of income distributions, demographics, and policy impacts for each parliamentary constituency while maintaining consistency with national totals." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example\n", + "\n", + "The following code demonstrates how to analyze and visualize median earnings across UK parliamentary constituencies using PolicyEngine:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```{code-block} python\n", + "# Import required libraries\n", + "from policyengine.utils.charts import *\n", + "from policyengine import Simulation\n", + "\n", + "# Initialize simulation for visualization\n", + "sim = Simulation(\n", + " country=\"uk\",\n", + " scope=\"macro\",\n", + " time_period=\"2025\",\n", + " options={\n", + " \"include_constituencies\": True, # Enable constituency-level analysis\n", + " }\n", + ")\n", + "\n", + "# Add fonts for visualization\n", + "add_fonts()\n", + "\n", + "# Define function to calculate median earnings for adults\n", + "def adult_earnings_median(sim):\n", + " # Filter for working age adults (18-65)\n", + " adult = sim.calculate(\"age\").between(18, 65)\n", + " # Get employment income\n", + " earnings = sim.calculate(\"employment_income\")\n", + " # Return median of 
positive earnings for adults\n", + " return earnings[earnings > 0][adult].quantile(0.5)\n", + "\n", + "# Create and display visualization of median earnings by constituency\n", + "sim.calculate(\n", + " \"macro/gov/local_areas/parliamentary_constituencies\",\n", + " metric=adult_earnings_median,\n", + " chart=True\n", + ").update_layout(\n", + " title=\"Median earnings of adults in parliamentary constituencies\",\n", + ")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This code demonstrates how to:\n", + "\n", + "1. Load and process constituency-level data using PolicyEngine's microsimulation capabilities\n", + "\n", + "2. Calculate real household income and population metrics\n", + "\n", + "3. Apply constituency weights to generate accurate geographic distributions\n", + "\n", + "4. Create constituency-level visualizations of median earnings\n", + "\n", + "5. Filter for working-age adults (18-65) and positive earnings\n", + "\n", + "6. Generate an interactive visualization showing median earnings across parliamentary constituencies\n", + "\n", + "The figure below shows the simulated results, displaying median earnings data across UK parliamentary constituencies in a geographic representation. The color intensity indicates earnings levels, providing an intuitive visualization of how earnings vary across different regions of the UK." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "%{hovertext}

x=%{x}
y=%{y}
Value=%{marker.color}", + "hovertext": [ + "Aldershot", + "Aldridge-Brownhills", + "Altrincham and Sale West", + "Amber Valley", + "Arundel and South Downs", + "Ashfield", + "Ashford", + "Ashton-under-Lyne", + "Aylesbury", + "Banbury", + "Barking", + "Barnsley North", + "Barnsley South", + "Barrow and Furness", + "Basildon and Billericay", + "Basingstoke", + "Bassetlaw", + "Bath", + "Battersea", + "Beaconsfield", + "Beckenham and Penge", + "Bedford", + "Bermondsey and Old Southwark", + "Bethnal Green and Stepney", + "Beverley and Holderness", + "Bexhill and Battle", + "Bexleyheath and Crayford", + "Bicester and Woodstock", + "Birkenhead", + "Birmingham Edgbaston", + "Birmingham Erdington", + "Birmingham Hall Green and Moseley", + "Birmingham Hodge Hill and Solihull North", + "Birmingham Ladywood", + "Birmingham Northfield", + "Birmingham Perry Barr", + "Birmingham Selly Oak", + "Birmingham Yardley", + "Bishop Auckland", + "Blackburn", + "Blackley and Middleton South", + "Blackpool North and Fleetwood", + "Blackpool South", + "Blaydon and Consett", + "Blyth and Ashington", + "Bognor Regis and Littlehampton", + "Bolsover", + "Bolton North East", + "Bolton South and Walkden", + "Bolton West", + "Bootle", + "Boston and Skegness", + "Bournemouth East", + "Bournemouth West", + "Bracknell", + "Bradford East", + "Bradford South", + "Bradford West", + "Braintree", + "Brent East", + "Brent West", + "Brentford and Isleworth", + "Brentwood and Ongar", + "Bridgwater", + "Bridlington and The Wolds", + "Brigg and Immingham", + "Brighton Kemptown and Peacehaven", + "Brighton Pavilion", + "Bristol Central", + "Bristol East", + "Bristol North East", + "Bristol North West", + "Bristol South", + "Broadland and Fakenham", + "Bromley and Biggin Hill", + "Bromsgrove", + "Broxbourne", + "Broxtowe", + "Buckingham and Bletchley", + "Burnley", + "Burton and Uttoxeter", + "Bury North", + "Bury South", + "Bury St Edmunds and Stowmarket", + "Calder Valley", + "Camborne and Redruth", + 
"Cambridge", + "Cannock Chase", + "Canterbury", + "Carlisle", + "Carshalton and Wallington", + "Castle Point", + "Central Devon", + "Central Suffolk and North Ipswich", + "Chatham and Aylesford", + "Cheadle", + "Chelmsford", + "Chelsea and Fulham", + "Cheltenham", + "Chesham and Amersham", + "Chester North and Neston", + "Chester South and Eddisbury", + "Chesterfield", + "Chichester", + "Chingford and Woodford Green", + "Chippenham", + "Chipping Barnet", + "Chorley", + "Christchurch", + "Cities of London and Westminster", + "City of Durham", + "Clacton", + "Clapham and Brixton Hill", + "Colchester", + "Colne Valley", + "Congleton", + "Corby and East Northamptonshire", + "Coventry East", + "Coventry North West", + "Coventry South", + "Cramlington and Killingworth", + "Crawley", + "Crewe and Nantwich", + "Croydon East", + "Croydon South", + "Croydon West", + "Dagenham and Rainham", + "Darlington", + "Dartford", + "Daventry", + "Derby North", + "Derby South", + "Derbyshire Dales", + "Dewsbury and Batley", + "Didcot and Wantage", + "Doncaster Central", + "Doncaster East and the Isle of Axholme", + "Doncaster North", + "Dorking and Horley", + "Dover and Deal", + "Droitwich and Evesham", + "Dudley", + "Dulwich and West Norwood", + "Dunstable and Leighton Buzzard", + "Ealing Central and Acton", + "Ealing North", + "Ealing Southall", + "Earley and Woodley", + "Easington", + "East Grinstead and Uckfield", + "East Ham", + "East Hampshire", + "East Surrey", + "East Thanet", + "East Wiltshire", + "East Worthing and Shoreham", + "Eastbourne", + "Eastleigh", + "Edmonton and Winchmore Hill", + "Ellesmere Port and Bromborough", + "Eltham and Chislehurst", + "Ely and East Cambridgeshire", + "Enfield North", + "Epping Forest", + "Epsom and Ewell", + "Erewash", + "Erith and Thamesmead", + "Esher and Walton", + "Exeter", + "Exmouth and Exeter East", + "Fareham and Waterlooville", + "Farnham and Bordon", + "Faversham and Mid Kent", + "Feltham and Heston", + "Filton and Bradley Stoke", 
+ "Finchley and Golders Green", + "Folkestone and Hythe", + "Forest of Dean", + "Frome and East Somerset", + "Fylde", + "Gainsborough", + "Gateshead Central and Whickham", + "Gedling", + "Gillingham and Rainham", + "Glastonbury and Somerton", + "Gloucester", + "Godalming and Ash", + "Goole and Pocklington", + "Gorton and Denton", + "Gosport", + "Grantham and Bourne", + "Gravesham", + "Great Grimsby and Cleethorpes", + "Great Yarmouth", + "Greenwich and Woolwich", + "Guildford", + "Hackney North and Stoke Newington", + "Hackney South and Shoreditch", + "Halesowen", + "Halifax", + "Hamble Valley", + "Hammersmith and Chiswick", + "Hampstead and Highgate", + "Harborough, Oadby and Wigston", + "Harlow", + "Harpenden and Berkhamsted", + "Harrogate and Knaresborough", + "Harrow East", + "Harrow West", + "Hartlepool", + "Harwich and North Essex", + "Hastings and Rye", + "Havant", + "Hayes and Harlington", + "Hazel Grove", + "Hemel Hempstead", + "Hendon", + "Henley and Thame", + "Hereford and South Herefordshire", + "Herne Bay and Sandwich", + "Hertford and Stortford", + "Hertsmere", + "Hexham", + "Heywood and Middleton North", + "High Peak", + "Hinckley and Bosworth", + "Hitchin", + "Holborn and St Pancras", + "Honiton and Sidmouth", + "Hornchurch and Upminster", + "Hornsey and Friern Barnet", + "Horsham", + "Houghton and Sunderland South", + "Hove and Portslade", + "Huddersfield", + "Huntingdon", + "Hyndburn", + "Ilford North", + "Ilford South", + "Ipswich", + "Isle of Wight East", + "Isle of Wight West", + "Islington North", + "Islington South and Finsbury", + "Jarrow and Gateshead East", + "Keighley and Ilkley", + "Kenilworth and Southam", + "Kensington and Bayswater", + "Kettering", + "Kingston and Surbiton", + "Kingston upon Hull East", + "Kingston upon Hull North and Cottingham", + "Kingston upon Hull West and Haltemprice", + "Kingswinford and South Staffordshire", + "Knowsley", + "Lancaster and Wyre", + "Leeds Central and Headingley", + "Leeds East", + "Leeds North 
East", + "Leeds North West", + "Leeds South", + "Leeds South West and Morley", + "Leeds West and Pudsey", + "Leicester East", + "Leicester South", + "Leicester West", + "Leigh and Atherton", + "Lewes", + "Lewisham East", + "Lewisham North", + "Lewisham West and East Dulwich", + "Leyton and Wanstead", + "Lichfield", + "Lincoln", + "Liverpool Garston", + "Liverpool Riverside", + "Liverpool Walton", + "Liverpool Wavertree", + "Liverpool West Derby", + "Loughborough", + "Louth and Horncastle", + "Lowestoft", + "Luton North", + "Luton South and South Bedfordshire", + "Macclesfield", + "Maidenhead", + "Maidstone and Malling", + "Makerfield", + "Maldon", + "Manchester Central", + "Manchester Rusholme", + "Manchester Withington", + "Mansfield", + "Melksham and Devizes", + "Melton and Syston", + "Meriden and Solihull East", + "Mid Bedfordshire", + "Mid Buckinghamshire", + "Mid Cheshire", + "Mid Derbyshire", + "Mid Dorset and North Poole", + "Mid Leicestershire", + "Mid Norfolk", + "Mid Sussex", + "Middlesbrough and Thornaby East", + "Middlesbrough South and East Cleveland", + "Milton Keynes Central", + "Milton Keynes North", + "Mitcham and Morden", + "Morecambe and Lunesdale", + "New Forest East", + "New Forest West", + "Newark", + "Newbury", + "Newcastle upon Tyne Central and West", + "Newcastle upon Tyne East and Wallsend", + "Newcastle upon Tyne North", + "Newcastle-under-Lyme", + "Newton Abbot", + "Newton Aycliffe and Spennymoor", + "Normanton and Hemsworth", + "North Bedfordshire", + "North Cornwall", + "North Cotswolds", + "North Devon", + "North Dorset", + "North Durham", + "North East Cambridgeshire", + "North East Derbyshire", + "North East Hampshire", + "North East Hertfordshire", + "North East Somerset and Hanham", + "North Herefordshire", + "North Norfolk", + "North Northumberland", + "North Shropshire", + "North Somerset", + "North Warwickshire and Bedworth", + "North West Cambridgeshire", + "North West Essex", + "North West Hampshire", + "North West 
Leicestershire", + "North West Norfolk", + "Northampton North", + "Northampton South", + "Norwich North", + "Norwich South", + "Nottingham East", + "Nottingham North and Kimberley", + "Nottingham South", + "Nuneaton", + "Old Bexley and Sidcup", + "Oldham East and Saddleworth", + "Oldham West, Chadderton and Royton", + "Orpington", + "Ossett and Denby Dale", + "Oxford East", + "Oxford West and Abingdon", + "Peckham", + "Pendle and Clitheroe", + "Penistone and Stocksbridge", + "Penrith and Solway", + "Peterborough", + "Plymouth Moor View", + "Plymouth Sutton and Devonport", + "Pontefract, Castleford and Knottingley", + "Poole", + "Poplar and Limehouse", + "Portsmouth North", + "Portsmouth South", + "Preston", + "Putney", + "Queen's Park and Maida Vale", + "Rawmarsh and Conisbrough", + "Rayleigh and Wickford", + "Reading Central", + "Reading West and Mid Berkshire", + "Redcar", + "Redditch", + "Reigate", + "Ribble Valley", + "Richmond and Northallerton", + "Richmond Park", + "Rochdale", + "Rochester and Strood", + "Romford", + "Romsey and Southampton North", + "Rossendale and Darwen", + "Rother Valley", + "Rotherham", + "Rugby", + "Ruislip, Northwood and Pinner", + "Runcorn and Helsby", + "Runnymede and Weybridge", + "Rushcliffe", + "Rutland and Stamford", + "Salford", + "Salisbury", + "Scarborough and Whitby", + "Scunthorpe", + "Sefton Central", + "Selby", + "Sevenoaks", + "Sheffield Brightside and Hillsborough", + "Sheffield Central", + "Sheffield Hallam", + "Sheffield Heeley", + "Sheffield South East", + "Sherwood Forest", + "Shipley", + "Shrewsbury", + "Sittingbourne and Sheppey", + "Skipton and Ripon", + "Sleaford and North Hykeham", + "Slough", + "Smethwick", + "Solihull West and Shirley", + "South Basildon and East Thurrock", + "South Cambridgeshire", + "South Cotswolds", + "South Derbyshire", + "South Devon", + "South Dorset", + "South East Cornwall", + "South Holland and The Deepings", + "South Leicestershire", + "South Norfolk", + "South Northamptonshire", + 
"South Ribble", + "South Shields", + "South Shropshire", + "South Suffolk", + "South West Devon", + "South West Hertfordshire", + "South West Norfolk", + "South West Wiltshire", + "Southampton Itchen", + "Southampton Test", + "Southend East and Rochford", + "Southend West and Leigh", + "Southgate and Wood Green", + "Southport", + "Spelthorne", + "Spen Valley", + "St Albans", + "St Austell and Newquay", + "St Helens North", + "St Helens South and Whiston", + "St Ives", + "St Neots and Mid Cambridgeshire", + "Stafford", + "Staffordshire Moorlands", + "Stalybridge and Hyde", + "Stevenage", + "Stockport", + "Stockton North", + "Stockton West", + "Stoke-on-Trent Central", + "Stoke-on-Trent North", + "Stoke-on-Trent South", + "Stone, Great Wyrley and Penkridge", + "Stourbridge", + "Stratford and Bow", + "Stratford-on-Avon", + "Streatham and Croydon North", + "Stretford and Urmston", + "Stroud", + "Suffolk Coastal", + "Sunderland Central", + "Surrey Heath", + "Sussex Weald", + "Sutton and Cheam", + "Sutton Coldfield", + "Swindon North", + "Swindon South", + "Tamworth", + "Tatton", + "Taunton and Wellington", + "Telford", + "Tewkesbury", + "The Wrekin", + "Thirsk and Malton", + "Thornbury and Yate", + "Thurrock", + "Tipton and Wednesbury", + "Tiverton and Minehead", + "Tonbridge", + "Tooting", + "Torbay", + "Torridge and Tavistock", + "Tottenham", + "Truro and Falmouth", + "Tunbridge Wells", + "Twickenham", + "Tynemouth", + "Uxbridge and South Ruislip", + "Vauxhall and Camberwell Green", + "Wakefield and Rothwell", + "Wallasey", + "Walsall and Bloxwich", + "Walthamstow", + "Warrington North", + "Warrington South", + "Warwick and Leamington", + "Washington and Gateshead South", + "Watford", + "Waveney Valley", + "Weald of Kent", + "Wellingborough and Rushden", + "Wells and Mendip Hills", + "Welwyn Hatfield", + "West Bromwich", + "West Dorset", + "West Ham and Beckton", + "West Lancashire", + "West Suffolk", + "West Worcestershire", + "Westmorland and Lonsdale", + 
"Weston-super-Mare", + "Wetherby and Easingwold", + "Whitehaven and Workington", + "Widnes and Halewood", + "Wigan", + "Wimbledon", + "Winchester", + "Windsor", + "Wirral West", + "Witham", + "Witney", + "Woking", + "Wokingham", + "Wolverhampton North East", + "Wolverhampton South East", + "Wolverhampton West", + "Worcester", + "Worsley and Eccles", + "Worthing West", + "Wycombe", + "Wyre Forest", + "Wythenshawe and Sale East", + "Yeovil", + "York Central", + "York Outer", + "Belfast East", + "Belfast North", + "Belfast South and Mid Down", + "Belfast West", + "East Antrim", + "East Londonderry", + "Fermanagh and South Tyrone", + "Foyle", + "Lagan Valley", + "Mid Ulster", + "Newry and Armagh", + "North Antrim", + "North Down", + "South Antrim", + "South Down", + "Strangford", + "Upper Bann", + "West Tyrone", + "East Renfrewshire", + "Na h-Eileanan an Iar", + "Midlothian", + "North Ayrshire and Arran", + "Orkney and Shetland", + "Aberdeen North", + "Aberdeen South", + "Aberdeenshire North and Moray East", + "Airdrie and Shotts", + "Alloa and Grangemouth", + "Angus and Perthshire Glens", + "Arbroath and Broughty Ferry", + "Argyll, Bute and South Lochaber", + "Bathgate and Linlithgow", + "Caithness, Sutherland and Easter Ross", + "Coatbridge and Bellshill", + "Cowdenbeath and Kirkcaldy", + "Cumbernauld and Kirkintilloch", + "Dumfries and Galloway", + "Dumfriesshire, Clydesdale and Tweeddale", + "Dundee Central", + "Dunfermline and Dollar", + "East Kilbride and Strathaven", + "Edinburgh East and Musselburgh", + "Edinburgh North and Leith", + "Edinburgh South", + "Edinburgh South West", + "Edinburgh West", + "Falkirk", + "Glasgow East", + "Glasgow North", + "Glasgow North East", + "Glasgow South", + "Glasgow South West", + "Glasgow West", + "Glenrothes and Mid Fife", + "Gordon and Buchan", + "Hamilton and Clyde Valley", + "Inverclyde and Renfrewshire West", + "Inverness, Skye and West Ross-shire", + "Livingston", + "Lothian East", + "Mid Dunbartonshire", + "Moray West, 
Nairn and Strathspey", + "Motherwell, Wishaw and Carluke", + "North East Fife", + "Paisley and Renfrewshire North", + "Paisley and Renfrewshire South", + "Perth and Kinross-shire", + "Rutherglen", + "Stirling and Strathallan", + "West Dunbartonshire", + "Ayr, Carrick and Cumnock", + "Berwickshire, Roxburgh and Selkirk", + "Central Ayrshire", + "Kilmarnock and Loudoun", + "West Aberdeenshire and Kincardine", + "Aberafan Maesteg", + "Alyn and Deeside", + "Bangor Aberconwy", + "Blaenau Gwent and Rhymney", + "Brecon, Radnor and Cwm Tawe", + "Bridgend", + "Caerfyrddin", + "Caerphilly", + "Cardiff East", + "Cardiff North", + "Cardiff South and Penarth", + "Cardiff West", + "Ceredigion Preseli", + "Clwyd East", + "Clwyd North", + "Dwyfor Meirionnydd", + "Gower", + "Llanelli", + "Merthyr Tydfil and Aberdare", + "Mid and South Pembrokeshire", + "Monmouthshire", + "Montgomeryshire and Glyndwr", + "Neath and Swansea East", + "Newport East", + "Newport West and Islwyn", + "Pontypridd", + "Rhondda and Ogmore", + "Swansea West", + "Torfaen", + "Vale of Glamorgan", + "Wrexham", + "Ynys Môn" + ], + "legendgroup": "", + "marker": { + "color": [ + 41423.47265625, + 34819.73046875, + 50368.54296875, + 35540.13653859061, + 38036.82334936957, + 33378.91015625, + 38001.53515625, + 34819.73046875, + 40823.1328125, + 43764.80078125, + 34519.5625, + 34519.5625, + 34519.5625, + 35420.0703125, + 40703.06640625, + 40823.1328125, + 36980.95703125, + 40823.1328125, + 60454.25390625, + 49948.3046875, + 47546.93792127848, + 44185.0390625, + 48807.65625, + 52109.52734375, + 36860.88671875, + 35420.0703125, + 41843.7109375, + 44004.938457836084, + 38001.53515625, + 38341.571060303155, + 33378.9140625, + 34519.5625, + 36860.88671875, + 34819.73046875, + 35239.96875, + 35720.2421875, + 34519.5625, + 34519.5625, + 34819.73046875, + 34579.59375, + 34519.5625, + 38421.7734375, + 33979.25390625, + 35420.0703125, + 35420.0703125, + 37060.44420507146, + 34519.55859375, + 35420.0703125, + 36860.88671875, + 
35420.0703125, + 33138.77734375, + 35420.0703125, + 35239.96875, + 36020.41015625, + 44185.0390625, + 32838.609375, + 33378.9140625, + 32288.342418136006, + 44905.4453125, + 47366.83984375, + 40042.69140625, + 40823.1328125, + 50848.8125, + 34579.59375, + 35420.0703125, + 37101.0234375, + 38661.91015625, + 40042.69140625, + 39142.1796875, + 38327.67502989451, + 38001.53125, + 39142.1796875, + 37281.125, + 40042.69140625, + 49407.9921875, + 41423.47265625, + 44185.0390625, + 34579.59375, + 41843.7109375, + 32418.369140625, + 34579.59375, + 37281.125, + 35239.96875, + 37281.125, + 40703.06640625, + 35420.0703125, + 42083.84375, + 34519.5625, + 39142.1796875, + 34579.59375, + 42984.359375, + 39742.51953125, + 35540.136438683, + 39742.51953125, + 39322.28125, + 43524.662664472926, + 44125, + 58052.89453125, + 40703.06640625, + 44185.0390625, + 38421.7734375, + 39142.1796875, + 33559.015625, + 35900.33984375, + 45265.6484375, + 39022.11328125, + 50428.57421875, + 35900.33984375, + 37281.125, + 57392.515625, + 34819.73046875, + 36860.890625, + 51809.359375, + 37641.328125, + 35720.2421875, + 42984.359375, + 35420.0703125, + 33138.77734375, + 35420.0703125, + 33138.77734375, + 36860.890625, + 39142.1796875, + 38001.53125, + 43524.66105736767, + 43524.66105736767, + 40703.06640625, + 39142.1796875, + 35720.2421875, + 40703.06640625, + 38661.91015625, + 34579.59375, + 35420.0703125, + 39742.51953125, + 35239.96875, + 43524.661389119305, + 34519.5625, + 35720.2421875, + 34579.59375, + 44185.0390625, + 36020.40625, + 36020.41015625, + 34819.73046875, + 47967.1796875, + 36980.95703125, + 48027.21484375, + 38421.7734375, + 43824.83203125, + 43644.73046875, + 35720.2421875, + 42624.15234375, + 35420.0703125, + 44185.0390625, + 46706.46484375, + 36620.75, + 39742.51953125, + 38001.53515625, + 36980.95703125, + 41003.23046875, + 42624.15234375, + 38421.7734375, + 43524.66267877325, + 40703.06640625, + 42624.15234375, + 50308.50390625, + 44185.0390625, + 35420.0703125, + 
43524.6640625, + 50608.67578125, + 36020.41015625, + 36980.95703125, + 40703.06640625, + 45265.6484375, + 38001.53515625, + 34819.73046875, + 39322.28125, + 52049.4921875, + 38421.7734375, + 38421.7734375, + 37641.328125, + 40703.06640625, + 38001.53515625, + 34819.73046875, + 35420.0703125, + 39742.51953125, + 37281.125, + 34579.59375, + 44185.0390625, + 37281.125, + 37281.125, + 38421.7734375, + 35720.2421875, + 36860.88671875, + 36860.890625, + 34579.59375, + 51329.08203125, + 41423.47265625, + 50283.6779735496, + 50283.6779735496, + 35420.0703125, + 35420.0703125, + 40042.69140625, + 50848.80859375, + 56792.18359375, + 40282.828125, + 46046.09375, + 45625.85546875, + 40042.69140625, + 41423.47265625, + 40042.69140625, + 35600.171875, + 37641.328125, + 35420.0703125, + 37281.125, + 40703.06640625, + 36020.41015625, + 44785.37890625, + 45085.546875, + 48327.38671875, + 34579.59375, + 37281.125, + 47126.703125, + 41423.47265625, + 38361.73828125, + 34579.59375, + 37641.328125, + 37641.328125, + 44185.0390625, + 57572.6171875, + 35420.0703125, + 44905.4453125, + 50308.50510633398, + 40823.1328125, + 34579.59375, + 40703.06640625, + 35420.0703125, + 38346.81146774556, + 35420.0703125, + 40703.06640625, + 38001.53125, + 35420.0703125, + 33498.984375, + 33498.984375, + 48927.7265625, + 51809.359375, + 34579.59375, + 40703.06640625, + 41843.7109375, + 60994.5703125, + 35420.0703125, + 52529.765625, + 33138.77734375, + 34519.5625, + 35720.2421875, + 37281.125, + 35420.0703125, + 38001.53125, + 35239.96875, + 35420.0703125, + 35720.2421875, + 40042.69140625, + 35239.96875, + 35420.0703125, + 35720.2421875, + 32118.19921875, + 33378.9140625, + 34339.45703125, + 35600.171875, + 39322.28125, + 44185.0390625, + 42804.25390625, + 46046.09375, + 43764.80078125, + 38361.73828125, + 33498.984375, + 40042.69140625, + 35420.0703125, + 35720.2421875, + 37641.328125, + 36680.78515625, + 35720.2421875, + 36860.88671875, + 37281.125, + 36680.78515625, + 36980.95703125, + 
40703.06640625, + 46046.09375, + 37641.328125, + 35720.2421875, + 41843.7109375, + 34579.59375, + 34519.5625, + 39142.1796875, + 34159.35546875, + 37641.328125, + 37281.125, + 41603.57421875, + 42083.84375, + 41843.7109375, + 42083.84375, + 36860.88671875, + 37281.125, + 37281.125, + 35420.0703125, + 41543.54296875, + 35420.0703125, + 33679.0859375, + 40703.06640625, + 40703.06640625, + 47186.73828125, + 36680.78515625, + 38421.7734375, + 40643.0234375, + 39742.51953125, + 44125, + 36860.890625, + 34519.5625, + 36010.911138705706, + 37281.125, + 33559.015625, + 36860.890625, + 34519.5625, + 43764.80078125, + 34519.5625, + 39322.28125, + 37641.328125, + 35600.171875, + 34579.59375, + 36020.41015625, + 34519.55859375, + 42564.12109375, + 44185.0390625, + 36980.95703125, + 34579.59375, + 38001.53125, + 35720.2421875, + 34819.73046875, + 43524.66335780143, + 37281.125, + 36020.41015625, + 53490.3125, + 43213.00735171613, + 39022.11328125, + 34579.59375, + 35239.96875, + 39142.1796875, + 37281.125, + 37281.125, + 33378.91015625, + 34579.59375, + 33378.9140625, + 36680.78515625, + 46046.09375, + 36020.41015625, + 34519.5625, + 49708.1640625, + 34579.59375, + 39322.28125, + 40643.0234375, + 48807.65625, + 34579.59375, + 39502.37890625, + 36020.41015625, + 36020.41015625, + 33378.9140625, + 32478.40625, + 34519.5625, + 36020.41015625, + 52049.4921875, + 34819.73046875, + 36860.890625, + 38267.35794199639, + 57752.725900697464, + 51809.359375, + 34819.73046875, + 38001.53515625, + 40823.1328125, + 42624.15234375, + 33559.015625, + 36020.41015625, + 47126.703125, + 37281.125, + 34579.59375, + 59853.91796875, + 35360.0390625, + 40042.69140625, + 44185.0390625, + 43764.80078125, + 34579.59375, + 34579.59375, + 34579.59375, + 40703.06640625, + 45625.85546875, + 38361.73828125, + 50608.67578125, + 43644.73046875, + 37641.328125, + 34579.59375, + 38001.53515625, + 34819.73046875, + 35420.0703125, + 39142.1796875, + 39322.28125, + 42624.15234375, + 34579.59375, + 37581.296875, + 
39742.51953125, + 39742.51953125, + 35900.34375, + 38303.52042788706, + 40703.06640625, + 37281.125, + 34819.73046875, + 40042.69140625, + 35420.0703125, + 36860.890625, + 34519.5625, + 41603.57421875, + 39322.28125, + 42984.359375, + 40042.69140625, + 39322.28125, + 35420.0703125, + 37821.43359375, + 36860.88671875, + 34819.73046875, + 39502.37890625, + 35420.0703125, + 38661.91015625, + 38001.53125, + 34579.59375, + 36860.88671875, + 40042.69140625, + 38308.95661417534, + 45085.546875, + 34579.59375, + 37281.125, + 34579.59375, + 34519.5625, + 39562.4140625, + 38001.53125, + 44185.0390625, + 38361.73828125, + 43464.62890625, + 35239.96875, + 55291.33203125, + 34579.59375, + 34819.73046875, + 35420.0703125, + 34819.73046875, + 40703.06640625, + 39562.41796875, + 40703.06640625, + 34639.62890625, + 39142.1796875, + 34519.5625, + 36620.75, + 36860.890625, + 35239.96875, + 36860.890625, + 37281.125, + 40042.69140625, + 34819.73046875, + 50288.55778180683, + 44185.0390625, + 41423.47265625, + 36860.890625, + 40042.69140625, + 37040.984375, + 33559.015625, + 45926.0234375, + 40282.828125, + 42984.359375, + 43644.73046875, + 39022.11328125, + 39022.11328125, + 38361.73828125, + 41423.47265625, + 35600.171875, + 35420.0703125, + 38001.53125, + 36620.75, + 34579.59375, + 39322.28125, + 38421.7734375, + 34579.59375, + 35239.96875, + 40703.06640625, + 49708.1640625, + 34519.5625, + 38001.53515625, + 48327.38671875, + 34579.59375, + 47366.83984375, + 42564.12109375, + 35900.34765625, + 44185.0390625, + 51809.359375, + 37281.125, + 34579.59375, + 34519.5625, + 46706.46484375, + 40042.69140625, + 40042.69140625, + 44125, + 35239.96875, + 40703.06640625, + 39022.11328125, + 39322.28125, + 34579.59375, + 36860.890625, + 43644.73046875, + 36020.41015625, + 35420.0703125, + 39322.28125, + 35720.2421875, + 38001.53125, + 40042.69140625, + 38001.53515625, + 34579.59375, + 37281.125, + 39022.11328125, + 36860.88671875, + 34819.73046875, + 51809.359375, + 42984.359375, + 42984.359375, 
+ 36860.890625, + 40823.1328125, + 42624.15234375, + 45745.91796875, + 44305.10546875, + 34519.5625, + 33498.984375, + 34819.73046875, + 34819.73046875, + 35720.2421875, + 40042.69140625, + 40703.06640625, + 36020.41015625, + 35420.0703125, + 37941.5, + 36860.88671875, + 36860.88671875, + 35420.0703125, + 34519.5625, + 36020.41015625, + 35900.34375, + 35420.0703125, + 34819.73046875, + 36860.88671875, + 35720.2421875, + 36860.88671875, + 36860.88671875, + 34819.73046875, + 36860.88671875, + 36860.88671875, + 35420.0703125, + 36860.88671875, + 34579.59375, + 36860.890625, + 36860.88671875, + 43644.73046875, + 40042.69140625, + 39742.51953125, + 35420.0703125, + 42564.12109375, + 39742.51953125, + 38421.7734375, + 35239.96875, + 38001.53125, + 39142.1796875, + 36860.88671875, + 34579.59375, + 36860.88671875, + 39322.28125, + 38001.53515625, + 38001.53125, + 36860.88671875, + 39322.28125, + 34579.59375, + 36860.88671875, + 34579.59375, + 38305.303871994074, + 40042.69140625, + 39322.28125, + 41543.54296875, + 41423.47265625, + 44905.4453125, + 41543.54296875, + 37941.5, + 38421.7734375, + 40282.828125, + 39322.28125, + 40643.0234375, + 38349.69803445856, + 40943.20044377309, + 35420.0703125, + 39022.11328125, + 40042.69140625, + 36080.4453125, + 37281.125, + 39322.28125, + 40282.828125, + 40703.06640625, + 37581.296875, + 38361.73828125, + 34879.765625, + 35900.34765625, + 37101.0234375, + 40042.69140625, + 40042.69140625, + 40042.69140625, + 38001.53125, + 38001.53515625, + 34519.5625, + 40462.9375, + 34519.5625, + 44185.0390625, + 36620.75, + 38001.53125, + 38001.53125, + 35420.0703125, + 34519.5625, + 35900.34375, + 39742.51953125, + 36860.88671875, + 35420.0703125, + 38361.73828125, + 35720.2421875, + 36680.78515625, + 38902.0390625, + 36860.890625, + 36260.546875, + 37521.2578125, + 39322.28125, + 40282.828125, + 34519.5625, + 38121.6015625, + 34519.5625, + 36980.95703125, + 37040.98828125, + 34519.5625, + 39022.109375, + 35360.0390625, + 35420.0703125, + 
39742.51953125, + 36860.890625, + 36860.88671875, + 35720.2421875, + 36860.88671875 + ], + "coloraxis": "coloraxis", + "line": { + "color": "lightgray", + "width": 0 + }, + "size": 10, + "symbol": "hexagon" + }, + "mode": "markers", + "name": "", + "orientation": "v", + "showlegend": false, + "type": "scatter", + "x": [ + 55.5, + 55.5, + 52, + 58, + 60.5, + 60, + 71.5, + 54, + 60, + 58, + 67.5, + 57, + 58, + 53.5, + 66.5, + 55, + 60.5, + 50.5, + 62, + 57, + 65, + 62.5, + 63.5, + 65, + 63.5, + 69.5, + 67, + 58.5, + 49, + 53, + 54, + 54.5, + 55, + 53.5, + 53.5, + 53, + 54, + 55.5, + 53.5, + 53, + 53, + 52.5, + 51.5, + 54.5, + 54.5, + 62.5, + 59.5, + 52, + 51.5, + 51, + 48.5, + 63.5, + 52, + 51.5, + 56, + 57.5, + 56, + 56.5, + 67, + 60.5, + 59.5, + 59.5, + 66, + 48, + 62.5, + 61.5, + 67, + 66.5, + 50.5, + 51.5, + 51, + 49.5, + 51, + 66, + 66.5, + 52, + 66, + 59, + 59.5, + 55, + 55.5, + 53, + 52.5, + 68, + 55.5, + 43, + 64.5, + 54, + 71, + 52.5, + 62, + 68.5, + 46.5, + 68, + 68.5, + 54.5, + 67, + 60.5, + 51.5, + 58.5, + 49.5, + 51, + 58.5, + 59.5, + 64, + 52, + 61.5, + 52.5, + 52.5, + 62.5, + 54.5, + 68.5, + 61.5, + 67.5, + 55, + 54, + 61.5, + 57, + 56, + 56.5, + 55.5, + 68.5, + 53, + 64.5, + 64, + 63, + 67, + 55, + 67.5, + 59.5, + 57.5, + 56.5, + 56.5, + 56.5, + 53.5, + 60, + 61, + 60.5, + 59, + 72, + 53.5, + 51, + 62.5, + 62, + 59, + 58.5, + 58, + 55.5, + 56.5, + 69, + 66.5, + 55, + 67, + 71, + 53, + 64.5, + 69, + 54, + 63.5, + 50, + 66, + 65.5, + 62, + 67, + 60, + 58.5, + 66.5, + 57.5, + 47.5, + 48, + 55, + 55.5, + 70.5, + 58.5, + 50, + 61, + 70.5, + 50, + 50, + 51, + 61, + 56, + 60.5, + 69.5, + 49, + 51, + 56.5, + 61, + 54.5, + 57, + 62.5, + 68, + 62.5, + 67, + 65.5, + 56, + 63.5, + 64, + 51, + 55, + 56, + 60, + 61.5, + 61, + 66.5, + 61.5, + 58.5, + 60, + 59, + 58.5, + 69, + 70, + 58.5, + 57.5, + 55, + 63.5, + 60.5, + 58, + 50.5, + 71.5, + 65.5, + 65.5, + 53, + 53.5, + 56, + 57.5, + 63.5, + 62, + 49, + 66, + 62.5, + 61.5, + 57, + 65.5, + 55.5, + 63, + 54, + 64.5, + 
65, + 67.5, + 54, + 53, + 62.5, + 63, + 56.5, + 56, + 55.5, + 61, + 60.5, + 58.5, + 62.5, + 62, + 61.5, + 51.5, + 50, + 53.5, + 59.5, + 60.5, + 59, + 58, + 59, + 58, + 58.5, + 59.5, + 60, + 59, + 51, + 68, + 65.5, + 64.5, + 65, + 64, + 56, + 62, + 50, + 48.5, + 49, + 49, + 49.5, + 58.5, + 63, + 67.5, + 63, + 62.5, + 55.5, + 56.5, + 69, + 50.5, + 69, + 53.5, + 53, + 53.5, + 61, + 51.5, + 61, + 55, + 61.5, + 59, + 52, + 57, + 50, + 58, + 64.5, + 68, + 57, + 59, + 60.5, + 61, + 61, + 54, + 54, + 53, + 61.5, + 54, + 54, + 55.5, + 55, + 51.5, + 47, + 55.5, + 59, + 62, + 45, + 53, + 46, + 50.5, + 54, + 64, + 57.5, + 55.5, + 64.5, + 50, + 51.5, + 65, + 53.5, + 50, + 49, + 56.5, + 63.5, + 66, + 54, + 58, + 63.5, + 60.5, + 60, + 65.5, + 66, + 60, + 59.5, + 59, + 57, + 67, + 54.5, + 53.5, + 66, + 57.5, + 57.5, + 57, + 64, + 55.5, + 56, + 52, + 63, + 46, + 46.5, + 59.5, + 51, + 66, + 58, + 57.5, + 52, + 61, + 61.5, + 59.5, + 67.5, + 55, + 54.5, + 58, + 53, + 67.5, + 54.5, + 56.5, + 59, + 54, + 69, + 65.5, + 53.5, + 54.5, + 60, + 58.5, + 57.5, + 59.5, + 50.5, + 57, + 61.5, + 62, + 52.5, + 52, + 61, + 60.5, + 49.5, + 60, + 67.5, + 57.5, + 58, + 56.5, + 57, + 59, + 62, + 57, + 50.5, + 70, + 57.5, + 62.5, + 56, + 52.5, + 54.5, + 67.5, + 65, + 52.5, + 57, + 48, + 50.5, + 45.5, + 63, + 58.5, + 67, + 59, + 51.5, + 57.5, + 50, + 68.5, + 47, + 61, + 65, + 51, + 54.5, + 53.5, + 68.5, + 68, + 63, + 50, + 57.5, + 57, + 64.5, + 44.5, + 50, + 49.5, + 42.5, + 64, + 53.5, + 56, + 55.5, + 64, + 54, + 57.5, + 56, + 54.5, + 55, + 55, + 52.5, + 50.5, + 64.5, + 54, + 63.5, + 51.5, + 52, + 69, + 58, + 57, + 69.5, + 59.5, + 56, + 53, + 52.5, + 56.5, + 51.5, + 48.5, + 52, + 52.5, + 51, + 59.5, + 50.5, + 66.5, + 52, + 47, + 68, + 60.5, + 47.5, + 45.5, + 62, + 44, + 68.5, + 58, + 56, + 58, + 63, + 58.5, + 48, + 54.5, + 63, + 51, + 50.5, + 55, + 55, + 65, + 66.5, + 70, + 62.5, + 49.5, + 65, + 51.5, + 49.5, + 65.5, + 49, + 66.5, + 52, + 53, + 48.5, + 61.5, + 52.5, + 50.5, + 50.5, + 60, + 54.5, + 56.5, + 
48.5, + 68, + 56, + 56.5, + 54.5, + 53, + 53.5, + 52.5, + 52.5, + 52, + 63.5, + 57.5, + 50, + 52.5, + 49.5, + 60, + 60.5, + 45, + 44.5, + 44.5, + 44, + 45, + 43, + 42, + 42, + 43.5, + 42.5, + 44, + 44, + 45.5, + 43.5, + 45.5, + 46, + 42.5, + 41.5, + 48, + 46.5, + 52, + 47.5, + 50.5, + 52, + 51.5, + 51, + 50, + 50, + 50, + 52, + 49, + 51, + 49.5, + 49.5, + 52, + 49.5, + 51, + 52, + 49.5, + 51, + 48, + 53.5, + 53, + 52.5, + 51.5, + 52, + 50.5, + 50.5, + 49, + 50, + 49, + 49.5, + 48.5, + 51.5, + 49.5, + 50.5, + 47.5, + 49, + 51, + 53, + 49, + 48.5, + 51.5, + 50.5, + 48, + 48.5, + 51, + 48.5, + 48.5, + 48, + 49, + 52.5, + 47.5, + 50, + 50.5, + 45.5, + 49, + 47, + 49, + 49.5, + 46, + 48.5, + 49, + 48, + 47.5, + 47.5, + 47, + 47.5, + 48.5, + 47.5, + 48, + 44, + 44.5, + 48.5, + 43.5, + 49.5, + 49, + 47, + 49, + 48.5, + 48, + 46.5, + 45, + 49.5, + 46.5, + 49.5, + 46 + ], + "xaxis": "x", + "y": [ + -40, + -30, + -25, + -27, + -44, + -27, + -42, + -23, + -35, + -33, + -38, + -23, + -23, + -16, + -34, + -39, + -26, + -40, + -41, + -37, + -43, + -32, + -40, + -39, + -22, + -44, + -39, + -34, + -27, + -33, + -31, + -32, + -31, + -32, + -34, + -31, + -33, + -32, + -14, + -19, + -23, + -18, + -18, + -14, + -12, + -44, + -26, + -21, + -22, + -21, + -22, + -26, + -43, + -42, + -39, + -20, + -21, + -20, + -31, + -38, + -38, + -40, + -33, + -41, + -20, + -24, + -45, + -44, + -38, + -38, + -37, + -38, + -39, + -27, + -42, + -33, + -35, + -27, + -34, + -19, + -28, + -21, + -22, + -31, + -20, + -45, + -30, + -29, + -41, + -14, + -43, + -36, + -42, + -29, + -40, + -26, + -33, + -40, + -36, + -36, + -28, + -27, + -26, + -44, + -35, + -39, + -36, + -20, + -42, + -40, + -16, + -32, + -42, + -32, + -23, + -27, + -30, + -33, + -33, + -34, + -12, + -44, + -27, + -42, + -43, + -43, + -37, + -17, + -40, + -32, + -28, + -28, + -26, + -22, + -38, + -23, + -23, + -22, + -43, + -41, + -36, + -31, + -42, + -33, + -39, + -38, + -39, + -36, + -16, + -43, + -38, + -41, + -43, + -39, + -41, + -44, + -45, 
+ -41, + -36, + -27, + -41, + -30, + -35, + -35, + -43, + -28, + -40, + -42, + -42, + -43, + -43, + -42, + -40, + -40, + -37, + -37, + -42, + -35, + -41, + -19, + -25, + -15, + -28, + -40, + -41, + -35, + -42, + -21, + -24, + -43, + -28, + -39, + -24, + -27, + -40, + -41, + -38, + -39, + -33, + -21, + -43, + -39, + -38, + -31, + -32, + -34, + -18, + -37, + -37, + -16, + -31, + -43, + -44, + -38, + -25, + -34, + -36, + -35, + -34, + -40, + -32, + -34, + -13, + -20, + -25, + -30, + -32, + -39, + -43, + -37, + -36, + -44, + -15, + -44, + -22, + -31, + -19, + -36, + -37, + -30, + -45, + -45, + -38, + -39, + -14, + -19, + -34, + -39, + -30, + -42, + -22, + -21, + -22, + -30, + -23, + -18, + -20, + -20, + -19, + -19, + -21, + -21, + -20, + -30, + -31, + -31, + -25, + -45, + -42, + -40, + -41, + -37, + -29, + -25, + -25, + -24, + -23, + -25, + -24, + -30, + -25, + -28, + -33, + -34, + -26, + -36, + -41, + -22, + -33, + -24, + -25, + -26, + -27, + -40, + -29, + -33, + -32, + -35, + -27, + -27, + -43, + -31, + -28, + -43, + -17, + -17, + -34, + -33, + -43, + -17, + -43, + -43, + -26, + -37, + -13, + -14, + -13, + -28, + -43, + -16, + -23, + -31, + -43, + -37, + -41, + -42, + -15, + -29, + -26, + -38, + -32, + -39, + -34, + -27, + -12, + -29, + -39, + -32, + -30, + -31, + -39, + -29, + -28, + -32, + -33, + -28, + -29, + -29, + -28, + -29, + -31, + -41, + -22, + -22, + -43, + -22, + -34, + -35, + -41, + -18, + -23, + -15, + -29, + -43, + -44, + -22, + -43, + -39, + -43, + -44, + -19, + -41, + -40, + -24, + -34, + -37, + -36, + -17, + -35, + -44, + -18, + -18, + -41, + -21, + -39, + -36, + -40, + -20, + -25, + -24, + -32, + -36, + -28, + -41, + -28, + -29, + -24, + -41, + -19, + -24, + -20, + -21, + -42, + -24, + -25, + -24, + -25, + -25, + -27, + -19, + -30, + -39, + -18, + -26, + -37, + -32, + -34, + -36, + -31, + -38, + -29, + -45, + -44, + -44, + -27, + -32, + -29, + -33, + -20, + -14, + -31, + -30, + -45, + -35, + -29, + -41, + -42, + -42, + -34, + -35, + -35, + -19, + 
-40, + -21, + -34, + -44, + -21, + -22, + -46, + -31, + -28, + -27, + -24, + -33, + -25, + -16, + -17, + -28, + -27, + -29, + -28, + -32, + -38, + -35, + -42, + -24, + -37, + -29, + -15, + -39, + -42, + -42, + -31, + -39, + -40, + -30, + -26, + -42, + -29, + -36, + -29, + -18, + -36, + -36, + -31, + -41, + -41, + -42, + -44, + -42, + -37, + -45, + -42, + -41, + -13, + -37, + -41, + -22, + -27, + -30, + -37, + -23, + -24, + -35, + -15, + -35, + -28, + -41, + -30, + -40, + -33, + -32, + -44, + -38, + -21, + -30, + -35, + -15, + -40, + -20, + -16, + -26, + -20, + -41, + -40, + -38, + -28, + -33, + -35, + -40, + -38, + -29, + -30, + -30, + -34, + -23, + -44, + -36, + -33, + -26, + -42, + -19, + -18, + -17, + -16, + -18, + -17, + -15, + -15, + -17, + -15, + -18, + -16, + -19, + -15, + -16, + -16, + -18, + -17, + -18, + -16, + -11, + -2, + -11, + -10, + 0, + -3, + -4, + -3, + -11, + -7, + -5, + -5, + -5, + -9, + -2, + -12, + -7, + -8, + -13, + -13, + -6, + -7, + -13, + -10, + -9, + -10, + -10, + -9, + -8, + -10, + -9, + -9, + -11, + -10, + -8, + -6, + -4, + -12, + -8, + -3, + -11, + -11, + -7, + -4, + -12, + -6, + -9, + -10, + -5, + -12, + -6, + -7, + -13, + -12, + -12, + -13, + -4, + -36, + -29, + -31, + -33, + -32, + -37, + -32, + -35, + -37, + -36, + -38, + -37, + -34, + -30, + -30, + -31, + -37, + -36, + -34, + -36, + -36, + -31, + -35, + -37, + -36, + -35, + -36, + -37, + -34, + -38, + -30, + -29 + ], + "yaxis": "y" + } + ], + "layout": { + "annotations": [ + { + "showarrow": false, + "text": "Source: PolicyEngine tax-benefit microsimulation model (version 2.15.1)", + "x": 0, + "xanchor": "left", + "xref": "paper", + "y": -0.15, + "yanchor": "bottom", + "yref": "paper" + } + ], + "coloraxis": { + "cmax": 60994.5703125, + "cmin": 0, + "colorbar": { + "title": { + "text": "Value" + } + }, + "colorscale": [ + [ + 0, + "lightgray" + ], + [ + 1, + "#2C6496" + ] + ] + }, + "font": { + "color": "black", + "family": "Roboto Serif" + }, + "height": 600, + "images": [ + { + 
"sizex": 0.15, + "sizey": 0.15, + "source": "https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.png", + "x": 1.1, + "xanchor": "right", + "xref": "paper", + "y": -0.15, + "yanchor": "bottom", + "yref": "paper" + } + ], + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "modebar": { + "activecolor": "lightgray", + "bgcolor": "lightgray", + "color": "lightgray" + }, + "paper_bgcolor": "lightgray", + "plot_bgcolor": "lightgray", + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + 
"contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + 
"#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + 
}, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + 
"#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + 
"automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Median earnings of adults in parliamentary constituencies" + }, + "width": 800, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "gridcolor": "lightgray", + "range": [ + 30, + 85 + ], + "tickvals": [], + "title": { + "text": "" + }, + "zerolinecolor": "lightgray" + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "gridcolor": "lightgray", + "range": [ + -50, + 2 + ], + "tickvals": [], + "title": { + "text": "" + }, + "zerolinecolor": "lightgray" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from policyengine.utils.charts import *\n", + "from policyengine import Simulation\n", + "\n", + "\n", + "sim = Simulation(\n", + " country=\"uk\",\n", + " scope=\"macro\",\n", + " time_period=\"2025\",\n", + " options={\n", + " \"include_constituencies\": True,\n", + " }\n", + ")\n", + "\n", + "add_fonts()\n", + "\n", + "def adult_earnings_median(sim):\n", + " adult = sim.calculate(\"age\").between(18, 65)\n", + " earnings = sim.calculate(\"employment_income\")\n", + " return earnings[earnings > 0][adult].quantile(0.5)\n", + "\n", + "sim.calculate(\n", + " \"macro/gov/local_areas/parliamentary_constituencies\",\n", + " metric=adult_earnings_median,\n", + " chart=True\n", + ").update_layout(\n", + " title=\"Median earnings of adults in parliamentary constituencies\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/pictures/earning_dist.png b/docs/pictures/earning_dist.png new file mode 100644 index 0000000..cde5b6b Binary files /dev/null and b/docs/pictures/earning_dist.png differ diff --git a/docs/pictures/nomis_screenshot1.png b/docs/pictures/nomis_screenshot1.png new file mode 100644 index 0000000..29e014c Binary files /dev/null and b/docs/pictures/nomis_screenshot1.png differ diff --git a/docs/pictures/parliamentary_earnings.png b/docs/pictures/parliamentary_earnings.png new file mode 100644 index 0000000..2358f7d Binary files /dev/null and b/docs/pictures/parliamentary_earnings.png differ diff --git a/docs/validation/constituencies.ipynb b/docs/validation/constituencies.ipynb index 3cb2439..ca4784c 100644 --- a/docs/validation/constituencies.ipynb +++ b/docs/validation/constituencies.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -53,7 +53,7 @@ "text/html": [ "\n", - "
\n", + "
\n", "\n", "\n" ], "text/plain": [ @@ -168,11 +168,10 @@ "from policyengine_uk import Microsimulation\n", "import pandas as pd\n", "import h5py\n", - "import numpy as np\n", - "import sys\n", "from itables import init_notebook_mode\n", "import itables.options as opt\n", "from pathlib import Path\n", + "from policyengine.utils.huggingface import download\n", "from policyengine_uk_data.storage import STORAGE_FOLDER\n", "\n", "opt.maxBytes = \"1MB\"\n", @@ -180,8 +179,16 @@ "\n", "REPO = Path(\".\").resolve().parent\n", "\n", - "with h5py.File(STORAGE_FOLDER / \"constituencies.h5\", \"r\") as f:\n", - " weights = f[\"2025\"][:]\n", + "weights_file_path = STORAGE_FOLDER / \"parliamentary_constituency_weights.h5\"\n", + "with h5py.File(weights_file_path, \"r\") as f:\n", + " weights = f[str(2025)][...]\n", + "constituency_names_file_path = download(\n", + " repo=\"policyengine/policyengine-uk-data\",\n", + " repo_filename=\"constituencies_2024.csv\",\n", + " local_folder=None,\n", + " version=None,\n", + ")\n", + "constituencies_2024 = pd.read_csv(constituency_names_file_path)\n", "\n", "baseline = Microsimulation()\n", "household_weights = baseline.calculate(\"household_weight\", 2025).values\n", @@ -190,7 +197,6 @@ "\n", "constituency_target_matrix, constituency_actuals = create_constituency_target_matrix(\"enhanced_frs_2022_23\", 2025, None)\n", "national_target_matrix, national_actuals = create_national_target_matrix(\"enhanced_frs_2022_23\", 2025, None)\n", - "constituencies_2024 = pd.read_csv(STORAGE_FOLDER / \"constituencies_2024.csv\")\n", "\n", "constituency_wide = weights @ constituency_target_matrix\n", "constituency_wide.index = constituencies_2024.code.values\n", @@ -221,13 +227,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "\n", + "
\n", "\n", " \n", " \n", @@ -344,9 +350,9 @@ "\n" ], "text/plain": [ - " index name \\\n", - "11360 E14001373 New Forest East \n", - "4325 E14001488 South Leicestershire \n", - "821 E14001234 Farnham and Bordon \n", - "2676 E14001139 Broxbourne \n", - "10790 E14001453 Rugby \n", - "... ... ... \n", - "6697 E14001260 Hackney South and Shoreditch \n", - "7249 E14001162 Chesham and Amersham \n", - "6599 E14001162 Chesham and Amersham \n", - "7388 E14001301 Ilford South \n", - "6738 E14001301 Ilford South \n", + " index name \\\n", + "10276 E14001589 Wirral West \n", + "5283 E14001146 Bury St Edmunds and Stowmarket \n", + "4154 E14001317 Knowsley \n", + "5855 E14001068 Ashfield \n", + "5197 W07000110 Vale of Glamorgan \n", + "... ... ... \n", + "6842 E14001405 North West Norfolk \n", + "7791 W07000104 Newport East \n", + "7141 W07000104 Newport East \n", + "6643 E14001206 Dunstable and Leighton Buzzard \n", + "7293 E14001206 Dunstable and Leighton Buzzard \n", "\n", " metric estimate target \\\n", - "11360 hmrc/employment_income/amount/30000_40000 2.509288e+08 2.509499e+08 \n", - "4325 age/40_50 1.193506e+04 1.193367e+04 \n", - "821 hmrc/total_income/count 6.737637e+04 6.736803e+04 \n", - "2676 age/20_30 1.162183e+04 1.162016e+04 \n", - "10790 hmrc/employment_income/count/30000_40000 7.828644e+03 7.830460e+03 \n", + "10276 hmrc/employment_income/amount/20000_30000 1.322333e+08 1.322420e+08 \n", + "5283 age/60_70 1.451102e+04 1.450993e+04 \n", + "4154 age/40_50 1.269529e+04 1.269403e+04 \n", + "5855 age/70_80 1.078475e+04 1.078346e+04 \n", + "5197 age/50_60 1.489186e+04 1.489368e+04 \n", "... ... ... ... 
\n", - "6697 hmrc/employment_income/count/12570_15000 1.436137e+03 4.885362e+01 \n", - "7249 hmrc/employment_income/amount/12570_15000 1.991328e+07 6.456983e+05 \n", - "6599 hmrc/employment_income/count/12570_15000 1.435062e+03 4.649818e+01 \n", - "7388 hmrc/employment_income/amount/12570_15000 1.991574e+07 6.234373e+05 \n", - "6738 hmrc/employment_income/count/12570_15000 1.435239e+03 4.489512e+01 \n", + "6842 hmrc/employment_income/count/12570_15000 8.755749e+02 8.431956e+01 \n", + "7791 hmrc/employment_income/amount/12570_15000 1.544117e+07 1.432437e+06 \n", + "7141 hmrc/employment_income/count/12570_15000 1.118195e+03 1.031530e+02 \n", + "6643 hmrc/employment_income/count/12570_15000 7.247283e+02 6.187779e+01 \n", + "7293 hmrc/employment_income/amount/12570_15000 1.006603e+07 8.592676e+05 \n", "\n", " error abs_error rel_abs_error \n", - "11360 -2.108173e+04 2.108173e+04 0.000084 \n", - "4325 1.389142e+00 1.389142e+00 0.000116 \n", - "821 8.340814e+00 8.340814e+00 0.000124 \n", - "2676 1.670008e+00 1.670008e+00 0.000144 \n", - "10790 -1.816220e+00 1.816220e+00 0.000232 \n", + "10276 -8.699413e+03 8.699413e+03 0.000066 \n", + "5283 1.083593e+00 1.083593e+00 0.000075 \n", + "4154 1.256512e+00 1.256512e+00 0.000099 \n", + "5855 1.288091e+00 1.288091e+00 0.000119 \n", + "5197 -1.817116e+00 1.817116e+00 0.000122 \n", "... ... ... ... 
\n", - "6697 1.387283e+03 1.387283e+03 28.396738 \n", - "7249 1.926758e+07 1.926758e+07 29.839914 \n", - "6599 1.388564e+03 1.388564e+03 29.862760 \n", - "7388 1.929230e+07 1.929230e+07 30.945050 \n", - "6738 1.390344e+03 1.390344e+03 30.968717 \n", + "6842 7.912553e+02 7.912553e+02 9.384007 \n", + "7791 1.400874e+07 1.400874e+07 9.779649 \n", + "7141 1.015042e+03 1.015042e+03 9.840155 \n", + "6643 6.628505e+02 6.628505e+02 10.712253 \n", + "7293 9.206761e+06 9.206761e+06 10.714661 \n", "\n", "[14300 rows x 8 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -411,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -433,13 +439,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", "\n", " \n", " \n", @@ -554,9 +560,9 @@ "\n" ], "text/plain": [ @@ -174,14 +174,24 @@ "import itables.options as opt\n", "from pathlib import Path\n", "from policyengine_uk_data.storage import STORAGE_FOLDER\n", + "from policyengine.utils.huggingface import download\n", "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", "REPO = Path(\".\").resolve().parent\n", "\n", - "with h5py.File(STORAGE_FOLDER / \"local_authority_weights.h5\", \"r\") as f:\n", - " weights = f[\"2025\"][:]\n", + "weights_file_path = STORAGE_FOLDER / \"local_authority_weights.h5\"\n", + "constituency_names_file_path = download(\n", + " repo=\"policyengine/policyengine-uk-data\",\n", + " repo_filename=\"local_authorities_2021.csv\",\n", + " local_folder=None,\n", + " version=None,\n", + ")\n", + "constituencies_2024 = pd.read_csv(constituency_names_file_path)\n", + "\n", + "with h5py.File(weights_file_path, \"r\") as f:\n", + " weights = f[str(2025)][...]\n", "\n", "baseline = Microsimulation()\n", "household_weights = baseline.calculate(\"household_weight\", 2025).values\n", @@ -190,7 +200,6 @@ "\n", "local_authority_target_matrix, local_authority_actuals = create_local_authority_target_matrix(\"enhanced_frs_2022_23\", 2025, None)\n", "national_target_matrix, national_actuals = create_national_target_matrix(\"enhanced_frs_2022_23\", 2025, None)\n", - "constituencies_2024 = pd.read_csv(STORAGE_FOLDER / \"local_authorities_2021.csv\")\n", "\n", "local_authority_wide = weights @ local_authority_target_matrix\n", "local_authority_wide.index = constituencies_2024.code.values\n", @@ -221,13 +230,13 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", "\n", " \n", " \n", @@ -344,9 +353,9 @@ "\n" ], "text/plain": [ - " index name \\\n", - "7682 E07000102 Three Rivers \n", - "3227 W06000011 Swansea \n", - "5640 E08000014 Sefton \n", - "6179 E06000063 Cumberland \n", - "7022 E07000193 East Staffordshire \n", - "... ... ... \n", - "4998 S12000023 Orkney Islands \n", - "4943 E09000001 City of London \n", - "769 E06000053 Isles of Scilly \n", - "1129 E06000053 Isles of Scilly \n", - "1849 E06000053 Isles of Scilly \n", - "\n", - " metric estimate target \\\n", - "7682 hmrc/employment_income/amount/50000_70000 6.835184e+08 6.835077e+08 \n", - "3227 age/60_70 2.865268e+04 2.865445e+04 \n", - "5640 hmrc/employment_income/amount/20000_30000 3.690511e+08 3.690074e+08 \n", - "6179 hmrc/employment_income/amount/30000_40000 7.644177e+08 7.643270e+08 \n", - "7022 hmrc/employment_income/amount/40000_50000 3.140948e+08 3.140508e+08 \n", - "... ... ... ... \n", - "4998 hmrc/employment_income/amount/15000_20000 4.427085e+06 1.308275e+06 \n", - "4943 hmrc/employment_income/amount/15000_20000 4.447282e+06 1.308275e+06 \n", - "769 age/0_10 6.712070e+02 1.954337e+02 \n", - "1129 age/10_20 7.329886e+02 2.066304e+02 \n", - "1849 age/30_40 8.760654e+02 2.300417e+02 \n", - "\n", - " error abs_error rel_abs_error \n", - "7682 1.071678e+04 1.071678e+04 0.000016 \n", - "3227 -1.769448e+00 1.769448e+00 0.000062 \n", - "5640 4.374969e+04 4.374969e+04 0.000119 \n", - "6179 9.073156e+04 9.073156e+04 0.000119 \n", - "7022 4.394198e+04 4.394198e+04 0.000140 \n", - "... ... ... ... \n", - "4998 3.118810e+06 3.118810e+06 2.383910 \n", - "4943 3.139007e+06 3.139007e+06 2.399348 \n", - "769 4.757734e+02 4.757734e+02 2.434449 \n", - "1129 5.263582e+02 5.263582e+02 2.547342 \n", - "1849 6.460237e+02 6.460237e+02 2.808289 \n", + " index name ... abs_error rel_abs_error\n", + "1024 N09000009 Mid Ulster ... 7.786641e-01 0.000036\n", + "3485 E08000019 Sheffield ... 1.751454e+00 0.000039\n", + "2392 E08000006 Salford ... 
1.939367e+00 0.000058\n", + "174 E07000175 Newark and Sherwood ... 2.067212e+05 0.000072\n", + "6517 E06000040 Windsor and Maidenhead ... 6.069600e-01 0.000077\n", + "... ... ... ... ... ...\n", + "4998 S12000023 Orkney Islands ... 3.384977e+06 2.587359\n", + "4943 E09000001 City of London ... 3.397485e+06 2.596920\n", + "5000 S12000027 Shetland Islands ... 3.436770e+06 2.626947\n", + "1129 E06000053 Isles of Scilly ... 5.483983e+02 2.654006\n", + "1849 E06000053 Isles of Scilly ... 6.821887e+02 2.965500\n", "\n", "[7920 rows x 8 columns]" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } diff --git a/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py b/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py index 0dc38ad..f94c758 100644 --- a/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py +++ b/policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py @@ -4,6 +4,7 @@ import numpy as np from tqdm import tqdm import h5py +import os from policyengine_uk_data.datasets.frs.local_areas.constituencies.transform_constituencies import ( transform_2010_to_2024, ) @@ -57,6 +58,18 @@ def loss(w): return mse_c + mse_n + def pct_close(w, t=0.1): + # Return the percentage of metrics that are within t% of the target + pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1) + e_c = torch.sum(torch.abs((pred_c / (1 + y) - 1)) < t) + c_c = pred_c.shape[0] * pred_c.shape[1] + + pred_n = (w.sum(axis=0) * matrix_national.T).sum(axis=1) + e_n = torch.sum(torch.abs((pred_n / (1 + y_national) - 1)) < t) + c_n = pred_n.shape[0] + + return (e_c + e_n) / (c_c + c_n) + def dropout_weights(weights, p): if p == 0: return weights @@ -69,7 +82,7 @@ def dropout_weights(weights, p): optimizer = torch.optim.Adam([weights], lr=0.1) - desc = range(512) + desc = range(32) if os.environ.get("DATA_LITE") else range(256) for epoch in desc: optimizer.zero_grad() @@ 
-77,8 +90,9 @@ def dropout_weights(weights, p): l = loss(torch.exp(weights_)) l.backward() optimizer.step() - if epoch % 50 == 0: - print(f"Loss: {l.item()}, Epoch: {epoch}") + close = pct_close(torch.exp(weights_)) + if epoch % 10 == 0: + print(f"Loss: {l.item()}, Epoch: {epoch}, Within 10%: {close:.2%}") final_weights = torch.exp(weights).detach().numpy() mapping_matrix = pd.read_csv( diff --git a/policyengine_uk_data/datasets/frs/local_areas/local_authorities/calibrate.py b/policyengine_uk_data/datasets/frs/local_areas/local_authorities/calibrate.py index 8142672..3a8d71c 100644 --- a/policyengine_uk_data/datasets/frs/local_areas/local_authorities/calibrate.py +++ b/policyengine_uk_data/datasets/frs/local_areas/local_authorities/calibrate.py @@ -4,10 +4,11 @@ import numpy as np from tqdm import tqdm import h5py +import os from policyengine_uk_data.storage import STORAGE_FOLDER -from loss import ( +from policyengine_uk_data.datasets.frs.local_areas.local_authorities.loss import ( create_local_authority_target_matrix, create_national_target_matrix, ) @@ -50,6 +51,18 @@ def loss(w): return mse_c + mse_n + def pct_close(w, t=0.1): + # Return the percentage of metrics that are within t% of the target + pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1) + e_c = torch.sum(torch.abs((pred_c / (1 + y) - 1)) < t) + c_c = pred_c.shape[0] * pred_c.shape[1] + + pred_n = (w.sum(axis=0) * matrix_national.T).sum(axis=1) + e_n = torch.sum(torch.abs((pred_n / (1 + y_national) - 1)) < t) + c_n = pred_n.shape[0] + + return (e_c + e_n) / (c_c + c_n) + def dropout_weights(weights, p): if p == 0: return weights @@ -62,7 +75,7 @@ def dropout_weights(weights, p): optimizer = torch.optim.Adam([weights], lr=0.1) - desc = range(512) + desc = range(32) if os.environ.get("DATA_LITE") else range(256) for epoch in desc: optimizer.zero_grad() @@ -70,8 +83,9 @@ def dropout_weights(weights, p): l = loss(torch.exp(weights_)) l.backward() optimizer.step() - if epoch % 50 == 0: - 
print(f"Loss: {l.item()}, Epoch: {epoch}") + close = pct_close(torch.exp(weights_)) + if epoch % 10 == 0: + print(f"Loss: {l.item()}, Epoch: {epoch}, Within 10%: {close:.2%}") if epoch % 100 == 0: final_weights = torch.exp(weights).detach().numpy() diff --git a/policyengine_uk_data/storage/download_private_prerequisites.py b/policyengine_uk_data/storage/download_private_prerequisites.py index 2094a64..bb390d9 100644 --- a/policyengine_uk_data/storage/download_private_prerequisites.py +++ b/policyengine_uk_data/storage/download_private_prerequisites.py @@ -28,6 +28,5 @@ def extract_zipped_folder(folder): repo_filename=file.name, local_folder=file.parent, ) - print(f"Extracting {file}") extract_zipped_folder(file) file.unlink() diff --git a/policyengine_uk_data/utils/huggingface.py b/policyengine_uk_data/utils/huggingface.py index a46da04..95f2a81 100644 --- a/policyengine_uk_data/utils/huggingface.py +++ b/policyengine_uk_data/utils/huggingface.py @@ -9,7 +9,6 @@ def download( token = os.environ.get( "HUGGING_FACE_TOKEN", ) - login(token=token) hf_hub_download( repo_id=repo, @@ -17,6 +16,7 @@ def download( filename=repo_filename, local_dir=local_folder, revision=version, + token=token, ) diff --git a/policyengine_uk_data/utils/reweight.py b/policyengine_uk_data/utils/reweight.py index 9f25d17..f07d669 100644 --- a/policyengine_uk_data/utils/reweight.py +++ b/policyengine_uk_data/utils/reweight.py @@ -1,5 +1,6 @@ import numpy as np import torch +import os def reweight( @@ -32,6 +33,12 @@ def loss(weights): raise ValueError("Relative error contains NaNs") return rel_error.mean() + def pct_close(weights, t=0.1): + # Return the percentage of metrics that are within t% of the target + estimate = weights @ loss_matrix + abs_error = torch.abs((estimate - targets_array) / (1 + targets_array)) + return (abs_error < t).sum() / abs_error.numel() + def dropout_weights(weights, p): if p == 0: return weights @@ -47,17 +54,20 @@ def dropout_weights(weights, p): start_loss = None - 
iterator = range(1_000) + iterator = range(128) if os.environ.get("DATA_LITE") else range(2048) for i in iterator: optimizer.zero_grad() weights_ = dropout_weights(weights, dropout_rate) l = loss(torch.exp(weights_)) + close = pct_close(torch.exp(weights_)) if start_loss is None: start_loss = l.item() loss_rel_change = (l.item() - start_loss) / start_loss l.backward() if i % 100 == 0: - print(f"Loss: {l.item()}, Rel change: {loss_rel_change}") + print( + f"Loss: {l.item()}, Rel change: {loss_rel_change}, Epoch: {i}, Within 10%: {close:.2%}" + ) optimizer.step() return torch.exp(weights).detach().numpy()