-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add local authority weight generation Fixes #51 * Add LA weights and documentation * Versioning
- Loading branch information
1 parent
7ca372f
commit 426e527
Showing
20 changed files
with
9,110 additions
and
1,645 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
72 changes: 0 additions & 72 deletions
72
policyengine_uk_data/datasets/frs/local_areas/constituencies/ageing.ipynb
This file was deleted.
Oops, something went wrong.
86 changes: 86 additions & 0 deletions
86
policyengine_uk_data/datasets/frs/local_areas/local_authorities/calibrate.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import torch | ||
from policyengine_uk import Microsimulation | ||
import pandas as pd | ||
import numpy as np | ||
from tqdm import tqdm | ||
import h5py | ||
from policyengine_uk_data.storage import STORAGE_FOLDER | ||
|
||
|
||
from loss import ( | ||
create_local_authority_target_matrix, | ||
create_national_target_matrix, | ||
) | ||
|
||
|
||
def calibrate():
    """Fit per-local-authority household weights and save them to HDF5.

    Builds the local-authority and national target matrices for the
    ``enhanced_frs_2022_23`` dataset in 2025, then optimises one log-weight
    per (local authority, household) pair with Adam so that the weighted
    household totals reproduce both sets of targets. The exponentiated
    weights are written to ``local_authority_weights.h5`` in
    ``STORAGE_FOLDER`` under the dataset key ``"2025"``, every 100 epochs
    and once more after the last epoch.
    """
    matrix, y = create_local_authority_target_matrix(
        "enhanced_frs_2022_23", 2025
    )

    m_national, y_national = create_national_target_matrix(
        "enhanced_frs_2022_23", 2025
    )

    sim = Microsimulation(dataset="enhanced_frs_2022_23")

    # NOTE(review): presumably must equal the number of rows in `y`
    # (one per local authority) -- confirm against the target CSVs.
    count_local_authority = 360

    # Log-weights, shape (count_local_authority, n_households). Every area
    # starts from an equal share of each household's national weight.
    original_weights = np.log(
        sim.calculate("household_weight", 2025).values / count_local_authority
    )
    weights = torch.tensor(
        np.ones((count_local_authority, len(original_weights)))
        * original_weights,
        dtype=torch.float32,
        requires_grad=True,
    )
    metrics = torch.tensor(matrix.values, dtype=torch.float32)
    y = torch.tensor(y.values, dtype=torch.float32)
    matrix_national = torch.tensor(m_national.values, dtype=torch.float32)
    y_national = torch.tensor(y_national.values, dtype=torch.float32)

    def loss(w):
        """Return local plus national mean squared relative error for `w`."""
        # (areas, households) @ (households, targets) -> (areas, targets).
        # A plain matrix product gives the same totals as broadcasting to a
        # 3-D (areas, households, targets) tensor and summing, without
        # materialising that intermediate.
        pred_c = w @ metrics
        mse_c = torch.mean((pred_c / (1 + y) - 1) ** 2)

        # Summing over areas recovers each household's national weight.
        pred_n = w.sum(dim=0) @ matrix_national
        mse_n = torch.mean((pred_n / (1 + y_national) - 1) ** 2)

        return mse_c + mse_n

    def dropout_weights(weights, p):
        """Replace a random fraction `p` of entries with the mean of the rest."""
        if p == 0:
            return weights
        mask = torch.rand_like(weights) < p
        mean = weights[~mask].mean()
        masked_weights = weights.clone()
        masked_weights[mask] = mean
        return masked_weights

    optimizer = torch.optim.Adam([weights], lr=0.1)

    n_epochs = 512
    last_epoch = n_epochs - 1

    for epoch in range(n_epochs):
        optimizer.zero_grad()
        weights_ = dropout_weights(weights, 0.05)
        loss_value = loss(torch.exp(weights_))
        loss_value.backward()
        optimizer.step()
        if epoch % 50 == 0:
            print(f"Loss: {loss_value.item()}, Epoch: {epoch}")

        # Checkpoint periodically, and always after the final step so the
        # file on disk reflects the fully trained weights rather than the
        # last multiple-of-100 epoch.
        if epoch % 100 == 0 or epoch == last_epoch:
            final_weights = torch.exp(weights).detach().numpy()

            with h5py.File(
                STORAGE_FOLDER / "local_authority_weights.h5", "w"
            ) as f:
                f.create_dataset("2025", data=final_weights)
|
||
|
||
if __name__ == "__main__":
    # Run the calibration only when this module is executed as a script.
    calibrate()
155 changes: 155 additions & 0 deletions
155
policyengine_uk_data/datasets/frs/local_areas/local_authorities/loss.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
import torch | ||
from policyengine_uk import Microsimulation | ||
import pandas as pd | ||
import numpy as np | ||
from pathlib import Path | ||
|
||
from policyengine_uk_data.utils.loss import ( | ||
create_target_matrix as create_national_target_matrix, | ||
) | ||
|
||
FOLDER = Path(__file__).parent | ||
|
||
|
||
def create_local_authority_target_matrix(
    dataset: str = "enhanced_frs_2022_23",
    time_period: int = 2025,
    reform=None,
    uprate: bool = True,
):
    """Build the household metric matrix and the matching target table.

    Args:
        dataset: Dataset name passed to ``Microsimulation``.
        time_period: Period set as the simulation's default calculation
            period; also passed to ``uprate_targets`` when ``uprate`` is set.
        reform: Optional reform passed to ``Microsimulation``.
        uprate: If true, pass the targets through ``uprate_targets`` before
            returning them.

    Returns:
        A ``(matrix, y)`` pair of DataFrames sharing the same column names in
        the same order. ``matrix`` holds person-level quantities mapped to
        households; ``y`` holds the target values read from the CSV files in
        the ``targets`` folder (presumably one row per local authority --
        confirm against the CSVs).
    """
    targets_dir = FOLDER / "targets"
    ages = pd.read_csv(targets_dir / "age.csv")
    incomes = pd.read_csv(targets_dir / "total_income.csv")
    employment_incomes = pd.read_csv(targets_dir / "employment_income.csv")

    sim = Microsimulation(dataset=dataset, reform=reform)
    sim.default_calculation_period = time_period

    def to_household(person_values):
        # Map a person-level array onto households.
        return sim.map_result(person_values, "person", "household")

    matrix = pd.DataFrame()
    y = pd.DataFrame()

    # Total income: amount and number of people with non-zero income.
    total_income = sim.calculate("total_income").values
    matrix["hmrc/total_income/amount"] = to_household(total_income)
    matrix["hmrc/total_income/count"] = to_household(total_income != 0)
    y["hmrc/total_income/amount"] = incomes["total_income_amount"]
    y["hmrc/total_income/count"] = incomes["total_income_count"]

    # Population counts in ten-year age bands from 0 up to (not including) 80.
    age = sim.calculate("age").values
    for band_start in range(0, 80, 10):
        band_end = band_start + 10
        column = f"age/{band_start}_{band_end}"

        matrix[column] = to_household((age >= band_start) & (age < band_end))

        # The age CSV has one column per single year of age, named by the
        # year as a string; sum the columns that fall inside the band.
        single_year_columns = [
            str(single_year) for single_year in range(band_start, band_end)
        ]
        y[column] = ages[single_year_columns].sum(axis=1).values

    # Employment income bands, delimited by the lower bounds found in the
    # CSV with an open-ended top band.
    employment_income = sim.calculate("employment_income").values
    bounds = [
        *employment_incomes.employment_income_lower_bound.sort_values().unique(),
        np.inf,
    ]

    for band_floor, band_ceiling in zip(bounds, bounds[1:]):
        # Only bands starting in [12,570, 70,000) are used as targets.
        if band_floor < 12_570 or band_floor >= 70_000:
            continue

        in_band = (
            (employment_income >= band_floor)
            & (employment_income < band_ceiling)
            & (employment_income != 0)
            & (age >= 16)
        )
        band_rows = employment_incomes[
            (employment_incomes.employment_income_lower_bound == band_floor)
            & (employment_incomes.employment_income_upper_bound == band_ceiling)
        ]
        band_label = f"{band_floor}_{band_ceiling}"
        count_column = f"hmrc/employment_income/count/{band_label}"
        amount_column = f"hmrc/employment_income/amount/{band_label}"

        matrix[count_column] = to_household(in_band)
        y[count_column] = band_rows.employment_income_count.values

        matrix[amount_column] = to_household(employment_income * in_band)
        y[amount_column] = band_rows.employment_income_amount.values

    if uprate:
        y = uprate_targets(y, time_period)

    return matrix, y
|
||
|
||
def uprate_targets(y: pd.DataFrame, target_year: int = 2025) -> pd.DataFrame:
    """Scale each target column from its source year to ``target_year``.

    Age targets are uprated from 2020, total income targets from 2021 and
    employment income targets from 2023. The factor for a column is the
    relative change in the weighted ``frs_2020_21`` total of that column
    between its source year and ``target_year``.

    Args:
        y: Target table whose columns are assumed to be in the same order as
            those produced by ``create_local_authority_target_matrix``.
        target_year: Year to uprate the targets to.

    Returns:
        ``y`` with every column multiplied by one plus its uprating factor.
        Columns matching none of the prefixes are returned unchanged.
    """
    sim = Microsimulation(dataset="frs_2020_21")

    def weighted_totals(year):
        # Column labels and weighted national totals of every metric in `year`.
        matrix, _ = create_local_authority_target_matrix(
            "frs_2020_21", year, uprate=False
        )
        weights = sim.calculate("household_weight", year)
        return matrix.columns, weights @ matrix

    # Each group of columns is uprated from exactly one base year. Total
    # income and employment income both live under "hmrc/" but come from
    # different years, so they are matched on their full prefixes; matching
    # both on the bare "hmrc/" prefix would add the two relative changes
    # together for every HMRC column.
    base_year_by_prefix = (
        ("age/", 2020),
        ("hmrc/total_income/", 2021),
        ("hmrc/employment_income/", 2023),
    )

    columns_final, totals_final = weighted_totals(target_year)
    uprating = np.zeros(len(columns_final), dtype=float)

    for prefix, base_year in base_year_by_prefix:
        columns, totals_base = weighted_totals(base_year)
        rel_change = np.asarray(totals_final / totals_base - 1, dtype=float)
        selected = np.array(
            [col.startswith(prefix) for col in columns], dtype=bool
        )
        uprating[selected] = rel_change[selected]

    y = y * (1 + uprating)

    return y
6 changes: 6 additions & 0 deletions
6
policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Data | ||
|
||
* Age is from [the ONS](https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/parliamentaryconstituencymidyearpopulationestimates/mid2020sape23dt7/sape23dt7mid2020parliconsyoaestimatesunformatted.xlsx) and has single-year age counts for each parliamentary constituency (2010) in the UK. The data is from 2020. | ||
* Employment incomes are from Nomis, and are from 2023. | ||
* HMRC total income is from 2021. | ||
|
Oops, something went wrong.