Update calibration routine parameters
nikhilwoodruff committed Dec 23, 2024
1 parent ef50800 commit 1bcab90
Showing 5 changed files with 37 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pull_request.yaml
@@ -50,7 +50,7 @@ jobs:
       - name: Build datasets
         run: make data
         env:
-          TEST_LITE: true
+          DATA_LITE: true
       - name: Run tests
         run: pytest
       - name: Test documentation builds
7 changes: 1 addition & 6 deletions docs/validation/constituencies.ipynb
@@ -179,12 +179,7 @@
 "\n",
 "REPO = Path(\".\").resolve().parent\n",
 "\n",
-"weights_file_path = download(\n",
-"    repo=\"policyengine/policyengine-uk-data\",\n",
-"    repo_filename=\"parliamentary_constituency_weights.h5\",\n",
-"    local_folder=None,\n",
-"    version=None,\n",
-")\n",
+"weights_file_path = STORAGE_FOLDER / \"parliamentary_constituency_weights.h5\"\n",
 "with h5py.File(weights_file_path, \"r\") as f:\n",
 "    weights = f[str(2025)][...]\n",
 "constituency_names_file_path = download(\n",
7 changes: 1 addition & 6 deletions docs/validation/local_authorities.ipynb
@@ -181,12 +181,7 @@
 "\n",
 "REPO = Path(\".\").resolve().parent\n",
 "\n",
-"weights_file_path = download(\n",
-"    repo=\"policyengine/policyengine-uk-data\",\n",
-"    repo_filename=\"local_authority_weights.h5\",\n",
-"    local_folder=None,\n",
-"    version=None,\n",
-")\n",
+"weights_file_path = STORAGE_FOLDER / \"local_authority_weights.h5\"\n",
 "constituency_names_file_path = download(\n",
 "    repo=\"policyengine/policyengine-uk-data\",\n",
 "    repo_filename=\"local_authorities_2021.csv\",\n",
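Both notebook diffs make the same change: the weight files are read from local storage instead of being pulled with download(). A minimal sketch of the new pattern, assuming STORAGE_FOLDER is importable from policyengine_uk_data.storage (the exact import path is not shown in this diff) and that the files have already been built locally:

```python
import h5py

# Assumed import path for STORAGE_FOLDER; adjust to wherever the
# package actually exposes it.
from policyengine_uk_data.storage import STORAGE_FOLDER

# Same pattern for "local_authority_weights.h5" in the other notebook.
weights_file_path = STORAGE_FOLDER / "parliamentary_constituency_weights.h5"

with h5py.File(weights_file_path, "r") as f:
    # Top-level keys are years; 2025 holds the calibrated weight matrix.
    weights = f[str(2025)][...]

print(weights.shape)
```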
@@ -58,6 +58,18 @@ def loss(w):
 
     return mse_c + mse_n
 
+def pct_close(w, t=0.1):
+    # Return the percentage of metrics that are within t% of the target
+    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
+    e_c = torch.sum(torch.abs((pred_c / (1 + y) - 1)) < t)
+    c_c = pred_c.shape[0] * pred_c.shape[1]
+
+    pred_n = (w.sum(axis=0) * matrix_national.T).sum(axis=1)
+    e_n = torch.sum(torch.abs((pred_n / (1 + y_national) - 1)) < t)
+    c_n = pred_n.shape[0]
+
+    return (e_c + e_n) / (c_c + c_n)
+
 def dropout_weights(weights, p):
     if p == 0:
         return weights
@@ -68,18 +80,19 @@ def dropout_weights(weights, p):
     masked_weights[mask] = mean
     return masked_weights
 
-optimizer = torch.optim.Adam([weights], lr=0.05)
+optimizer = torch.optim.Adam([weights], lr=0.1)
 
-desc = range(128) if os.environ.get("DATA_LITE") else range(2048)
+desc = range(32) if os.environ.get("DATA_LITE") else range(256)
 
 for epoch in desc:
     optimizer.zero_grad()
     weights_ = dropout_weights(weights, 0.05)
     l = loss(torch.exp(weights_))
     l.backward()
     optimizer.step()
-    if epoch % 50 == 0:
-        print(f"Loss: {l.item()}, Epoch: {epoch}", flush=True)
+    close = pct_close(torch.exp(weights_))
+    if epoch % 10 == 0:
+        print(f"Loss: {l.item()}, Epoch: {epoch}, Within 10%: {close:.2%}")
 
 final_weights = torch.exp(weights).detach().numpy()
 mapping_matrix = pd.read_csv(
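The pct_close helper added in this hunk can be exercised on its own. The sketch below rewrites it with explicit arguments (in the diff it closes over module-level tensors) and feeds it toy data whose shapes are assumptions. Note the 1 + y in the denominator, presumably a guard against zero-valued targets, which means the comparison reads as an exact relative error only when targets are much larger than one.

```python
import torch

def pct_close(w, metrics, y, matrix_national, y_national, t=0.1):
    # Share of calibration targets whose weighted prediction lands
    # within t (e.g. 10%) of the target value.
    # Area-level predictions: (areas, households, 1) * (1, households, targets).
    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
    e_c = torch.sum(torch.abs(pred_c / (1 + y) - 1) < t)
    c_c = pred_c.shape[0] * pred_c.shape[1]

    # National predictions from the household weights summed over areas.
    pred_n = (w.sum(dim=0) * matrix_national.T).sum(dim=1)
    e_n = torch.sum(torch.abs(pred_n / (1 + y_national) - 1) < t)
    c_n = pred_n.shape[0]

    return (e_c + e_n) / (c_c + c_n)

# Toy shapes (assumed): 3 areas, 5 households, 4 area-level targets, 2 national targets.
torch.manual_seed(0)
w = torch.rand(3, 5) * 100
metrics = torch.rand(5, 4)
matrix_national = torch.rand(5, 2)
# Construct targets that the weights reproduce, so every target is within 10%.
y = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
y_national = (w.sum(dim=0) * matrix_national.T).sum(dim=1)

print(pct_close(w, metrics, y, matrix_national, y_national))  # tensor(1.)
```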
@@ -51,6 +51,18 @@ def loss(w):
 
     return mse_c + mse_n
 
+def pct_close(w, t=0.1):
+    # Return the percentage of metrics that are within t% of the target
+    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
+    e_c = torch.sum(torch.abs((pred_c / (1 + y) - 1)) < t)
+    c_c = pred_c.shape[0] * pred_c.shape[1]
+
+    pred_n = (w.sum(axis=0) * matrix_national.T).sum(axis=1)
+    e_n = torch.sum(torch.abs((pred_n / (1 + y_national) - 1)) < t)
+    c_n = pred_n.shape[0]
+
+    return (e_c + e_n) / (c_c + c_n)
+
 def dropout_weights(weights, p):
     if p == 0:
         return weights
@@ -61,18 +73,19 @@ def dropout_weights(weights, p):
     masked_weights[mask] = mean
     return masked_weights
 
-optimizer = torch.optim.Adam([weights], lr=0.05)
+optimizer = torch.optim.Adam([weights], lr=0.1)
 
-desc = range(128) if os.environ.get("DATA_LITE") else range(2048)
+desc = range(32) if os.environ.get("DATA_LITE") else range(256)
 
 for epoch in desc:
     optimizer.zero_grad()
     weights_ = dropout_weights(weights, 0.05)
     l = loss(torch.exp(weights_))
     l.backward()
     optimizer.step()
-    if epoch % 50 == 0:
-        print(f"Loss: {l.item()}, Epoch: {epoch}")
+    close = pct_close(torch.exp(weights_))
+    if epoch % 10 == 0:
+        print(f"Loss: {l.item()}, Epoch: {epoch}, Within 10%: {close:.2%}")
 
     if epoch % 100 == 0:
         final_weights = torch.exp(weights).detach().numpy()
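Taken together, the updated training loop in both calibration scripts looks roughly like the sketch below. It is illustrative only: the toy data, tensor shapes, and the body of loss() (only its return statement is visible above) are assumptions, and dropout_weights() is reconstructed from the visible fragment. The hyperparameters (lr=0.1, 256 epochs, 32 under DATA_LITE, dropout p=0.05, logging every 10 epochs) are the ones introduced by this commit.

```python
import os

import torch

torch.manual_seed(0)

# Toy problem sizes (assumed): areas x households, with a few targets each.
n_areas, n_households, n_targets, n_national = 3, 50, 4, 2

metrics = torch.rand(n_households, n_targets) * 100
matrix_national = torch.rand(n_households, n_national) * 100
true_w = torch.rand(n_areas, n_households) * 10
y = (true_w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
y_national = (true_w.sum(dim=0) * matrix_national.T).sum(dim=1)

# Log-weights: the optimiser works in log space and exponentiates before use.
weights = torch.zeros(n_areas, n_households, requires_grad=True)

def loss(w):
    # Assumed form: mean squared relative error against area and national targets.
    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
    mse_c = torch.mean((pred_c / (1 + y) - 1) ** 2)
    pred_n = (w.sum(dim=0) * matrix_national.T).sum(dim=1)
    mse_n = torch.mean((pred_n / (1 + y_national) - 1) ** 2)
    return mse_c + mse_n

def pct_close(w, t=0.1):
    # As added in this commit: share of targets predicted within t of their value.
    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
    e_c = torch.sum(torch.abs(pred_c / (1 + y) - 1) < t)
    c_c = pred_c.shape[0] * pred_c.shape[1]
    pred_n = (w.sum(dim=0) * matrix_national.T).sum(dim=1)
    e_n = torch.sum(torch.abs(pred_n / (1 + y_national) - 1) < t)
    c_n = pred_n.shape[0]
    return (e_c + e_n) / (c_c + c_n)

def dropout_weights(weights, p):
    # Reconstructed from the visible fragment: replace a random share p of
    # the log-weights with their mean, as a simple regulariser.
    if p == 0:
        return weights
    mask = torch.rand_like(weights) < p
    masked_weights = weights.clone()
    masked_weights[mask] = weights.mean()
    return masked_weights

optimizer = torch.optim.Adam([weights], lr=0.1)

epochs = range(32) if os.environ.get("DATA_LITE") else range(256)

for epoch in epochs:
    optimizer.zero_grad()
    weights_ = dropout_weights(weights, 0.05)
    l = loss(torch.exp(weights_))
    l.backward()
    optimizer.step()
    close = pct_close(torch.exp(weights_))
    if epoch % 10 == 0:
        print(f"Loss: {l.item()}, Epoch: {epoch}, Within 10%: {close:.2%}")

final_weights = torch.exp(weights).detach().numpy()
```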
