Update calibration routine parameters
nikhilwoodruff committed Dec 23, 2024
1 parent ef50800 commit 1bcab90
Showing 5 changed files with 37 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pull_request.yaml
@@ -50,7 +50,7 @@ jobs:
       - name: Build datasets
         run: make data
         env:
-          TEST_LITE: true
+          DATA_LITE: true
       - name: Run tests
         run: pytest
       - name: Test documentation builds
7 changes: 1 addition & 6 deletions docs/validation/constituencies.ipynb
@@ -179,12 +179,7 @@
 "\n",
 "REPO = Path(\".\").resolve().parent\n",
 "\n",
-"weights_file_path = download(\n",
-"    repo=\"policyengine/policyengine-uk-data\",\n",
-"    repo_filename=\"parliamentary_constituency_weights.h5\",\n",
-"    local_folder=None,\n",
-"    version=None,\n",
-")\n",
+"weights_file_path = STORAGE_FOLDER / \"parliamentary_constituency_weights.h5\"\n",
 "with h5py.File(weights_file_path, \"r\") as f:\n",
 "    weights = f[str(2025)][...]\n",
 "constituency_names_file_path = download(\n",
7 changes: 1 addition & 6 deletions docs/validation/local_authorities.ipynb
@@ -181,12 +181,7 @@
 "\n",
 "REPO = Path(\".\").resolve().parent\n",
 "\n",
-"weights_file_path = download(\n",
-"    repo=\"policyengine/policyengine-uk-data\",\n",
-"    repo_filename=\"local_authority_weights.h5\",\n",
-"    local_folder=None,\n",
-"    version=None,\n",
-")\n",
+"weights_file_path = STORAGE_FOLDER / \"local_authority_weights.h5\"\n",
 "constituency_names_file_path = download(\n",
 "    repo=\"policyengine/policyengine-uk-data\",\n",
 "    repo_filename=\"local_authorities_2021.csv\",\n",
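Both notebook diffs make the same change: the weight files are read from local storage instead of being pulled with download(). A minimal sketch of the new pattern, assuming STORAGE_FOLDER is importable from policyengine_uk_data.storage (the exact import path is not shown in this diff) and that the files have already been built locally:

```python
import h5py

# Assumed import path for STORAGE_FOLDER; adjust to wherever the
# package actually exposes it.
from policyengine_uk_data.storage import STORAGE_FOLDER

# Same pattern for "local_authority_weights.h5" in the other notebook.
weights_file_path = STORAGE_FOLDER / "parliamentary_constituency_weights.h5"

with h5py.File(weights_file_path, "r") as f:
    # Top-level keys are years; 2025 holds the calibrated weight matrix.
    weights = f[str(2025)][...]

print(weights.shape)
```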
@@ -58,6 +58,18 @@ def loss(w):
 
     return mse_c + mse_n
 
+def pct_close(w, t=0.1):
+    # Return the percentage of metrics that are within t% of the target
+    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
+    e_c = torch.sum(torch.abs((pred_c / (1 + y) - 1)) < t)
+    c_c = pred_c.shape[0] * pred_c.shape[1]
+
+    pred_n = (w.sum(axis=0) * matrix_national.T).sum(axis=1)
+    e_n = torch.sum(torch.abs((pred_n / (1 + y_national) - 1)) < t)
+    c_n = pred_n.shape[0]
+
+    return (e_c + e_n) / (c_c + c_n)
+
 def dropout_weights(weights, p):
     if p == 0:
         return weights
@@ -68,18 +80,19 @@ def dropout_weights(weights, p):
     masked_weights[mask] = mean
     return masked_weights
 
-optimizer = torch.optim.Adam([weights], lr=0.05)
+optimizer = torch.optim.Adam([weights], lr=0.1)
 
-desc = range(128) if os.environ.get("DATA_LITE") else range(2048)
+desc = range(32) if os.environ.get("DATA_LITE") else range(256)
 
 for epoch in desc:
     optimizer.zero_grad()
     weights_ = dropout_weights(weights, 0.05)
     l = loss(torch.exp(weights_))
     l.backward()
     optimizer.step()
-    if epoch % 50 == 0:
-        print(f"Loss: {l.item()}, Epoch: {epoch}", flush=True)
+    close = pct_close(torch.exp(weights_))
+    if epoch % 10 == 0:
+        print(f"Loss: {l.item()}, Epoch: {epoch}, Within 10%: {close:.2%}")
 
 final_weights = torch.exp(weights).detach().numpy()
 mapping_matrix = pd.read_csv(
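The pct_close helper added in this hunk can be exercised on its own. The sketch below rewrites it with explicit arguments (in the diff it closes over module-level tensors) and feeds it toy data whose shapes are assumptions. Note the 1 + y in the denominator, presumably a guard against zero-valued targets, which means the comparison reads as an exact relative error only when targets are much larger than one.

```python
import torch

def pct_close(w, metrics, y, matrix_national, y_national, t=0.1):
    # Share of calibration targets whose weighted prediction lands
    # within t (e.g. 10%) of the target value.
    # Area-level predictions: (areas, households, 1) * (1, households, targets).
    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
    e_c = torch.sum(torch.abs(pred_c / (1 + y) - 1) < t)
    c_c = pred_c.shape[0] * pred_c.shape[1]

    # National predictions from the household weights summed over areas.
    pred_n = (w.sum(dim=0) * matrix_national.T).sum(dim=1)
    e_n = torch.sum(torch.abs(pred_n / (1 + y_national) - 1) < t)
    c_n = pred_n.shape[0]

    return (e_c + e_n) / (c_c + c_n)

# Toy shapes (assumed): 3 areas, 5 households, 4 area-level targets, 2 national targets.
torch.manual_seed(0)
w = torch.rand(3, 5) * 100
metrics = torch.rand(5, 4)
matrix_national = torch.rand(5, 2)
# Construct targets that the weights reproduce, so every target is within 10%.
y = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
y_national = (w.sum(dim=0) * matrix_national.T).sum(dim=1)

print(pct_close(w, metrics, y, matrix_national, y_national))  # tensor(1.)
```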
@@ -51,6 +51,18 @@ def loss(w):
 
     return mse_c + mse_n
 
+def pct_close(w, t=0.1):
+    # Return the percentage of metrics that are within t% of the target
+    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
+    e_c = torch.sum(torch.abs((pred_c / (1 + y) - 1)) < t)
+    c_c = pred_c.shape[0] * pred_c.shape[1]
+
+    pred_n = (w.sum(axis=0) * matrix_national.T).sum(axis=1)
+    e_n = torch.sum(torch.abs((pred_n / (1 + y_national) - 1)) < t)
+    c_n = pred_n.shape[0]
+
+    return (e_c + e_n) / (c_c + c_n)
+
 def dropout_weights(weights, p):
     if p == 0:
         return weights
@@ -61,18 +73,19 @@ def dropout_weights(weights, p):
     masked_weights[mask] = mean
     return masked_weights
 
-optimizer = torch.optim.Adam([weights], lr=0.05)
+optimizer = torch.optim.Adam([weights], lr=0.1)
 
-desc = range(128) if os.environ.get("DATA_LITE") else range(2048)
+desc = range(32) if os.environ.get("DATA_LITE") else range(256)
 
 for epoch in desc:
     optimizer.zero_grad()
     weights_ = dropout_weights(weights, 0.05)
     l = loss(torch.exp(weights_))
     l.backward()
     optimizer.step()
-    if epoch % 50 == 0:
-        print(f"Loss: {l.item()}, Epoch: {epoch}")
+    close = pct_close(torch.exp(weights_))
+    if epoch % 10 == 0:
+        print(f"Loss: {l.item()}, Epoch: {epoch}, Within 10%: {close:.2%}")
 
     if epoch % 100 == 0:
         final_weights = torch.exp(weights).detach().numpy()
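Taken together, the updated training loop in both calibration scripts looks roughly like the sketch below. It is illustrative only: the toy data, tensor shapes, and the body of loss() (only its return statement is visible above) are assumptions, and dropout_weights() is reconstructed from the visible fragment. The hyperparameters (lr=0.1, 256 epochs, 32 under DATA_LITE, dropout p=0.05, logging every 10 epochs) are the ones introduced by this commit.

```python
import os

import torch

torch.manual_seed(0)

# Toy problem sizes (assumed): areas x households, with a few targets each.
n_areas, n_households, n_targets, n_national = 3, 50, 4, 2

metrics = torch.rand(n_households, n_targets) * 100
matrix_national = torch.rand(n_households, n_national) * 100
true_w = torch.rand(n_areas, n_households) * 10
y = (true_w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
y_national = (true_w.sum(dim=0) * matrix_national.T).sum(dim=1)

# Log-weights: the optimiser works in log space and exponentiates before use.
weights = torch.zeros(n_areas, n_households, requires_grad=True)

def loss(w):
    # Assumed form: mean squared relative error against area and national targets.
    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
    mse_c = torch.mean((pred_c / (1 + y) - 1) ** 2)
    pred_n = (w.sum(dim=0) * matrix_national.T).sum(dim=1)
    mse_n = torch.mean((pred_n / (1 + y_national) - 1) ** 2)
    return mse_c + mse_n

def pct_close(w, t=0.1):
    # As added in this commit: share of targets predicted within t of their value.
    pred_c = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1)
    e_c = torch.sum(torch.abs(pred_c / (1 + y) - 1) < t)
    c_c = pred_c.shape[0] * pred_c.shape[1]
    pred_n = (w.sum(dim=0) * matrix_national.T).sum(dim=1)
    e_n = torch.sum(torch.abs(pred_n / (1 + y_national) - 1) < t)
    c_n = pred_n.shape[0]
    return (e_c + e_n) / (c_c + c_n)

def dropout_weights(weights, p):
    # Reconstructed from the visible fragment: replace a random share p of
    # the log-weights with their mean, as a simple regulariser.
    if p == 0:
        return weights
    mask = torch.rand_like(weights) < p
    masked_weights = weights.clone()
    masked_weights[mask] = weights.mean()
    return masked_weights

optimizer = torch.optim.Adam([weights], lr=0.1)

epochs = range(32) if os.environ.get("DATA_LITE") else range(256)

for epoch in epochs:
    optimizer.zero_grad()
    weights_ = dropout_weights(weights, 0.05)
    l = loss(torch.exp(weights_))
    l.backward()
    optimizer.step()
    close = pct_close(torch.exp(weights_))
    if epoch % 10 == 0:
        print(f"Loss: {l.item()}, Epoch: {epoch}, Within 10%: {close:.2%}")

final_weights = torch.exp(weights).detach().numpy()
```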
