From 1142c8ce9cbe803b8d97a34ca43d528ef2672d72 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 11:31:49 +0100
Subject: [PATCH 01/14] Lighten dependencies and add changelog

---
 .github/CONTRIBUTING.md                 |  3 +++
 .github/changelog_template.md           |  8 ++++++
 .github/fetch_version.py                | 13 ++++++++++
 .github/get-changelog-diff.sh           |  2 ++
 .github/has-functional-changes.sh       | 12 +++++++++
 .github/is-version-number-acceptable.sh | 33 +++++++++++++++++++++++++
 .github/publish-git-tag.sh              |  4 +++
 .github/workflows/ci_cd.yaml            | 29 ++++++++++++++++++++++
 changelog.yaml                          |  5 ++++
 changelog_entry.yaml                    |  4 +++
 pyproject.toml                          |  6 ++---
 11 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 .github/CONTRIBUTING.md
 create mode 100644 .github/changelog_template.md
 create mode 100644 .github/fetch_version.py
 create mode 100755 .github/get-changelog-diff.sh
 create mode 100755 .github/has-functional-changes.sh
 create mode 100755 .github/is-version-number-acceptable.sh
 create mode 100755 .github/publish-git-tag.sh
 create mode 100644 changelog.yaml
 create mode 100644 changelog_entry.yaml

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..5b5d24e
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,3 @@
+## Updating data
+
+If your changes make a non-bugfix change to one or more of the cloud-hosted datasets (FRS and EFRS), please change both the filename and the URL (in both the class definition file and `storage/upload_completed_datasets.py`). This lets us store historical versions of the datasets separately and reproducibly.
diff --git a/.github/changelog_template.md b/.github/changelog_template.md
new file mode 100644
index 0000000..8a1e679
--- /dev/null
+++ b/.github/changelog_template.md
@@ -0,0 +1,8 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+{{changelog}}
\ No newline at end of file
diff --git a/.github/fetch_version.py b/.github/fetch_version.py
new file mode 100644
index 0000000..f130f40
--- /dev/null
+++ b/.github/fetch_version.py
@@ -0,0 +1,26 @@
+import sys
+
+
+def fetch_version():
+    """Return the package version string, or None if it cannot be imported.
+
+    The import lives inside the function so that a missing or broken package
+    is reported by the handler below instead of crashing at module import.
+    """
+    try:
+        # NOTE(review): the package in this repository is
+        # policyengine_uk_data (see the Makefile data targets); assumes it
+        # ships a __version__ module - confirm.
+        from policyengine_uk_data.__version__ import __version__
+    except Exception as e:
+        # stderr, because callers capture stdout and expect only the version.
+        print(f"Error fetching version: {e}", file=sys.stderr)
+        return None
+    return __version__
+
+
+if __name__ == "__main__":
+    version = fetch_version()
+    if version is None:
+        sys.exit(1)  # never print "None": the shell scripts would tag it
+    print(version)
diff --git a/.github/get-changelog-diff.sh b/.github/get-changelog-diff.sh
new file mode 100755
index 0000000..66c2bfd
--- /dev/null
+++ b/.github/get-changelog-diff.sh
@@ -0,0 +1,2 @@
+last_tagged_commit=`git describe --tags --abbrev=0 --first-parent`
+git --no-pager diff $last_tagged_commit -- CHANGELOG.md
\ No newline at end of file
diff --git a/.github/has-functional-changes.sh b/.github/has-functional-changes.sh
new file mode 100755
index 0000000..169689a
--- /dev/null
+++ b/.github/has-functional-changes.sh
@@ -0,0 +1,12 @@
+#! /usr/bin/env bash
+
+IGNORE_DIFF_ON="README.md CONTRIBUTING.md Makefile docs/* .gitignore LICENSE* .github/* data/*"
+
+last_tagged_commit=`git describe --tags --abbrev=0 --first-parent`  # --first-parent ensures we don't follow tags not published in master through an unlikely intermediary merge commit
+
+if git diff-index --name-only --exit-code $last_tagged_commit -- . `echo " $IGNORE_DIFF_ON" | sed 's/ / :(exclude)/g'`  # Check if any file that has not been listed in IGNORE_DIFF_ON has changed since the last tag was published.
+then
+  echo "No functional changes detected."
+  exit 1
+else echo "The functional files above were changed."
+fi
diff --git a/.github/is-version-number-acceptable.sh b/.github/is-version-number-acceptable.sh
new file mode 100755
index 0000000..a9067e6
--- /dev/null
+++ b/.github/is-version-number-acceptable.sh
@@ -0,0 +1,33 @@
+#! /usr/bin/env bash
+
+if [[ ${GITHUB_REF#refs/heads/} == master ]]
+then
+    echo "No need for a version check on master."
+    exit 0
+fi
+
+if ! $(dirname "$BASH_SOURCE")/has-functional-changes.sh
+then
+    echo "No need for a version update."
+    exit 0
+fi
+
+current_version=`python .github/fetch_version.py`
+
+if git rev-parse --verify --quiet $current_version
+then
+    echo "Version $current_version already exists in commit:"
+    git --no-pager log -1 $current_version
+    echo
+    echo "Update the version number in setup.py before merging this branch into master."
+    echo "Look at the CONTRIBUTING.md file to learn how the version number should be updated."
+    exit 1
+fi
+
+if ! $(dirname "$BASH_SOURCE")/has-functional-changes.sh | grep --quiet CHANGELOG.md
+then
+    echo "CHANGELOG.md has not been modified, while functional changes were made."
+    echo "Explain what you changed before merging this branch into master."
+    echo "Look at the CONTRIBUTING.md file to learn how to write the changelog."
+    exit 2
+fi
diff --git a/.github/publish-git-tag.sh b/.github/publish-git-tag.sh
new file mode 100755
index 0000000..9437a66
--- /dev/null
+++ b/.github/publish-git-tag.sh
@@ -0,0 +1,4 @@
+#! /usr/bin/env bash
+
+git tag `python .github/fetch_version.py`  # create a new tag
+git push --tags || true  # update the repository version
diff --git a/.github/workflows/ci_cd.yaml b/.github/workflows/ci_cd.yaml
index 3db31f2..4df3e8c 100644
--- a/.github/workflows/ci_cd.yaml
+++ b/.github/workflows/ci_cd.yaml
@@ -68,6 +68,35 @@ jobs:
         run: make data
       - name: Run tests
         run: pytest
+  check-version:
+    name: Check version
+    if: github.event_name == 'pull_request'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all tags and branches
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          ref: ${{ github.event.pull_request.head.ref }}
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Build changelog
+        run: pip install "yaml-changelog>=0.1.7" && make changelog
+      - name: Preview changelog update
+        run: ".github/get-changelog-diff.sh"
+      - name: Check version number has been properly updated
+        run: ".github/is-version-number-acceptable.sh"
+      - name: Update changelog
+        uses: EndBug/add-and-commit@v9
+        with:
+          add: "."
+          committer_name: Github Actions[bot]
+          author_name: Github Actions[bot]
+          message: Update PolicyEngine US data
+          github_token: ${{ secrets.POLICYENGINE_GITHUB }}
 
   docker:
     name: Docker
diff --git a/changelog.yaml b/changelog.yaml
new file mode 100644
index 0000000..8944a4b
--- /dev/null
+++ b/changelog.yaml
@@ -0,0 +1,5 @@
+- changes:
+    added:
+    - Initialized changelogging
+  date: 2024-09-09 17:29:10
+  version: 1.0.0
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
new file mode 100644
index 0000000..f3b708c
--- /dev/null
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    changed:
+    - Lightened dependency list.
diff --git a/pyproject.toml b/pyproject.toml
index 640810d..2ecfff7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,9 +14,6 @@ license = {file = "LICENSE"}
 requires-python = ">=3.6"
 dependencies = [
     "policyengine_core",
-    "tables",
-    "survey_enhance",
-    "torch",
     "requests",
     "tqdm",
     "tabulate",
@@ -29,6 +26,9 @@ dev = [
     "pytest",
     "policyengine_uk>=1.8.0",
     "streamlit",
+    "survey_enhance",
+    "torch",
+    "tables",
 ]
 
 [tool.setuptools]

From 84ab3ff508ead2588174a203f86b9e17ffd064cc Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 11:59:00 +0100
Subject: [PATCH 02/14] Add missing uploads

---
 Makefile                                        |  1 +
 .../datasets/frs/enhanced_frs.py                |  2 ++
 .../datasets/frs/extended_frs.py                |  1 +
 .../storage/upload_completed_datasets.py        | 17 +++++++++++++++++
 4 files changed, 21 insertions(+)

diff --git a/Makefile b/Makefile
index b254604..ed38cdd 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,7 @@ documentation:
 data:
 	python policyengine_uk_data/datasets/frs/dwp_frs.py
 	python policyengine_uk_data/datasets/frs/frs.py
+	python policyengine_uk_data/datasets/frs/enhanced_frs.py
 
 build:
 	python -m build
diff --git a/policyengine_uk_data/datasets/frs/enhanced_frs.py b/policyengine_uk_data/datasets/frs/enhanced_frs.py
index 28b873b..6a773e9 100644
--- a/policyengine_uk_data/datasets/frs/enhanced_frs.py
+++ b/policyengine_uk_data/datasets/frs/enhanced_frs.py
@@ -38,6 +38,7 @@ class ReweightedFRS_2022_23(EnhancedFRS):
     input_frs = FRS_2022_23
     time_period = 2022
     end_year = 2022
+    url = "release://PolicyEngine/ukda/reweighted_frs_2022_23.h5"
 
 
 class EnhancedFRS_2022_23(EnhancedFRS):
@@ -100,4 +101,5 @@ def loss(weights):
 
 
 if __name__ == "__main__":
+    ReweightedFRS_2022_23().generate()
     EnhancedFRS_2022_23().generate()
diff --git a/policyengine_uk_data/datasets/frs/extended_frs.py b/policyengine_uk_data/datasets/frs/extended_frs.py
index a421d0d..085ef38 100644
--- a/policyengine_uk_data/datasets/frs/extended_frs.py
+++ b/policyengine_uk_data/datasets/frs/extended_frs.py
@@ -94,6 +94,7 @@ class ExtendedFRS_2022_23(ExtendedFRS):
     data_format = Dataset.TIME_PERIOD_ARRAYS
     input_frs = FRS_2022_23
     time_period = 2022
+    url = "release://PolicyEngine/ukda/extended_frs_2022_23.h5"
 
 
 if __name__ == "__main__":
diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py
index 8d97ba1..6d6490b 100644
--- a/policyengine_uk_data/storage/upload_completed_datasets.py
+++ b/policyengine_uk_data/storage/upload_completed_datasets.py
@@ -1,4 +1,21 @@
 from policyengine_uk_data.utils.github import upload
 from pathlib import Path
+from tqdm import tqdm
 
 FOLDER = Path(__file__).parent
+
+FILES = [
+    "cps_2022_23.h5",
+    "enhanced_frs_2022_23.h5",
+    "extended_frs_2022_23.h5",
+    "reweighted_frs_2022_23.h5",
+]
+
+for file in tqdm(FILES):
+    upload(
+        "PolicyEngine",
+        "ukda",
+        "release",
+        file,
+        FOLDER / file,
+    )

From 153339bc0d05f46eec90d5955eef9a330d6000cf Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 12:17:18 +0100
Subject: [PATCH 03/14] Fix bug in uploads

---
 .../storage/upload_completed_datasets.py      |  2 +-
 policyengine_uk_data/utils/github.py          | 77 +++++++------------
 2 files changed, 28 insertions(+), 51 deletions(-)

diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py
index 6d6490b..513bbbb 100644
--- a/policyengine_uk_data/storage/upload_completed_datasets.py
+++ b/policyengine_uk_data/storage/upload_completed_datasets.py
@@ -5,7 +5,7 @@
 FOLDER = Path(__file__).parent
 
 FILES = [
-    "cps_2022_23.h5",
+    "frs_2022_23.h5",
     "enhanced_frs_2022_23.h5",
     "extended_frs_2022_23.h5",
     "reweighted_frs_2022_23.h5",
diff --git a/policyengine_uk_data/utils/github.py b/policyengine_uk_data/utils/github.py
index 43a05e2..27c88e1 100644
--- a/policyengine_uk_data/utils/github.py
+++ b/policyengine_uk_data/utils/github.py
@@ -1,8 +1,6 @@
 import os
 import requests
 from tqdm import tqdm
-from requests.adapters import HTTPAdapter
-from requests.packages.urllib3.util.retry import Retry
 import time
 
 auth_headers = {
@@ -62,66 +60,45 @@ def download(
         f.write(response.content)
 
 
-def create_session_with_retries():
-    session = requests.Session()
-    retries = Retry(
-        total=5, backoff_factor=1, status_forcelist=[502, 503, 504]
-    )
-    session.mount("https://", HTTPAdapter(max_retries=retries))
-    return session
-
-
 def upload(
     org: str, repo: str, release_tag: str, file_name: str, file_path: str
 ) -> bytes:
     release_id = get_release_id(org, repo, release_tag)
+
+    # First, list release assets
+    url = f"https://api.github.com/repos/{org}/{repo}/releases/{release_id}/assets"
+    response = requests.get(url, headers=auth_headers).json()
+    names = [asset["name"] for asset in response]
+    if file_name in names:
+        print(
+            f"Asset {file_name} already exists in release {release_tag} of {org}/{repo}, skipping."
+        )
+        return
+    
     url = f"https://uploads.github.com/repos/{org}/{repo}/releases/{release_id}/assets?name={file_name}"
 
-    file_size = os.path.getsize(file_path)
     headers = {
         "Accept": "application/vnd.github.v3+json",
         "Content-Type": "application/octet-stream",
         **auth_headers,
     }
 
-    session = create_session_with_retries()
-
-    max_retries = 3
-    for attempt in range(max_retries):
-        try:
-            with open(file_path, "rb") as f:
-                with tqdm(total=file_size, unit="B", unit_scale=True) as pbar:
-                    response = session.post(
-                        url,
-                        headers=headers,
-                        data=f,
-                        stream=True,
-                        hooks=dict(
-                            response=lambda r, *args, **kwargs: pbar.update(
-                                len(r.content)
-                            )
-                        ),
-                        timeout=300,  # 5 minutes timeout
-                    )
-
-            if response.status_code == 201:
-                return response.json()
-            else:
-                print(
-                    f"Attempt {attempt + 1} failed with status code {response.status_code}. Response: {response.text}"
-                )
-
-        except requests.exceptions.RequestException as e:
-            print(f"Attempt {attempt + 1} failed with error: {str(e)}")
-
-        if attempt < max_retries - 1:
-            wait_time = (
-                attempt + 1
-            ) * 60  # Wait 1 minute, then 2 minutes, then 3 minutes
-            print(f"Waiting {wait_time} seconds before retrying...")
-            time.sleep(wait_time)
-
-    raise ValueError(f"Failed to upload file after {max_retries} attempts.")
+    with open(file_path, "rb") as f:
+        data = f.read()
+    
+    response = requests.post(
+        url,
+        headers=headers,
+        data=data,
+    )
+
+    if response.status_code != 201:
+        raise ValueError(
+            f"Invalid response code {response.status_code} for url {url}. Received: {response.text}"
+        )
+    
+    return response.json()
+
 
 
 def set_pr_auto_review_comment(text: str):

From b67924cca8fa9fc6298b3f5ef976f2f770561a28 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 18:29:44 +0100
Subject: [PATCH 04/14] Add Jupyter Book documentation

---
 .github/workflows/ci_cd.yaml         |  27 ++-
 .gitignore                           |   1 +
 Makefile                             |   2 +-
 docker/docs.Dockerfile               |   5 -
 docs/_config.yml                     |  22 ++
 docs/_static/style.css               |   2 +
 docs/_toc.yml                        |   5 +
 docs/{Home.py => intro.md}           |  16 +-
 docs/logo.png                        | Bin 0 -> 10188 bytes
 docs/pages/Methodology.py            | 301 ---------------------------
 docs/pages/Validation.py             |  88 --------
 docs/utils.py                        |   7 +
 policyengine_uk_data/utils/github.py |   7 +-
 pyproject.toml                       |   3 +-
 14 files changed, 71 insertions(+), 415 deletions(-)
 delete mode 100644 docker/docs.Dockerfile
 create mode 100644 docs/_config.yml
 create mode 100644 docs/_static/style.css
 create mode 100644 docs/_toc.yml
 rename docs/{Home.py => intro.md} (81%)
 create mode 100644 docs/logo.png
 delete mode 100644 docs/pages/Methodology.py
 delete mode 100644 docs/pages/Validation.py
 create mode 100644 docs/utils.py

diff --git a/.github/workflows/ci_cd.yaml b/.github/workflows/ci_cd.yaml
index 4df3e8c..0e7afc5 100644
--- a/.github/workflows/ci_cd.yaml
+++ b/.github/workflows/ci_cd.yaml
@@ -30,6 +30,29 @@ jobs:
           user: __token__
           password: ${{ secrets.PYPI }}
           skip-existing: true
+  publish-docs:
+    name: Publish documentation
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all tags and branches
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.12
+      - name: Install package
+        run: pip install -e ".[dev]"
+      - name: Build Jupyter Book
+        run: make documentation
+      - name: Deploy documentation
+        uses: JamesIves/github-pages-deploy-action@releases/v4
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          BRANCH: gh-pages
+          FOLDER: docs/_build/html
   lint:
     runs-on: ubuntu-latest
     name: Lint
@@ -47,7 +70,7 @@ jobs:
         run: black . -l 79 --check
 
   test:
-    name: Build and Test
+    name: Build and test
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
@@ -68,6 +91,8 @@ jobs:
         run: make data
       - name: Run tests
         run: pytest
+      - name: Test documentation builds
+        run: make documentation
   check-version:
     name: Check version
     if: github.event_name == 'pull_request'
diff --git a/.gitignore b/.gitignore
index e417f9d..61b49b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@
 !incomes.csv
 !tax_benefit.csv
 !demographics.csv
+**/_build
diff --git a/Makefile b/Makefile
index ed38cdd..f78fd5b 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,7 @@ docker:
 	docker buildx build --platform linux/amd64 . -t policyengine-uk-data:latest
 
 documentation:
-	streamlit run docs/Home.py
+	jb clean docs && jb build docs
 
 data:
 	python policyengine_uk_data/datasets/frs/dwp_frs.py
diff --git a/docker/docs.Dockerfile b/docker/docs.Dockerfile
deleted file mode 100644
index 090ca7e..0000000
--- a/docker/docs.Dockerfile
+++ /dev/null
@@ -1,5 +0,0 @@
-FROM python:latest
-COPY . .
-RUN make install
-EXPOSE 8080
-ENTRYPOINT ["streamlit", "run", "docs/Home.py", "--server.port=8080", "--server.address=0.0.0.0"]
diff --git a/docs/_config.yml b/docs/_config.yml
new file mode 100644
index 0000000..711cc60
--- /dev/null
+++ b/docs/_config.yml
@@ -0,0 +1,22 @@
+title: PolicyEngine UK data
+author: PolicyEngine
+copyright: "2024"
+logo: logo.png
+
+execute:
+  execute_notebooks: off
+
+repository:
+  url: https://github.com/policyengine/policyengine-uk-data
+  branch: master
+  path_to_book: docs
+
+sphinx:
+  config:
+    html_js_files:
+    - https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.7/require.min.js
+    html_theme: furo
+    pygments_style: default
+    html_css_files:
+      - style.css
+    nb_remove_code_source: true
\ No newline at end of file
diff --git a/docs/_static/style.css b/docs/_static/style.css
new file mode 100644
index 0000000..e511f94
--- /dev/null
+++ b/docs/_static/style.css
@@ -0,0 +1,2 @@
+@import url('https://fonts.googleapis.com/css2?family=Roboto+Serif:opsz@8..144&family=Roboto:wght@300&display=swap');
+
diff --git a/docs/_toc.yml b/docs/_toc.yml
new file mode 100644
index 0000000..4b8640a
--- /dev/null
+++ b/docs/_toc.yml
@@ -0,0 +1,5 @@
+format: jb-book
+root: intro
+chapters:
+- file: methodology.ipynb
+- file: validation.ipynb
diff --git a/docs/Home.py b/docs/intro.md
similarity index 81%
rename from docs/Home.py
rename to docs/intro.md
index 674f08e..af04a85 100644
--- a/docs/Home.py
+++ b/docs/intro.md
@@ -1,16 +1,5 @@
-import streamlit as st
-from policyengine_uk_data.utils.download_docs_prerequisites import (
-    download_data,
-)
+# Introduction
 
-download_data()
-
-st.set_page_config(layout="wide")
-
-st.title("PolicyEngine-UK-Data")
-
-st.write(
-    """
 PolicyEngine-UK-Data is a package that creates representative microdata for the UK, 
 designed for input in the PolicyEngine tax-benefit microsimulation model. This tool 
 allows users to explore the data sources, validation processes, and enhancements 
@@ -22,5 +11,4 @@
 * An accurate representation of the current UK household sector *now*.
 
 This repository is dedicated to the second of those. In this documentation, we'll explain how we do that, but we'll also use our model (the first bullet) to see what we end up with when we combine the two, and measure up against other organisations doing the same thing.
-"""
-)
+
diff --git a/docs/logo.png b/docs/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..12736e4dce8158bb6ed2557a2bf9d0856811758e
GIT binary patch
literal 10188
zcmd5?_divC{J+^Fgp@KXD=Sg<Ohxw0$4*7|&A8U(ULy&~N*QHla}}=3y=G)bcE&Zb
z@8#OA?R(t%{_y<|z8(+by07y-ukn06U*p_J13e7}x~p^`5QyQC=6xd&h<pzCyO)Lv
z_!&6K;Sc<xeW_{g1p?8to_(JKrG8=u{&~*JNaG#|(Zjh6yr6iYrmF@5eT$_(vZDln
z1nnN(S2OWDw~FocG%>4M+(7Dak3Ok3EL!r2nD3~2_tiu%TSLinlj4$ou(w#vd2fn8
zv={V2=i{s%T15&<z=9d)sF!%%)LucK`wxfJdzfOM*okbFV0`oDz9*{~5rR(OD$MM3
zo#iL1&rf$Ppj>QsS|e;rdUcLdcSPf_49o@HO);#va{3}qa%=N+>UbY<c&tP$Rw8a8
zEOrx9cNL7s*5Lk5W}y3h)}x=ePfm~1ou#D)d)fre(^4I6E1L9Eyboe!#NFrt+pem=
z_(&+xdcgk_`l04)GM~HG*l3!(6mq-)SuM!Ek#@AQToJA_pg;IpiS_db%|S|UVy|2L
z%6Lx-k^9cZ)=6LgSX7kmk(@U58WsoMIF`j!PI^rVDy95aj%~?YR=Fjv0~c;~Ju%IK
zvfyl4`Ij+I;0*(hs*1f|N~3D)pM3bKnR!RI3g^iyiwbND>YlM3-!6fQ-CVxAd+QXv
zdrLCQR{zB*F+$;Vb_(#ljwr;k7H?#jG7dwV5ULBD$ge1JRqGNgZ10Tfx~{ufiaeYJ
z@r_XH6QMc!arP%te79p{QmUrwRW41yUktcap(fU_;QmF0+cnNbQj%GV`RU-A3|Zc*
z;Tv%y@!hA-EUz~5Brv2aHxwn5CM&Fb>*ibN=j1_~_v%2yLOND9KQc``t+Nh?EZgw-
zV5u=TL#W92TaLYLgxxt?n^kN~vBF!8UIn|ol3BjH8gMn%_H!VbIL%{kYtZLjzMAE#
zV5Y?nBQS3}k1dHW(y3FRgV+)H1k6~RQgUnNXlX0xg&OFnmD+=N)jMc!`$dDY*)yNw
z+IXl~Nhg|<hi(s%YA{^fhWFpIt6IB{^PYPR0tF8jJ>xy)vh7f!i*{!vevP#21M3~X
z9yfdTv{K_WJG>s^<>S9<NQ~aOa?Ya1w6N?FcZ==jING}`#2(RgC+PuklEMsX>Q;gI
zYTmiHDSI@d{dVL}_nh>WJP7278l&Ev;y`+$zwXVC=+Z>j;}$~qMozwFt?-^u3t*cX
zVV@2ISMxw(bGFX{K|ZE9OMA)<70Id2lN_eEMxo6w{VmD%M=)v!TTv#z_SP<0CaKoT
zpFE%46gvk3l@5PZg|(OOOUmTl-YcpuL)XMC*P0~Tm;WgynsoIE5O|$ovhCb#L6EsU
zS7Zq!g%MVFmz(a2R@vhXh`sE4A$ZyI1e*&W(64}>=jR$W()xF_P|Ee4d%R7iKSYcn
z&<ex5U0lI?h4W;0CJL<O63dF~m5<S!jH@#mUAB;1)#FkZr~_JPCXbq&mOJ&*fL~l%
z!*#Y+CQqm*ajX8!w=RCXJWMw@^j1d}>bE9d;L~o8UF*ucE`y&)0@ePUu^TNKd=-=K
ztL(Skw+P1wQ92WXWvFV-fr6{A^0e3GXc;D{@6Wtnh{&56`sg_-oW-eo2Q|0IKe+u>
zl*65k%Qa1UI!4VM1ma~XrJ0+wv)mamb+hGB9-k^91Pvx$f1hLi&`#&ef|=wB{`&Yx
z+MR#Ww@PW}BGApgyCRn_2F;h^n#c8e9@iMZ6DY7uJK)d}loZ;gy{LO-y_Hu61d97u
zMY*|Qvfeu^%nD00^oJsw73P)BITv99^T*>Pl#RUe4OppaZ?`jmKy!tVkO`?It%*mj
zV-BaLMV_auvNvtxxuRfVKaCyp4UVb*$>k@7SOp?;R}>;e(gql?E2ljL*JXCJbGfZ4
zI3vKu?{2pgM+N2^#LrYA$S2w70K(lI9#$34QHbP!>>i;{jQ+hs#4_$y9bUH`=$?4}
zUoX$``YCk#@H$cBv_a8a+BVmPtZ}@>*%-!kcl7R1=}4(A%F%tVsQe+#x3VA}y7MRq
z=U?*-UYUfNA4x0f=IPGqyGzN7&HhSldp?~bykB&&8@x?L-sNOmI3D?1rek!#^4K!E
zZL&Ld%qybb%eL2wk&dy9)|H65<VDY}s(7_7cRa#{BfS4PbTJ(~tDIglzZ0%FJWoI4
zV@fwoUCWR^S;v0j@s_IAZQk0$A&vtC61w-27hdaOZ|W)SyawA1R6fWv*)hI(E$%Q&
zizTJ(``*l>*~3&p-|8Cdriv#WD3>c3BzPNhG;dnr`tbTDd7b<VJzIs2Yoqa>F(0e=
zP!KuXMYL&)S_;p)G^UbJe9FY%-4~47N=Qr&o8Xo@3sJ3_n8on1z%W?Lmtb?!i1O1C
z6~JakQ0&K~w;=Y?CVkS5y%Jw<eBAGa<mW6hjgQ_CCiXpO9N$~hPZG5tb`}UD&`ZOE
zfB5W<9%$hE>-wl!^s~0^;H3UyE0Z0GQ9n6uowv;M(aXk`b_W<0RF{5TU=-q>!c?-L
zN8u@D$!o4wHg5v??c^VG2*pvG`PZ=*lPS?X7hcSGD>7lY2ie~v%+uc<p5KF^v6{X0
zW=qElMNEFr;>I_e1Y=&VGJt}8isubbg1%+jSPb`~&1AJ(-Gy1{cE8oU`ppl-ubCT3
zul%wmP$8S_cTAG#{pndkm@xIJSf0AaKQ5Px$A<bXu0VPAP^^a%Yi>n~XPOIo#VNrx
zKp@q)F+Fo0l=4HGD8Mab+2)>x+21&Bi-5r_3hzH2x64hNKIKkMQIP1y$m2$G$2Q86
z_TrYxj^4Ai*v?Vc?h`daf7DSmpCyh#rG?6FHyHP{QkijTbnQ_OqsT3#$fj){=H_i1
z@BW{sX-S8`c|%XF1;258Tac^OeX$p<G7+H7W`n|x)lI*6xhJAxPqlFa)|kd~@8--q
zs#?lux%O4sun7>T@%E5>+@bF0`!tc}9}c$~b4+alO>Z9O8y6dqHSEV44<#&zdBfF$
zqt?m4KQ(DPJ`mdc+TB2vS7_t6)AWTWJ#P^^)42Z``tccKXmobKza)z?!>cN1KiYd}
zyz{KX`U}oaV5GC?<%1IENG%UvVhGAZO}G=1AZd{l<Pv!O;2zD9#)Vr&Mk<BL<db}$
zGqqIBoq5`s))FGzmUK=;<x&q_-IVoqbWxoM?;j=3eO2#pG%6Az6{h~%+qCg62#XUn
zuF$=B;Bp6`kLaWzWqPN{Dc!y%OC_z(P!f?}|IsJsm*E!Ia%y=hOLOR+B8x36uh1uM
z*nNFBG+O?TPu>?Yhgki9C4htzymF;$4;-?mOzbPTina8{<%OHd{b_Ui%d}0=&e@E9
z9Jrr}NA%}C$w44{m;1cF97X;4R1JE*`^E0>eIW5g$a@b95-iPv%L`%GLnda7UXDZ3
z9$@HZ8O`+mk!_~c&w{w($GhU2Aq)X5j*2xk7DFiS^;#Y~_5trt%k7QN87)7BF};?j
z&KH*XQR^PX{f}uVe^#p_Ix_fzBj+C5>EEG6)SKr`Y5r&lnEPbNuMzp0dS(iqgXgoH
zR~g3;qBz$Vn4X%Hv(dJkf$2igHFa$HS6!r&dzw>)oj#d)|7e16)JD5pUpk2Y8_gCM
z3tQ1_{KdP|KbbtesBboXHYM4Z1;pbxPo#SF*PKE}lb<?!)iQ_a6JdE#$B8O#0Yj1L
zE9;Fejx0yVBg^eoyiwPwYGZiRJ4o<lKr+oW+YKkG=2eNvz3F32OIh&z??Q8do6^e*
zi7GlX{&Pb5|M|wKNX(wRw5nn9)1JGZ_}ey|RF6JTRhDQZCy1nKCpA+Z(-3pqEQ`9!
zr^&<A4n9HWgsgmL7Q7v)wUd~-aiYV)GDK<E<@zgUG+&fA91?hResuk(sSRtlsY280
zSAVWcE!hH1YZ=bcH&mObmJ6rDc$40KnCEP{y1wA8Vqw9-0DgAHrUlt!RGZC*qfJ8_
zLZ$I#b`DLYzfGYNqh0c!3#V0|n%@iO3?C3Z;?+!a=1CqhFF6Mm_Qcvo+x@-Pg36Pp
zH9{TAo+D3?4s41g1gwrrr^R0o9-3O=Q5;WF{h;~l89w+?^%~|1?R#^yNc)4qbKvC@
zbyC3=F0$q&ss}s`_qf$d+mV+wFtr8cv=oI6lw5lJqhHfjdz#P0`yrVwm(D7kC#zdy
znnGKWDk(z#nz;gj7koLgqw70ozWZGiIMB5T^ABW$OVB_y#5%Y9wiYLQGsDP-8uiGY
zkhvc>teswYQr3X{Z(qFvy+uAb;j(-q@|QdJcP7tqk|}RQW#f8}@t-L>A_b(CfX@Aq
zgA!|6BwGs{dU$=GVd09;#DWL!wZZa?FH{qY8w&IVsx@UIu}bQzj?-!v-?j0i_K0IQ
z$S1FNpHF#P_D-d~iv&R1m%OO^GW>!IX<F76UTnXYj;LG~-C^VdRMO`eQwX!*uGbl=
z@4oL}VY*f(=71IvdIc1rPXWa2B=5Svpt!~0$GHC-_q9oD(3!a%8{dHdsp?&4^i#M^
za-kwpGOnVjD4j_o_8WPyS+wl*>hFK0AFXOLNTq#4F>FF~lnqov6GV5<`n9-pB+vSS
z@>V#6abL|nh;#-gQasMLMSRdcEI0KhP4FiQDuqN9dOFD7Sdb;%G!r<(Rf5!~6x)YP
zxNoo7-27sSt-MP<n!Ohye335iQq@D#IsMS@`{%$Gs=j0-Mq{?P*uL628zWL2y65M5
zB{=lr?dce~8*Pbvp9jc5pwLRbxXV1P#;X)&?NS346xJwz{<s{7iQ7YiD8+yDbg^(F
zWa2&|$>_@3`7rXD)E-TS<fTIv)hqA9S&}z7H{`oq8aYTei@(az9@MTq)RQFresO<m
zIwqSRQj>F4+HjI9_2m?I_&}o^lY^b{7_&g^IWP%64_vHxM+|?oxO?|}QZ^d|q0Baj
zoa)6gglEfIO0ViNT0x5M8_OHCGnpFx%V2&lu=}sW%)Toecc`ZL`HtM8<=`Rek~Wh+
zbDpf(iMtO#h3o3%iE9)>$&ssCnS~|N5e_jIz5Z!<1t64I;Xlix1MtZjsM9wi-_ry>
zN3TW8rx^OQB*yt>SBV>0cXIB%`|fnf#{8$`*l^dSXaBA>H!94pA6E;)YgsY*NQ;BD
zADBxYb*ydQJKw)pPlgg>n=g7>XDFr;ETCmd7moMNJ0R!Kyf73?nQL4zp4Xp9Yo9l@
zN($h6-g~^gi+xh=5y%(WP;uNbmVaEy_GhPje}??lq&wSKU#z<B!KDwTLDQw)yuMBc
zvwi<nfl_iYWEWSbM-gAvwB9`T<_#|&byT%&#K(2QX@J^A8%n-E8>78QS#~jpEI&H~
zogpl@gTlP@3b=4B?j@Mjew`Q#%)8aR`K-3|a_GnXQ$=mpt^s3kQSL_XwOdJLjg-ZN
zUj=rH*mR1n!e%nr4}5zPj<P#jwSWwWl{aqD@n}hk7Q3$BD_%<c5|v=MqkNuq*ifJ=
zwBc0pkuozN^@NOKvo<A2BuhuwJVdH1bn4dX?<x>~9}U-4XKCA={IlC-rqayGMSq*|
zRXB+@9bfMin=+@VfkMADzk<9kkUFxr{I1LM<&Yi#C+O3K5XBd|*!8q!)3X{l=r!9X
ze_|17vA5OE6mjcJLJ)A|=pMJfUy`CBEmLDW=cE%8kMu_;KLwQw?Jf23Plk(EN~i%r
zlS?7Bpxe?%CmUL`(i>XU;<pPFFJ!u=wj3zGtq>QugR0$~1twfuvn9ycxnMQkTa6|~
z@zO^Dbt{;;*`KhziZ)Yw;znz(+c~WdjOj!y(fifnn~!GvzNc9H$uB+8G?@Wio*rPr
zl~J)cDJqZEr2x(esl6}@?VoYO!<v;e;WqIJ-eyO3!4HebHjg|^nwKmhJAxv=yK6PI
zMu4gg{;chf&`DVua424l*ixX!-Wq;!c1F@Y-dmcVphf96w~Fax0?{gn?+<=0w=<k8
zx^6QbK+MTSerNm{*!~##Q|1`nI%6e6O`em>1vvUC%^YRQ)uVE@e6e8^bljnDNE{X4
z;Cwwk^tHQ`HJ7Pmy)v^?P}GFzpI_a+CFdh{*Y8I>udD=m3lqAKTHaYFyr<>$b&i(}
z%(rmL`x+UitJ&l2Qwf?T&ydvQh7Zg$Y?vb)nqM5ICHj=q{_a=CRPJg5`Wn!Y`G@p3
zm7$)R&OFQtbUrnlOoQ>X-sNzA#0N_r@4;$Yl9}ji?<_O@^t8zE$|yaQP`<@n;B5Q9
z*FJsRQCBN-4D<BrmQc={;L_lqM=C$73O|(I8J$OJ<W=#;OY{OGbfzN3X6TQS@BTR*
zc-Vi5^t~*ROnZb?r9gjAFQKB@0<_}rVzroJV9)_{8BjB}h3`)t7}KN(;30VPOX$6Q
z;fa-gF5B?vOPOu15O;sKCh%8<&(E{7oOr~hmIV^5Am`v}o_!@azW`nlg3A@L+WGm7
z)+E!WZx_EQ1M|nId{cw0JTC|1Ji1|Pl?-<0B)Gfou`8`aR@M>}zCVJ5134iPzjnI1
zc`6Y1vC(YiwegzG+Z~#3H)c^X0k!cNQ763Lnoz(>YI)T7_@wlQ3Pl?e_8}bCh|W9s
zc9@|=qzVvV_C2~~FjU%V+BSrE=iqEMRW$v3(<Vba*_Nn(HLkn9Bf;Aj*F8vx#t)Qr
zdGks|p2xT!!*Ii1`kq-PZOnWfE87P18E5;WC{>%+9&GMGeQtd`S~j9-y?iM|eN`Ss
z?8njEyB~S%i71uFml>^Iq72i!1z+)8G5b@U9~ICJ4SE+=BUW9(sWLxN+S}w9bmAak
z6Fa)}keT{95Hj6XuFZtE=}^~8Zoz{ddlz8a>U0nI8FwTtGK87Fgvjy&cq(8d^N;MG
zDJzC~iRwB8q-k;Dgp~H|zs2K?6-NF_PyrHwF0R1T_Dm0gVBvA-hsz^{RT=hoQX;J#
zhYu<_6l{-?@kwwXc=FfdK6aZG>EwE31x{QlUbMZd!&ne~Hc*)+y7YtGb(Np|Q3fax
zd7)h-^HCaX4JDyxDSGuu;QV)7(4s}|l$=+9;ThU)Qfx-8r@`(P20=ybfb|4IbmY!^
zj6{06oGuDjoT^eXw~IVqoSdhw52{0ieW96kl;r8e(mT}2o72n!2P8e@En}amC#bt&
z@6{9&rq({m1-+AaCvXS@mORD^W)VBfBYbw80940l=Gf-cS9`EsJcC&e9{<?wHf6Ae
zz(ZnG#-^8kl@#^fe4z^v&meV3?f1ZF*Hj!e5b|XoLERoD4^#DX7A-ufO7+nF(4xmH
z=JO?w&YW;9&TPUES*yiyx^bAb$OE?eQ!i`rlj#|!^=;;wTl|}*9;a8EQUL}jxJ>4+
zyB7oBZ?Zxi^ZA{^s$)*8K<qrJZdNwzkc#Rj2ukIfc1Kg?9G@%Jf+S#yKsO4)@v#kd
z&g4N|e4OeQrDy5gHBKT^x;6EzKvQ?`njLFb(atpDxgO_TMgu#L<`=z-x+})FQ$Dr_
z()8Z(pN^=i2@7@ua#Yo=FmqSgqJCgCc~|msk3DA(MlxhWLI`rs>X55q4W_%(#8lZp
zrgk)wqB+bu6f0<+k)!_Y{=uK&FDmTXz3Diq5@77lyh1a_urqTRFSV@dFz7nwHkXfm
z4O{r3qJN(#%7Geo;96!<=-{5=5Nho^*ltj^$Z0svLL--9=Z!{{Ah<!`9l)gj&tGDh
zXJv=?H)+|;^`BXHo>=^qQ)phtp#~iB;dl<N*RtE4U~P;&^9d0wMxYIT+<&^eKQ#&L
z?hT?&cZO`?9}@P~dBxV7KYsMdRmCR#Twd(W(C?zq`z((%`|<M9L7ma|NG8n*UhxR7
z(yKkT|GT_o5w0!pMHnOS$ku*OH$Typ=yI}8XHITojt@p2LhWHXZU=`qmDstDh*?5e
zoS<Nq>adMd^T$HINwpDqv%)sbY-7wz>K3U>c{)$lz2Rk;uls87VtI~3#5g+>)ozYX
zdxS1=fVv-~{MBARL+p9~{$NmPKteURR5a1R0mkJZB_O}1JR*sy95zFaLl_pD^AAx*
z@zIi5GZh(^vHF%qU*nJpfmIFoVM2{SDYj`~ah#C9WfXd6`WSIV7hpAz>20OtgCEqP
z^r4;WY7^m!#m)IDWz||$#hIydB`$es%~`8GVr5`odCwez=~muWgfOr=EW_B^uufC3
zoFNqQpR@{bbgVYhTlo#p+SvN4O_<_chfKc;x;Ll4V)A#WnhxDpKZVf<|CBWMu+2{g
z399k}HKc&ucyvYF_0rXob*S=x3>m8`!Z+SDxK8xD7khQNsQ6fVqN)%kG014vYjI~a
zwbu|YPq$F~2dH?M-Z~{t>KWYnT5JQW4zd<)<7iF&E?kunh~<My#AO7qox~Ua0V-XR
zLvclyEliicD%H`t?a>8brw$U18s7O6xV=9m&ZsMF#s(MjeIoT~41S*;dbbb#ZuUCq
zZkJ2(l<Q*CsSK#jdJ&jW&hAo{65o3>rc%7DIGG$^c2XZYy?7(Y)iNnnzSd0dFo5#x
zVqVh4oTd2v0w_CN%{NZ@)WSfn5;5yWGY2?(NCO}6+$-Y{5%%!ahWfA<&w#F4?l1H^
z6v@QFbEWvcw*;#f!`0-BQ`e8r9*|@j0eYvA^mcwai0%QEJ%L@Yz*1FneT<smJ2C|n
z^5Y^Y6r{sMbq^l^f`e<{XrvW(APyI+PVI^;H5wj&Q|h=fQx@>BnON-;YNmM>_PbA&
zral~<Ez_GK_Xwd)ycy70<>0ijAhWeGy?;4Uw|vabn7epy?7`{I`*s@|_+^SA55H9s
zqmyxJMXcdiu^(>&9u;&bJi6R5*Ptb`ysw5U4i-v{vhvL$sGW9cV?v*8mEu@Fmj!mk
z01Kz8*h4TU3|n;+Ybfg*K_Q_0N#p)r@t0;n0n#RQ?Y(1#S`m+??ic-G#1Rj0G;4}=
z?TrtWQgMe8C*&btMKwzCi<E7WsK6K39E#CkrQZ8s#nI}EgM}~IdoG`pR}S~y&(?e7
zTc!&`%Y@Wg<SpB@+kQTA|HF6|GqD)U5lZk~?pOJJ+)BB^7-0UnyfxNHB)FxF$(>g5
z|JqAG+c|z(Xv~Z9drD^W7pH~I!#h`+I2forZ(BgD&Z^&m754~b`b1s-n)h~~fsjlc
zR_<_;I#vczQoqct&<XjC?wMBl8?(2CcyOO}wOpz5*&ed99{1_RuN^gbxq))(AK1x8
zowf+YhDEXiPF|4|#L_<Wi9mCq@4_>aHVrqNxX@*0pCF;$7V-Am-0{+k1o1aut4)<E
z${jVl{3{aPp_Pb+M_(|rV>OTGOrTTsh`i{s!>t_XpZcJ5UK`s=L>Ng74v2-8DYq8B
z!w>hE!NmeC_Lt)A9Z;w1)Dx^=7K;(fcV}|HXetq>YS|fDnI6UHiMRyCdSCnbb@RpV
zN*k}UA$3C}6f>yd4^;Mg!_4<2faMK=s13{WNbS5%Y)Ok5-r3bUgO{$yAbRp(QJ*Qo
z9PE9c+2NLMqS(#RnUF?AX2&~=ky|tt;dEya$@zs=9*+ElKrnS8-sL<HO?$Cg#T0T@
z@$DR~iKPq8=@i0S1lh4j<hU}0Cs4K*oOT9xMss)4qowVL9;OE-ZH8p&$-g8?Zwh;;
z8Du+p;n58$+^sqU`j^)=2G-R*R=?<y^k&O`iaO4z;tSoaY7ofK6w*ae%AR9AY(19h
zoLzFX1~ZXDFz;A`=@N5;|3tGmtaV^}Z$N}I#aNJauK*_c%MEW*x>qLZDjyu;pQC|i
zLF#L<X-g<Fo$14vF4R*<^)K`!BS{_SDCrrD?rPcDR2*U=Su`vAt-F<44R~r456A=-
z0)EnYf@|dN391tC?@ZXfXK5J$^dV#E$16Q<hIPU-@5XF<wVP8cd@nu=<r^pa6c-gk
zw(qzqUZ!**(VuoC70Ho!h*{R?7;So=etPYDBKw({Pp;=l-jFlqc&&skS~zPO0XV7b
zxUI0bPMjQ^CA^aa+vnS~&&DDplj&EJxR1(ZHK&#-Rbb#RTrM(s*H2(k;U6G?tD|sW
z>+zi9o##5+L9{KZGOspW%*-*eR3tBj@9&!3#)x@TKJlywH|5JwqULr(d97`d#}E~v
zc?g+{q#*tY#LFk9#dbOHy}G%Idy@yur{TzGnZJY%u%saKLA%sG1{D9*XG5Cgv#Xhq
zI$q(^YpVB4tZQkFhXD$34h3>(K_ntf?e|*<az(JCX{G;Qr(8hAMT4Mn(jR{B{cB|O
z$X^D11%^YK-22?xd9)}ZR?C<UnA3BkQMf{LeXUIO!^Z}~!{Rm(B<rqb9kUe(0x$oT
z4JjRKYFs2T`XlpMiEwU3H8?!I%*P@i)6kgkbupP7+&BoSeM?Fub$x=(GG^OP{g31Q
zLzlOQl^@zODB76;B4#EToyQHfGKZ+_@Y9XiY5<FH)gFMtnMq!YNlDF^s1;<slXYw=
zCOTIzc1Sk&)<sbE`?FqPtN;9tdEd<XT0%~b0$RlH#uWPGV=j^-rCpcSckm>@#<|lZ
zlu^*?>kOkD-OLDKrT;Sr0YgB)Dn1^Ic_bC>qkjs~Q&QoY=Y4;srhw$qZyA+|x^fXA
zERo|Q_@{U&9A|09|GWBlClZ=wq+Zm4Fdt5$P3h3_wevzZAldCJ7-}OMe1K+RVHlxv
zq;$x%V}rT9dNiISR!D1c7Ky98y5Yl+w(=Q$mZc}SCl7-|-<ExDxov?Vs6H~_AjOYr
zowb#<Z&=@2hM{30u3hbwGyhA)dI&bW+=ZoW`z6l*EnY;3hUBMf^*qQTT-ANMIv}5I
z-fBd$-Tb!RG4z3-!t<i=!r0vk$_5`wB#DC^<C079dT;HF!*M!Jd(Wm>4(|tt{;<J7
z6m(ofNS+OAakEBEPHcXrME=*S-7_WoisJRvL~-Iv!^+<-zE=D)uUQ{+lHhbzfG`ND
zQx={{9(?WmVq7%sBM9hdpoDyCmdMDU8>|yxvu{qsThu|K1+mF4W<%2a14_o+^Jeyc
z?b}=}3ky;59N*0RSq(p*{I1AV7!q&3TM7k--E(!o@jPG?i_<?45>{EA4B`q?zEttt
z%z(pi1TpNgTE#4e2Xda_V#V<gS(sX%$@jJR;!#C6ypVd@h&u-fc~_n9)R@nj`s1&2
zDXwW$D<}lT5pHRcpYK2D1~vXSAt;d9`Imz~0Ezp%s!0!c4o#TV{a!pL>1=sx=n1bB
z%Mx{-+lbLzA-+K9uWR;S7Uf0vOXgg2o&o2Y?QVU-wlVMOcqgs5HD+D($VcQ9{XOz5
zf5WPf2z>_FXo9R(&?Ts2(xeWYW$;od{-?%S&*pe;3XK>`$F$5=?Xau02JncLmTJ--
zz1mrCvmLF<9PV%0qOU~ATqebrGEfz{i$rKR5lzkbc_L<?GDOS_?X$Bk8X~G}6rAWu
z%4sW3R#%ESpmO;i@_;{e{@H%Qn`Xod2R(71XK?@@$*E$I8)J_wNyXHHkLv_ImNyI$
z>_7KvXF=H(bCdE4<7yU#WFMF^rmxCH&`ph`WjtU+ewZ!z2O92@QqnD6x~M$o=I*qb
zXTCSv!y;T1)8FqR81vVn6lo+PHUjggC3fiXR>}L=@70>`uX%nBGp5*l?KF-|lqR)w
zl?G`RBhMapDB0f3u(Ub6y^tw$T4l9!Y*!+ID6%{*eIVc^V?bZ_`Oa06>EUC8Cs!M^
z2pMZ)Qeh#-OP0UYL;Zd2y~Y}1W_Y0&Pu^6(m%RVzBx~#dg+TQG6av$;C{Tx*0XVD3
zz}BpG7MkVG1SSQ_?ht;k`&PhvA4iaTv2E>86Wpd$2qs%h)^GHwJw1R=e3&thIBhYR
zML##;I^FC?WPTf~^bwpnaX&D0Zah7{f94t1+c8|O22@igxKLdFMwl~luPD3tKBnmD
z-uC9l8VMz_S&jE%-QxS|!dM^tGc&Qnj?)?k!bqg<3uQl-O<00?FO{^{-kO%HN!xgq
zn2Rq9<lr!pM5mXw)b6rkc*P7RR7LU2LYa8TX)_D98~EG`d=;)T^7TnT8qQAvu=YGD
zjXnUMlF*vsuX$`@nx=T63vRXieuAZv%6J4Hdq!R$)VGFm91N%3ykGQ`w9~LIqq7yu
zm7i4Bu`ZK2Lk{SnPAQl1h_$1sF5*$n@0eM}_v#hD&^F++^FeR-h9M|9*Z&NHkY^9a
zQ#Ze%aZ6#%jMSAEOYu7o&V11buAiz7?9!+Gc!?cea<@UDtRL7v=t3@T?oB4KIW-{@
z^GG#i+PB~2Qkq}efoY(9O;wWT<<<eI{9a&&toq{;j&kWjCJOwMJL1CMue{q^z6%IF
z?-_EE_y9H*d^>CJhW6>dNK)%a4S<mn&-2!S5tsGbX_LSbeV@#Zv#h@gY3WOns5Ls1
zn7Xp=H`~9zQMZW;nwX`lPlpGt^+$ogzW})x|K!FkMq>A%^G>OsjN=cD*bu^#Kx4`o
zE8Xl-30WN`l_F5t(D1Gqk0Y>dzxAaMX5b)UHY9ecyjLVEhLfeW7{P~}IXhU!+3OJB
z)l~bY2`qnLH6F9QvaGTOG&oCB)aL|L@c}!zz;G?FOE5WDYv4RD04KO;Fn&=Sxl`e!
zX7brLVRfc#v?Z1s=UqhPwBw<{XyY%xHkZC3m^=1H;w5|3<)g9}ABUmhcYC8d+Qe73
z`N-~$BJVeNfVV;vK)*j+#lLZ{Sxa(DiL+HVA{!=Xn`l{L2y1!6iuI)Vw6P^n)vUtF
z7@sPzhIy$p8+uTAxX(mwv!MR2xIAsirfmhIQ<;&jG?dT9<(Pj;G27Or_=b?%27mzc
N=z-pS#Jwk>{{sb>U7r8|

literal 0
HcmV?d00001

diff --git a/docs/pages/Methodology.py b/docs/pages/Methodology.py
deleted file mode 100644
index af6a120..0000000
--- a/docs/pages/Methodology.py
+++ /dev/null
@@ -1,301 +0,0 @@
-import streamlit as st
-from policyengine_uk_data.utils.download_docs_prerequisites import (
-    download_data,
-)
-
-download_data()
-
-st.set_page_config(layout="wide")
-
-from policyengine_uk_data.utils import get_loss_results
-from policyengine_uk_data import (
-    FRS_2022_23,
-    ExtendedFRS_2022_23,
-    EnhancedFRS_2022_23,
-    ReweightedFRS_2022_23,
-)
-from policyengine_core.model_api import Reform
-import plotly.express as px
-import pandas as pd
-
-st.title("Methodology")
-
-st.write(
-    """
-In this page, we'll walk through step-by-step the process we use to create PolicyEngine's dataset.
-* **Family Resources Survey**: we'll start with the FRS, looking at close it is to reality. To take an actual concrete starting point, we'll assume benefit payments are as reported in the survey.
-* **FRS (+ tax-benefit model)**: we need to make sure that our tax-benefit model isn't doing anything unexpected. If we turn on simulation of taxes and benefits, does anything look unexpected? If not- great, we've turned a household survey into something useful for policy analysis. We'll also take stock here of what we're missing from reality.
-* **Wealth and consumption**: the most obvious thing we're missing is wealth and consumption. We'll impute those here.
-* **Fine-tuning**: we'll use reweighting to make some final adjustments to make sure our dataset is as close to reality as possible.
-* **Validation**: we'll compare our dataset to the UK's official statistics, and see how we're doing.
-"""
-)
-
-st.subheader("Family Resources Survey")
-
-st.write(
-    """First, we'll start with the FRS as-is. Skipping over the technical details for how we actually feed this data into the model (you can find that in `policyengine_uk_data/datasets/frs/`), we need to decide how we're actually going to measure 'close to reality'. We need to define an objective function, and if our final dataset improves it a lot, we can call that a success.
-         
-We'll define this objective function using public statistics that we can generally agree are of high importance to describing the UK household sector. These are things that, if the survey gets them wrong, we'd expect to cause inaccuracy in our model, and if we get them all mostly right, we'd expect to have confidence that it's a pretty accurate tax-benefit model.
-         
-For this, we've gone through and collected:
-         
-* **Demographics** from the ONS: ten-year age band populations by region of the UK, national family type populations and national tenure type populations.
-* **Incomes** from HMRC: for each of 14 total income bands, the number of people with income and combined income of the seven income types that account for over 99% of total income: employment, self-employment, State Pension, private pension, property, savings interest, and dividends.
-* **Tax-benefit programs** from the DWP and OBR: statistics on caseloads, expenditures and revenues for all 20 major tax-benefit programs.
-         
-Let's first take a look at the initial FRS, our starting point, and what is generally considered the best dataset to use (mostly completely un-modified across major tax-benefit models), and see how close it is to reproducing these statistics.
-         
-The table below shows the result, and: it's really quite bad! Look at the relative errors.
-"""
-)
-
-
-@st.cache_data
-def get_loss(dataset, reform, time_period):
-    loss_results = get_loss_results(dataset, time_period, reform)
-
-    def get_type(name):
-        if "hmrc" in name:
-            return "Income"
-        if "ons" in name:
-            return "Demographics"
-        if "obr" in name:
-            return "Tax-benefit"
-        return "Other"
-
-    loss_results["type"] = loss_results.name.apply(get_type)
-    return loss_results
-
-
-reported_benefits = Reform.from_dict(
-    {
-        "gov.contrib.policyengine.disable_simulated_benefits": True,
-    }
-)
-loss_results = get_loss(
-    dataset=FRS_2022_23, reform=reported_benefits, time_period=2022
-).copy()
-with st.expander(expanded=True, label="Objective function deep dive"):
-    st.dataframe(loss_results, use_container_width=True)
-
-st.write(
-    "It's easier to understand 'what kind of bad' this is by splitting out the statistics into those three categories. Here's a histogram of the absolute relative errors."
-)
-
-fig = px.histogram(
-    loss_results,
-    x="abs_rel_error",
-    nbins=25,
-    title="Distribution of absolute relative errors",
-    labels={
-        "value": "Absolute relative error",
-        "count": "Number of variables",
-    },
-    color="type",
-)
-
-st.plotly_chart(fig, use_container_width=True)
-
-st.write(
-    """A few notes:
-         
-* We're comparing things in the same relevant time period (2022), and only doing a tiny amount of adjustment to the statistics: OBR statistics are taken directly from the latest EFO, ONS statistics are the most recent projections for 2022, and HMRC statistics are uprated from 2021 to 2022 using the same standard uprating factors we use in the model (and it's only one year adjustment).
-* Demogaphics look basically fine: that's expected, because the DWP applies an optimisation algorithm to optimise the household weights to be as close as possible to a similar set of demographic statistics. It's a good sign that we use slightly different statistics than it was trained on and get good accuracy.
-* Incomes look *not great at all*. We'll take a closer look below to understand why. But the FRS is well-known to under-report income significantly.
-* Tax-benefit programs also look *not good*. And this is a concern! Because we're using this dataset to answer questions about tax-benefit programs, and the FRS isn't even providing a good representation of them under baseline law.
-"""
-)
-
-incomes = loss_results[loss_results.type == "Income"]
-incomes["band"] = incomes.name.apply(
-    lambda x: x.split("band_")[1].split("_")[0]
-).astype(int)
-incomes["count"] = incomes.name.apply(lambda x: "count" in x)
-incomes["variable"] = incomes.name.apply(
-    lambda x: x.split("_income_band")[0].split("_count")[0].split("hmrc/")[-1]
-)
-
-variable = st.selectbox("Select income variable", incomes.variable.unique())
-count = st.checkbox("Count")
-variable_df = incomes[
-    (incomes.variable == variable) & (incomes["count"] == count)
-]
-
-fig = px.bar(
-    variable_df,
-    x="band",
-    y=[
-        "target",
-        "estimate",
-        "error",
-        "rel_error",
-        "abs_error",
-        "abs_rel_error",
-    ],
-    barmode="group",
-)
-st.plotly_chart(fig, use_container_width=True)
-
-st.write(
-    """There are a few interesting things here:
-             
-* The FRS over-estimates incomes in the upper-middle of the distribution and under-estimates them in the top of the distribution. The reason for this is probably: the FRS misses out the top completely, and then because of the weight optimisation (which scales up the working-age age groups to hit their population targets), the middle of the distribution is inflated, overcompensating.
-* Some income types are severely under-estimated across all bands: notably capital incomes. This probably reflects issues with the survey questionnaire design more than sampling bias.
-"""
-)
-st.write("OK, so what can we do about it?")
-
-st.subheader("FRS (+ tax-benefit model)")
-
-st.write(
-    "First, let's turn on the model and check nothing unexpected happens."
-)
-
-
-original_frs_loss = loss_results.copy()
-frs_loss = get_loss(FRS_2022_23, None, 2022).copy()
-combined_frs_loss = pd.merge(
-    on="name",
-    left=original_frs_loss,
-    right=frs_loss,
-    suffixes=("_original", "_simulated"),
-)
-combined_frs_loss["change_in_abs_rel_error"] = (
-    combined_frs_loss["abs_rel_error_simulated"]
-    - combined_frs_loss["abs_rel_error_original"]
-)
-# Sort columns
-combined_frs_loss.sort_index(axis=1, inplace=True)
-combined_frs_loss = combined_frs_loss.set_index("name")
-
-st.dataframe(combined_frs_loss, use_container_width=True)
-
-st.write(
-    """Again, a few notes:
-        
-* You might be thinking: 'why do some of the HMRC income statistics change?'. That's because of the State Pension, which is simulated in the model. The State Pension is a component of total income, so people might be moved from one income band to another if we adjust their State Pension payments slightly.
-* Some of the tax-benefit statistics change, and get better and worse. This is expected for a variety of reasons- one is that incomes and benefits are often out of sync with each other in the data (the income in the survey week might not match income in the benefits assessment time period).
-"""
-)
-
-st.subheader("Adding imputations")
-
-st.write(
-    """Now, let's add in the imputations for wealth and consumption. For this, we train *quantile regression forests* (essentially, random forest models that capture the conditional distribution of the data) to predict wealth and consumption variables from FRS-shared variables in other surveys.
-
-The datasets we use are:
-* The Wealth and Assets Survey (WAS) for wealth imputations.
-* The Living Costs and Food Survey (LCFS) for most consumption imputations.      
-* The Effects of Taxes and Benefits on Household Income (ETB) for '£ consumption that is full VAT rateable'. For example, different households will have different profiles in terms of the share of their consumption that falls on the VATable items.
-         
-Below is a table showing how just adding these imputations changes our objective statistics (filtered to just rows which changed). Not bad pre-calibrated performance! And we've picked up an extra £200bn in taxes.
-"""
-)
-
-new_loss = get_loss(ExtendedFRS_2022_23, None, 2022).copy()
-new_loss_against_old = pd.merge(
-    on="name",
-    left=frs_loss,
-    right=new_loss,
-    suffixes=("_simulated", "_imputed"),
-)
-new_loss_against_old["change_in_abs_rel_error"] = (
-    new_loss_against_old["abs_rel_error_imputed"]
-    - new_loss_against_old["abs_rel_error_simulated"]
-)
-
-st.dataframe(
-    new_loss_against_old[
-        new_loss_against_old.change_in_abs_rel_error.abs() > 0.01
-    ]
-)
-
-st.subheader("Calibration")
-
-st.write(
-    "Now, we've got a dataset that's performs pretty well without explicitly targeting the official statistics we care about. So it's time to add the final touch- calibrating the weights to explicitly minimise error against the target set."
-)
-
-calibrated_loss = get_loss(ReweightedFRS_2022_23, None, 2022).copy()
-calibrated_loss_against_imputed = pd.merge(
-    on="name",
-    left=new_loss,
-    right=calibrated_loss,
-    suffixes=("_imputed", "_calibrated"),
-)
-
-calibrated_loss_against_imputed["change_in_abs_rel_error"] = (
-    calibrated_loss_against_imputed["abs_rel_error_calibrated"]
-    - calibrated_loss_against_imputed["abs_rel_error_imputed"]
-)
-
-st.dataframe(calibrated_loss_against_imputed)
-
-st.write(
-    "The above table shows what this did to our target set. Mostly, we're hitting targets! But we are still under on income tax and many of the highest income band statistics. Let's take another look at the incomes, but with this new calibrated dataset."
-)
-
-incomes = calibrated_loss[loss_results.type == "Income"]
-incomes["band"] = incomes.name.apply(
-    lambda x: x.split("band_")[1].split("_")[0]
-).astype(int)
-incomes["count"] = incomes.name.apply(lambda x: "count" in x)
-incomes["variable"] = incomes.name.apply(
-    lambda x: x.split("_income_band")[0].split("_count")[0].split("hmrc/")[-1]
-)
-
-variable = st.selectbox(
-    "Select income variable",
-    incomes.variable.unique(),
-    key=1,
-)
-count = st.checkbox("Count", key=2)
-variable_df = incomes[
-    (incomes.variable == variable) & (incomes["count"] == count)
-]
-
-fig = px.bar(
-    variable_df,
-    x="band",
-    y=[
-        "target",
-        "estimate",
-        "error",
-        "rel_error",
-        "abs_error",
-        "abs_rel_error",
-    ],
-    barmode="group",
-)
-st.plotly_chart(fig, use_container_width=True)
-
-st.write(
-    """
-So, what's happening here seems like: the FRS just doesn't have enough high-income records for calibration to work straight away. The optimiser can't just set really high weights for the few rich people we do have, because it'd hurt performance on the demographic statistics.
-         
-So, we need a solution to add more high-income records. What we'll do is:
-         
-* Train a QRF model to predict the distributions of income variables from the Survey of Personal Incomes from FRS demographic variables.
-* For each FRS person, add an 'imputed income' clone with zero weight.
-* Run the calibration again.
-"""
-)
-
-st.subheader("The Enhanced FRS")
-
-st.write("Let's see how this new dataset performs.")
-
-efrs_loss = get_loss(EnhancedFRS_2022_23, None, 2022).copy()
-efrs_loss_against_calibrated = pd.merge(
-    on="name",
-    left=calibrated_loss,
-    right=efrs_loss,
-    suffixes=("_calibrated", "_enhanced"),
-)
-efrs_loss_against_calibrated["change_in_abs_rel_error"] = (
-    efrs_loss_against_calibrated["abs_rel_error_enhanced"]
-    - efrs_loss_against_calibrated["abs_rel_error_calibrated"]
-)
-
-st.dataframe(efrs_loss_against_calibrated)
diff --git a/docs/pages/Validation.py b/docs/pages/Validation.py
deleted file mode 100644
index 39ea1f2..0000000
--- a/docs/pages/Validation.py
+++ /dev/null
@@ -1,88 +0,0 @@
-import streamlit as st
-from policyengine_uk_data.utils.download_docs_prerequisites import (
-    download_data,
-)
-
-download_data()
-
-st.set_page_config(layout="wide")
-
-st.title("Validation")
-
-from policyengine_uk_data import EnhancedFRS_2022_23, FRS_2022_23, SPI_2020_21
-from policyengine_uk_data.utils.loss import get_loss_results
-import pandas as pd
-
-
-@st.cache_data
-def get_validation():
-    df = pd.DataFrame()
-    for dataset in [FRS_2022_23, EnhancedFRS_2022_23]:
-        for year in range(2022, 2029):
-            print(dataset.label, year)
-            loss_results = get_loss_results(dataset, year)
-            loss_results["time_period"] = year
-            loss_results["dataset"] = dataset.label
-            df = pd.concat([df, loss_results])
-    df = df.reset_index(drop=True)
-    return df
-
-
-df = get_validation()
-truth_df = df[df.dataset == df.dataset.unique()[0]].reset_index()
-truth_df["estimate"] = truth_df["target"]
-truth_df["error"] = truth_df["estimate"] - truth_df["target"]
-truth_df["abs_error"] = truth_df["error"].abs()
-truth_df["rel_error"] = truth_df["error"] / truth_df["target"]
-truth_df["abs_rel_error"] = truth_df["rel_error"].abs()
-truth_df["dataset"] = "Official"
-df = pd.concat([df, truth_df]).reset_index(drop=True)
-
-st.write(
-    "Calibration check: the table below shows how both the original and enhanced FRS datasets compare to over 2,000 official statistics (which the EFRS was explicitly calibrated to hit) from the OBR, DWP and HMRC."
-)
-
-st.write(
-    "Since the EFRS is calibrated to these statistics, high performance is expected and achieved."
-)
-
-a, b = st.columns(2)
-
-with a:
-    frs_mean = df[df.dataset == "FRS (2022-23)"].abs_rel_error.mean()
-    st.metric("FRS average error", f"{frs_mean:.2%}")
-with b:
-    efrs_mean = df[df.dataset == "Enhanced FRS (2022-23)"].abs_rel_error.mean()
-    st.metric("Enhanced FRS average error", f"{efrs_mean:.2%}")
-
-selected_metrics = st.selectbox("Select statistic", df.name.unique())
-comparison = st.selectbox(
-    "Select metric",
-    ["estimate", "error", "abs_error", "rel_error", "abs_rel_error"],
-)
-
-# Bar chart showing datasets and a dotted line for actual
-
-import plotly.express as px
-
-comparison_df = (
-    df[df.name == selected_metrics]
-    .groupby(["dataset", "time_period"])[comparison]
-    .mean()
-    .reset_index()
-)
-
-fig = px.bar(
-    comparison_df,
-    x="time_period",
-    y=comparison,
-    color="dataset",
-    barmode="group",
-    title=f"{selected_metrics} {comparison} comparison",
-)
-st.plotly_chart(fig, use_container_width=True)
-
-
-st.dataframe(df)
-
-st.dataframe(df[df.name == selected_metrics])
diff --git a/docs/utils.py b/docs/utils.py
new file mode 100644
index 0000000..d61f829
--- /dev/null
+++ b/docs/utils.py
@@ -0,0 +1,7 @@
+import plotly.io as pio
+from IPython.display import HTML
+
+
+def show(fig):
+    html = pio.to_html(fig)
+    return HTML(html)
diff --git a/policyengine_uk_data/utils/github.py b/policyengine_uk_data/utils/github.py
index 27c88e1..d949ac8 100644
--- a/policyengine_uk_data/utils/github.py
+++ b/policyengine_uk_data/utils/github.py
@@ -74,7 +74,7 @@ def upload(
             f"Asset {file_name} already exists in release {release_tag} of {org}/{repo}, skipping."
         )
         return
-    
+
     url = f"https://uploads.github.com/repos/{org}/{repo}/releases/{release_id}/assets?name={file_name}"
 
     headers = {
@@ -85,7 +85,7 @@ def upload(
 
     with open(file_path, "rb") as f:
         data = f.read()
-    
+
     response = requests.post(
         url,
         headers=headers,
@@ -96,9 +96,8 @@ def upload(
         raise ValueError(
             f"Invalid response code {response.status_code} for url {url}. Received: {response.text}"
         )
-    
-    return response.json()
 
+    return response.json()
 
 
 def set_pr_auto_review_comment(text: str):
diff --git a/pyproject.toml b/pyproject.toml
index 2ecfff7..cd227f4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,10 +25,11 @@ dev = [
     "black",
     "pytest",
     "policyengine_uk>=1.8.0",
-    "streamlit",
     "survey_enhance",
     "torch",
     "tables",
+    "furo",
+    "jupyter-book",
 ]
 
 [tool.setuptools]

From 8276a00fff7bff57ad0c78abd9ba8d99c610a6f1 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 18:32:38 +0100
Subject: [PATCH 05/14] Add changelog rule

---
 Makefile | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Makefile b/Makefile
index f78fd5b..a02c02f 100644
--- a/Makefile
+++ b/Makefile
@@ -31,3 +31,10 @@ build:
 
 publish:
 	twine upload dist/*
+
+changelog:
+	build-changelog changelog.yaml --output changelog.yaml --update-last-date --start-from 1.0.0 --append-file changelog_entry.yaml
+	build-changelog changelog.yaml --org PolicyEngine --repo policyengine-uk-data --output CHANGELOG.md --template .github/changelog_template.md
+	bump-version changelog.yaml pyproject.toml
+	rm changelog_entry.yaml || true
+	touch changelog_entry.yaml

From c40523ca564d94bab6f0ca4124ab097618a9c327 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 18:38:42 +0100
Subject: [PATCH 06/14] Ensure Extended FRS generates

---
 Makefile                                                | 1 +
 policyengine_uk_data/utils/imputations/capital_gains.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a02c02f..975f206 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,7 @@ documentation:
 data:
 	python policyengine_uk_data/datasets/frs/dwp_frs.py
 	python policyengine_uk_data/datasets/frs/frs.py
+	python policyengine_uk_data/datasets/frs/extended_frs.py
 	python policyengine_uk_data/datasets/frs/enhanced_frs.py
 
 build:
diff --git a/policyengine_uk_data/utils/imputations/capital_gains.py b/policyengine_uk_data/utils/imputations/capital_gains.py
index 54c7862..3a9b571 100644
--- a/policyengine_uk_data/utils/imputations/capital_gains.py
+++ b/policyengine_uk_data/utils/imputations/capital_gains.py
@@ -86,7 +86,7 @@ def loss(blend_factor):
         return loss
 
     optimiser = Adam([blend_factor], lr=1e-1)
-    progress = tqdm(range(1000))
+    progress = range(1000)
     for i in progress:
         optimiser.zero_grad()
         loss_value = loss(blend_factor)

From f3f42c37c8a0a5f75e72e80a0c96444c9ddf2747 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 18:41:22 +0100
Subject: [PATCH 07/14] Add initial changelog

---
 CHANGELOG.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..309434f
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,13 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.0.0] - 2024-09-09 17:29:10
+
+### Added
+
+- Initialized changelogging
+

From 1748e2af06c81e7b57be353cd74a2880c9607491 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 19:03:02 +0100
Subject: [PATCH 08/14] Add download links for all microdata

---
 policyengine_uk_data/datasets/frs/dwp_frs.py  | 16 ++-----
 policyengine_uk_data/datasets/frs/frs.py      |  2 -
 .../storage/download_private_prerequisites.py | 43 ++++++++++---------
 policyengine_uk_data/utils/__init__.py        |  1 -
 .../utils/download_docs_prerequisites.py      | 25 -----------
 5 files changed, 26 insertions(+), 61 deletions(-)
 delete mode 100644 policyengine_uk_data/utils/download_docs_prerequisites.py

diff --git a/policyengine_uk_data/datasets/frs/dwp_frs.py b/policyengine_uk_data/datasets/frs/dwp_frs.py
index 982fde2..cc3f3cf 100644
--- a/policyengine_uk_data/datasets/frs/dwp_frs.py
+++ b/policyengine_uk_data/datasets/frs/dwp_frs.py
@@ -22,14 +22,6 @@ def generate(self):
         if isinstance(tab_folder, str):
             tab_folder = Path(tab_folder)
 
-        # Folder might be either a folder, or a zipped folder.
-
-        if tab_folder.suffix == ".zip":
-            import zipfile
-
-            with zipfile.ZipFile(tab_folder, "r") as zip_ref:
-                zip_ref.extractall(tab_folder.parent)
-
         tab_folder = Path(tab_folder.parent / tab_folder.stem)
         # Load the data
         tables = {}
@@ -93,7 +85,7 @@ def generate(self):
 
 
 class DWP_FRS_2020_21(DWP_FRS):
-    folder = STORAGE_FOLDER / "frs_2020_21.zip"
+    folder = STORAGE_FOLDER / "frs_2020_21"
     name = "dwp_frs_2020_21"
     label = "DWP FRS (2020-21)"
     file_path = STORAGE_FOLDER / "dwp_frs_2020_21.h5"
@@ -101,7 +93,7 @@ class DWP_FRS_2020_21(DWP_FRS):
 
 
 class DWP_FRS_2021_22(DWP_FRS):
-    folder = STORAGE_FOLDER / "frs_2021_22.zip"
+    folder = STORAGE_FOLDER / "frs_2021_22"
     name = "dwp_frs_2021_22"
     label = "DWP FRS (2021-22)"
     file_path = STORAGE_FOLDER / "dwp_frs_2021_22.h5"
@@ -109,7 +101,7 @@ class DWP_FRS_2021_22(DWP_FRS):
 
 
 class DWP_FRS_2022_23(DWP_FRS):
-    folder = STORAGE_FOLDER / "frs_2022_23.zip"
+    folder = STORAGE_FOLDER / "frs_2022_23"
     name = "dwp_frs_2022_23"
     label = "DWP FRS (2022-23)"
     file_path = STORAGE_FOLDER / "dwp_frs_2022_23.h5"
@@ -117,6 +109,4 @@ class DWP_FRS_2022_23(DWP_FRS):
 
 
 if __name__ == "__main__":
-    DWP_FRS_2020_21().generate()
-    DWP_FRS_2021_22().generate()
     DWP_FRS_2022_23().generate()
diff --git a/policyengine_uk_data/datasets/frs/frs.py b/policyengine_uk_data/datasets/frs/frs.py
index 4c9d821..6ba72c8 100644
--- a/policyengine_uk_data/datasets/frs/frs.py
+++ b/policyengine_uk_data/datasets/frs/frs.py
@@ -837,6 +837,4 @@ def impute_brmas(dataset, frs):
 
 
 if __name__ == "__main__":
-    FRS_2020_21().generate()
-    FRS_2021_22().generate()
     FRS_2022_23().generate()
diff --git a/policyengine_uk_data/storage/download_private_prerequisites.py b/policyengine_uk_data/storage/download_private_prerequisites.py
index 09b5852..b5f8e34 100644
--- a/policyengine_uk_data/storage/download_private_prerequisites.py
+++ b/policyengine_uk_data/storage/download_private_prerequisites.py
@@ -1,26 +1,29 @@
 from policyengine_uk_data.utils.github import download
 from pathlib import Path
+import zipfile
+
+def extract_zipped_folder(folder):
+    folder = Path(folder)
+    with zipfile.ZipFile(folder, "r") as zip_ref:
+        zip_ref.extractall(folder.parent)
 
 FOLDER = Path(__file__).parent
 
-download(
-    "PolicyEngine",
-    "ukda",
-    "release",
-    "frs_2020_21.zip",
-    FOLDER / "frs_2020_21.zip",
-)
-download(
-    "PolicyEngine",
-    "ukda",
-    "release",
-    "frs_2021_22.zip",
-    FOLDER / "frs_2021_22.zip",
-)
-download(
-    "PolicyEngine",
-    "ukda",
-    "release",
+FILES = [
     "frs_2022_23.zip",
-    FOLDER / "frs_2022_23.zip",
-)
+    "lcfs_2021_22.zip",
+    "was_2006_20.zip",
+    "etb_1977_21.zip",
+    "spi_2020_21.zip",
+]
+
+for file in FILES:
+    download(
+        "PolicyEngine",
+        "ukda",
+        "release",
+        file,
+        FOLDER / file,
+    )
+    extract_zipped_folder(FOLDER / file)
+    (FOLDER / file).unlink()
diff --git a/policyengine_uk_data/utils/__init__.py b/policyengine_uk_data/utils/__init__.py
index f1c9f22..983bc4a 100644
--- a/policyengine_uk_data/utils/__init__.py
+++ b/policyengine_uk_data/utils/__init__.py
@@ -2,4 +2,3 @@
 from .uprating import *
 from .datasets import *
 from .loss import *
-from .download_docs_prerequisites import *
diff --git a/policyengine_uk_data/utils/download_docs_prerequisites.py b/policyengine_uk_data/utils/download_docs_prerequisites.py
deleted file mode 100644
index dd015ec..0000000
--- a/policyengine_uk_data/utils/download_docs_prerequisites.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from policyengine_uk_data.utils.github import download
-from policyengine_uk_data.storage import STORAGE_FOLDER
-
-PREREQUISITES = [
-    {
-        "repo": "ukda",
-        "file_name": "frs_2022_23.h5",
-    },
-    {
-        "repo": "ukda",
-        "file_name": "enhanced_frs_2022_23.h5",
-    },
-]
-
-
-def download_data():
-    for prerequisite in PREREQUISITES:
-        if not (STORAGE_FOLDER / prerequisite["file_name"]).exists():
-            download(
-                "PolicyEngine",
-                prerequisite["repo"],
-                "release",
-                prerequisite["file_name"],
-                STORAGE_FOLDER / prerequisite["file_name"],
-            )

From d127581a8e9571035df5d31fcf7755adc6c13fdc Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 19:05:06 +0100
Subject: [PATCH 09/14] Try fix for changelog action

---
 .github/workflows/ci_cd.yaml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/ci_cd.yaml b/.github/workflows/ci_cd.yaml
index 0e7afc5..aee95d4 100644
--- a/.github/workflows/ci_cd.yaml
+++ b/.github/workflows/ci_cd.yaml
@@ -110,10 +110,6 @@ jobs:
           python-version: "3.10"
       - name: Build changelog
         run: pip install "yaml-changelog>=0.1.7" && make changelog
-      - name: Preview changelog update
-        run: ".github/get-changelog-diff.sh"
-      - name: Check version number has been properly updated
-        run: ".github/is-version-number-acceptable.sh"
       - name: Update changelog
         uses: EndBug/add-and-commit@v9
         with:

From 745473cecb130f7b41c9a467e27e841048a866b2 Mon Sep 17 00:00:00 2001
From: "Github Actions[bot]" <nikhilwoodruff@users.noreply.github.com>
Date: Tue, 17 Sep 2024 18:05:28 +0000
Subject: [PATCH 10/14] Update PolicyEngine US data

---
 CHANGELOG.md         | 9 +++++++++
 changelog.yaml       | 5 +++++
 changelog_entry.yaml | 4 ----
 pyproject.toml       | 2 +-
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 309434f..9d89251 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,9 +5,18 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.1.0] - 2024-09-17 18:05:27
+
+### Changed
+
+- Lightened dependency list.
+
 ## [1.0.0] - 2024-09-09 17:29:10
 
 ### Added
 
 - Initialized changelogging
 
+
+
+[1.1.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.0.0...1.1.0
diff --git a/changelog.yaml b/changelog.yaml
index 8944a4b..317f2a1 100644
--- a/changelog.yaml
+++ b/changelog.yaml
@@ -3,3 +3,8 @@
     - Initialized changelogging
   date: 2024-09-09 17:29:10
   version: 1.0.0
+- bump: minor
+  changes:
+    changed:
+    - Lightened dependency list.
+  date: 2024-09-17 18:05:27
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index f3b708c..e69de29 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,4 +0,0 @@
-- bump: minor
-  changes:
-    changed:
-    - Lightened dependency list.
diff --git a/pyproject.toml b/pyproject.toml
index cd227f4..e2ec2ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "policyengine_uk_data"
-version = "1.0.0"
+version = "1.1.0"
 description = "A package to create representative microdata for the UK."
 readme = "README.md"
 authors = [

From 433757e135de8a0e83eaa85754d61dfc9446fb3d Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 19:11:16 +0100
Subject: [PATCH 11/14] Use correct folder names

---
 .gitignore                                                     | 1 +
 policyengine_uk_data/storage/download_private_prerequisites.py | 2 ++
 policyengine_uk_data/utils/imputations/consumption.py          | 2 +-
 policyengine_uk_data/utils/imputations/income.py               | 2 +-
 policyengine_uk_data/utils/imputations/vat.py                  | 2 +-
 policyengine_uk_data/utils/imputations/wealth.py               | 2 +-
 6 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 61b49b8..f07f442 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 **/*.csv
 **/*.zip
 **/*.pkl
+**/*.tab
 !uprating_factors.csv
 !uprating_growth_factors.csv
 !incomes.csv
diff --git a/policyengine_uk_data/storage/download_private_prerequisites.py b/policyengine_uk_data/storage/download_private_prerequisites.py
index b5f8e34..f2813a5 100644
--- a/policyengine_uk_data/storage/download_private_prerequisites.py
+++ b/policyengine_uk_data/storage/download_private_prerequisites.py
@@ -18,6 +18,8 @@ def extract_zipped_folder(folder):
 ]
 
 for file in FILES:
+    if (FOLDER / file).exists():
+        continue
     download(
         "PolicyEngine",
         "ukda",
diff --git a/policyengine_uk_data/utils/imputations/consumption.py b/policyengine_uk_data/utils/imputations/consumption.py
index 6de810f..2270f72 100644
--- a/policyengine_uk_data/utils/imputations/consumption.py
+++ b/policyengine_uk_data/utils/imputations/consumption.py
@@ -5,7 +5,7 @@
 import yaml
 from policyengine_uk_data.storage import STORAGE_FOLDER
 
-LCFS_TAB_FOLDER = Path("/Users/nikhilwoodruff/Downloads/UKDA-9123-tab/tab")
+LCFS_TAB_FOLDER = STORAGE_FOLDER / "lcfs_2021_22"
 
 REGIONS = {
     1: "NORTH_EAST",
diff --git a/policyengine_uk_data/utils/imputations/income.py b/policyengine_uk_data/utils/imputations/income.py
index 7cc2558..95d3947 100644
--- a/policyengine_uk_data/utils/imputations/income.py
+++ b/policyengine_uk_data/utils/imputations/income.py
@@ -4,7 +4,7 @@
 import numpy as np
 from policyengine_uk_data.storage import STORAGE_FOLDER
 
-SPI_TAB_FOLDER = Path("/Users/nikhilwoodruff/Downloads/UKDA-9121-tab/tab")
+SPI_TAB_FOLDER = STORAGE_FOLDER / "spi_2020_21"
 SPI_RENAMES = dict(
     private_pension_income="PENSION",
     self_employment_income="PROFITS",
diff --git a/policyengine_uk_data/utils/imputations/vat.py b/policyengine_uk_data/utils/imputations/vat.py
index 868f441..9519212 100644
--- a/policyengine_uk_data/utils/imputations/vat.py
+++ b/policyengine_uk_data/utils/imputations/vat.py
@@ -4,7 +4,7 @@
 import numpy as np
 from policyengine_uk_data.storage import STORAGE_FOLDER
 
-ETB_TAB_FOLDER = Path("/Users/nikhilwoodruff/Downloads/UKDA-8856-tab/tab")
+ETB_TAB_FOLDER = STORAGE_FOLDER / "etb_1977_21"
 
 CONSUMPTION_PCT_REDUCED_RATE = 0.03  # From OBR's VAT page
 CURRENT_VAT_RATE = 0.2
diff --git a/policyengine_uk_data/utils/imputations/wealth.py b/policyengine_uk_data/utils/imputations/wealth.py
index 686a9b3..2513f5d 100644
--- a/policyengine_uk_data/utils/imputations/wealth.py
+++ b/policyengine_uk_data/utils/imputations/wealth.py
@@ -5,7 +5,7 @@
 import yaml
 from policyengine_uk_data.storage import STORAGE_FOLDER
 
-WAS_TAB_FOLDER = Path("/Users/nikhilwoodruff/Downloads/UKDA-7215-tab/tab")
+WAS_TAB_FOLDER = STORAGE_FOLDER / "was_2006_20"
 
 REGIONS = {
     1: "NORTH_EAST",

From d66eae0ce0e3128e5a569e808d96f738f24ffeca Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 19:16:37 +0100
Subject: [PATCH 12/14] Remove check version action

---
 .github/workflows/ci_cd.yaml                  | 25 -------------------
 .../storage/download_private_prerequisites.py |  2 ++
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/ci_cd.yaml b/.github/workflows/ci_cd.yaml
index aee95d4..0fc23c4 100644
--- a/.github/workflows/ci_cd.yaml
+++ b/.github/workflows/ci_cd.yaml
@@ -93,31 +93,6 @@ jobs:
         run: pytest
       - name: Test documentation builds
         run: make documentation
-  check-version:
-    name: Check version
-    if: github.event_name == 'pull_request'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0 # Fetch all history for all tags and branches
-          repository: ${{ github.event.pull_request.head.repo.full_name }}
-          ref: ${{ github.event.pull_request.head.ref }}
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.10"
-      - name: Build changelog
-        run: pip install "yaml-changelog>=0.1.7" && make changelog
-      - name: Update changelog
-        uses: EndBug/add-and-commit@v9
-        with:
-          add: "."
-          committer_name: Github Actions[bot]
-          author_name: Github Actions[bot]
-          message: Update PolicyEngine US data
-          github_token: ${{ secrets.POLICYENGINE_GITHUB }}
 
   docker:
     name: Docker
diff --git a/policyengine_uk_data/storage/download_private_prerequisites.py b/policyengine_uk_data/storage/download_private_prerequisites.py
index f2813a5..4234142 100644
--- a/policyengine_uk_data/storage/download_private_prerequisites.py
+++ b/policyengine_uk_data/storage/download_private_prerequisites.py
@@ -2,11 +2,13 @@
 from pathlib import Path
 import zipfile
 
+
 def extract_zipped_folder(folder):
     folder = Path(folder)
     with zipfile.ZipFile(folder, "r") as zip_ref:
         zip_ref.extractall(folder.parent)
 
+
 FOLDER = Path(__file__).parent
 
 FILES = [

From ada4e73169d0bcda844b24b6929979bfb7c131a8 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 19:17:51 +0100
Subject: [PATCH 13/14] Add to CONTRIBUTING.md

---
 .github/CONTRIBUTING.md | 4 ++++
 pyproject.toml          | 1 +
 2 files changed, 5 insertions(+)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 5b5d24e..4604959 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -1,3 +1,7 @@
 ## Updating data
 
 If your changes present a non-bugfix change to one or more datasets which are cloud-hosted (FRS and EFRS), then please change both the filename and URL (in both the class definition file and in `storage/upload_completed_datasets.py`). This enables us to store historical versions of datasets separately and reproducibly.
+
+## Updating the versioning
+
+Please add an entry to `changelog.yaml`, then run `make changelog` and commit the results only ONCE in this PR.
diff --git a/pyproject.toml b/pyproject.toml
index e2ec2ae..ca7ab03 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,7 @@ dev = [
     "tables",
     "furo",
     "jupyter-book",
+    "yaml-changelog>=0.1.7",
 ]
 
 [tool.setuptools]

From 930653fabd256915ed9376f90abaa392d869ad06 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 17 Sep 2024 19:18:31 +0100
Subject: [PATCH 14/14] Remove bad tqdm

---
 policyengine_uk_data/utils/imputations/capital_gains.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/policyengine_uk_data/utils/imputations/capital_gains.py b/policyengine_uk_data/utils/imputations/capital_gains.py
index 3a9b571..c4e4551 100644
--- a/policyengine_uk_data/utils/imputations/capital_gains.py
+++ b/policyengine_uk_data/utils/imputations/capital_gains.py
@@ -92,7 +92,6 @@ def loss(blend_factor):
         loss_value = loss(blend_factor)
         loss_value.backward()
         optimiser.step()
-        progress.set_description(f"Loss: {loss_value.item()}")
         if loss_value.item() < 1e-3:
             break