Merge branch 'main' into 2023-09-26-session

codeforboston · Oct 11, 2023 · 6b4784b · 6b4784b
2 parents bb4a9a9 + 8b54855
commit 6b4784b
Show file tree

Hide file tree

Showing 9 changed files with 15,152 additions and 69 deletions.
diff --git a/.github/workflows/test-rules-engine.yml b/.github/workflows/test-rules-engine.yml
@@ -24,8 +24,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Check style
         run: |
-          isort --check .
-          black --check .
+          make lint
   mypy:
     runs-on: ubuntu-latest
     strategy:
@@ -40,7 +39,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Check typing
         run: |
-          mypy .
+          make mypy
   pytest:
     runs-on: ubuntu-latest
     strategy:
@@ -55,8 +54,8 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Run tests
         run: |
-          pytest
-  pydocstyle:
+          make test
+  build:
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -68,6 +67,6 @@ jobs:
         uses: "./.github/actions/setup-rules-engine"
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Run tests
+      - name: Build wheel
         run: |
-          pydocstyle .
+          make build
diff --git a/design_temp/clean.py b/design_temp/clean.py
@@ -0,0 +1,154 @@
+import csv
+import re
+from dataclasses import dataclass
+
+
+# This script generates design_temp_by_county.csv
+# from the raw text copied from the ENERGY STAR pdf located at:
+# https://www.energystar.gov/ia/partners/bldrs_lenders_raters/downloads/County%20Level%20Design%20Temperature%20Reference%20Guide%20-%202015-06-24.pdf
+# To copy the text from the PDF you can open it in chrome and select the 'print' option
+# to save it as a PDF that will allow copying
+
+
+# Number of counties (according to PDF) for each of the 50 states and for DC
+# Same order as PDF
+REGIONS = {
+    "Alaska": 28,
+    "Alabama": 67,
+    "Arkansas": 75,
+    "Arizona": 15,
+    "California": 58,
+    "Colorado": 64,
+    "Connecticut": 8,
+    "District of Columbia": 1,
+    "Delaware": 3,
+    "Florida": 67,
+    "Georgia": 159,
+    "Hawaii": 5,
+    "Iowa": 99,
+    "Idaho": 44,
+    "Illinois": 102,
+    "Indiana": 92,
+    "Kansas": 105,
+    "Kentucky": 120,
+    "Louisiana": 64,
+    "Massachusetts": 14,
+    "Maryland": 24,
+    "Maine": 16,
+    "Michigan": 83,
+    "Minnesota": 87,
+    "Missouri": 115,
+    "Mississippi": 82,
+    "Montana": 56,
+    "North Carolina": 100,
+    "North Dakota": 53,
+    "Nebraska": 93,
+    "New Hampshire": 10,
+    "New Jersey": 21,
+    "New Mexico": 33,
+    "Nevada": 17,
+    "New York": 62,
+    "Ohio": 88,
+    "Oklahoma": 77,
+    "Oregon": 36,
+    "Pennsylvania": 67,
+    "Rhode Island": 5,
+    "South Carolina": 46,
+    "South Dakota": 66,
+    "Tennessee": 95,
+    "Texas": 254,
+    "Utah": 29,
+    "Virginia": 134,
+    "Vermont": 14,
+    "Washington": 39,
+    "Wisconsin": 72,
+    "West Virginia": 55,
+    "Wyoming": 23,
+}
+
+
+def remove_junk(content):
+    # remove everything before the table
+    table_title = (
+        "Exhibit 1: Cooling and Heating Design Temperatures by State and County"
+    )
+    content = content.split(table_title)[1]
+    # remove the title that appears at the top of each page
+    pattern = r"[ ]+ENERGY STAR Certified Homes\nCounty-Level Design Temperature Reference Guide"
+    content, subs = re.subn(pattern, "", content)
+    if subs != 115:
+        raise Exception("Wrong number of substitutions: {}".format(subs))
+    # remove the column labels from each page (they fit one of two patterns)
+    pattern = (
+        r"State[\s]+County[\s]+1% Cooling Temperature \(°F\)[\s]+"
+        r"99% Heating Temperature \(°F\)[\s]+HDD/CDD Ratio[\s]+"
+        r"Weather Station Selected for Cooling Temperature[\s]+"
+        r"Reference[\s]+Weather Station Selected for Heating Temperature[\s]+Reference"
+    )
+    content, subs = re.subn(pattern, "", content)
+    if subs != 47:
+        raise Exception("Wrong number of substitutions: {}".format(subs))
+    pattern = (
+        r"1% Cooling State County Temperature\n\(°F\)\n"
+        r"99% Heating HDD/CDD Weather Station Selected for "
+        r"Weather Station Selected for\nTemperature Ratio Cooling "
+        r"Temperature Reference Heating Temperature Reference \(°F\)"
+    )
+    content, subs = re.subn(pattern, "", content)
+    if subs != 69:
+        raise Exception("Wrong number of substitutions: {}".format(subs))
+    # remove the page numbers that appear at the bottom of each page
+    pattern = r"[ ]+Page [0-9]+ of 116"
+    content, subs = re.subn(pattern, "", content)
+    if subs != 116:
+        raise Exception("Wrong number of substitutions: {}".format(subs))
+    return content
+
+
+@dataclass
+class DataRow:
+    state: str
+    county: str
+    design_temp: int
+
+
+def parse(content: str):
+    # find where each region starts
+    # (it starts at the first instance of the region name appearing twice)
+    matches = []
+    for r in REGIONS:
+        match = re.search(r + r"[\s]*" + r, content)
+        if match is None:
+            raise Exception("Could not find data for region: {}".format(r))
+        matches.append(match)
+    # get table rows for each region
+    rows = []
+    for i, r in enumerate(REGIONS):
+        # get the substring of content for a particular region
+        end_index = matches[i + 1].start() if i + 1 < len(matches) else len(content)
+        r_content = r + content[matches[i].end() : end_index]
+        # match region name (state name), county, 1% cooling, 99% heating, and HDD/CDD
+        # some weather station names are jumbled, so avoid them
+        pattern = r"({})[\s]+([A-Za-z \-\.\']+?)[\s]+[0-9]+[\s]+([\-0-9]+)[\s]+[0-9\.]+"
+        pattern = pattern.format(r)
+        row_matches = re.findall(pattern, r_content)
+        if len(row_matches) != REGIONS[r]:
+            s = "found {} rows for {}, expected {}"
+            raise Exception(s.format(len(row_matches), r, REGIONS[r]))
+        rows += [DataRow(*m) for m in row_matches]
+    if len(rows) != sum(REGIONS.values()):
+        s = "found {} rows, expected {}"
+        raise Exception(s.format(len(rows), sum(REGIONS.values())))
+    return rows
+
+
+if __name__ == "__main__":
+    with open("pdf_rawcopy.txt") as f:
+        content = f.read()
+    content = remove_junk(content)
+    rows = parse(content)
+    with open("design_temp_by_county.csv", "w", newline="") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=vars(rows[0]).keys())
+        writer.writeheader()
+        for r in rows:
+            writer.writerow(vars(r))