From ab128ed1aa0bb76402782dd5aef5402d2678a884 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 10:17:20 +0100
Subject: [PATCH 01/22] remove individual code variables from main study
 definition. We'll extract these in a separate study definition because we only
 need to extract them once if we use "number_of_matches_in_period" for the
 entire study period.

---
 .../templates/v2/analysis/event_variables.py  | 23 -------------------
 .../templates/v2/analysis/measures.py         |  2 +-
 .../templates/v2/analysis/study_definition.py | 13 +----------
 3 files changed, 2 insertions(+), 36 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/event_variables.py b/interactive_templates/templates/v2/analysis/event_variables.py
index 6511c00b..9db45d35 100644
--- a/interactive_templates/templates/v2/analysis/event_variables.py
+++ b/interactive_templates/templates/v2/analysis/event_variables.py
@@ -1,5 +1,4 @@
 from cohortextractor import patients
-from report_utils import generate_expectations_codes
 
 
 def clinical_event(codelist, date_range, event_name, ever=False):
@@ -27,17 +26,6 @@ def clinical_event(codelist, date_range, event_name, ever=False):
                 return_expectations={"incidence": 0.5},
             )
         ),
-        f"{event_name}_code": (
-            patients.with_these_clinical_events(
-                codelist=codelist,
-                **date_kwargs,
-                returning="code",
-                return_expectations={
-                    "rate": "universal",
-                    "category": {"ratios": generate_expectations_codes(codelist)},
-                },
-            )
-        ),
         f"{event_name}_date": (
             patients.with_these_clinical_events(
                 codelist=codelist,
@@ -82,17 +70,6 @@ def medication_event(codelist, date_range, event_name, ever=False):
                 return_expectations={"incidence": 0.5},
             )
         ),
-        f"{event_name}_code": (
-            patients.with_these_medications(
-                codelist=codelist,
-                **date_kwargs,
-                returning="code",
-                return_expectations={
-                    "rate": "universal",
-                    "category": {"ratios": generate_expectations_codes(codelist)},
-                },
-            )
-        ),
         f"{event_name}_date": (
             patients.with_these_medications(
                 codelist=codelist,
diff --git a/interactive_templates/templates/v2/analysis/measures.py b/interactive_templates/templates/v2/analysis/measures.py
index a2449974..c0d1b75e 100644
--- a/interactive_templates/templates/v2/analysis/measures.py
+++ b/interactive_templates/templates/v2/analysis/measures.py
@@ -164,7 +164,7 @@ def main():
     args = parse_args()
     breakdowns = args.breakdowns
 
-    breakdowns.extend(["practice", "event_1_code", "event_2_code"])
+    breakdowns.extend(["practice"])
 
     measure_df = pd.DataFrame(
         columns=["date", "event_measure", "population", "group", "group_value"]
diff --git a/interactive_templates/templates/v2/analysis/study_definition.py b/interactive_templates/templates/v2/analysis/study_definition.py
index 47473621..879d4d31 100644
--- a/interactive_templates/templates/v2/analysis/study_definition.py
+++ b/interactive_templates/templates/v2/analysis/study_definition.py
@@ -110,6 +110,7 @@
     ),
 )
 
+
 measures = [
     Measure(
         id="event_rate",
@@ -117,18 +118,6 @@
         denominator="population",
         group_by=["practice"],
     ),
-    Measure(
-        id="event_code_1_rate",
-        numerator="event_1",
-        denominator="population",
-        group_by=["event_1_code"],
-    ),
-    Measure(
-        id="event_code_2_rate",
-        numerator="event_2",
-        denominator="population",
-        group_by=["event_2_code"],
-    ),
 ]
 
 if breakdowns:

From bfd2ae6f4a0d02483ea0783758e3edcb0ea3e050 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 10:20:03 +0100
Subject: [PATCH 02/22] drop `generate_expectations_codes`. This isn't used
 anymore as we're defining the code variables using
 "number_of_matches_in_period".

---
 .../templates/v2/analysis/report_utils.py              | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/report_utils.py b/interactive_templates/templates/v2/analysis/report_utils.py
index 519c62b2..f79ee07a 100644
--- a/interactive_templates/templates/v2/analysis/report_utils.py
+++ b/interactive_templates/templates/v2/analysis/report_utils.py
@@ -45,16 +45,6 @@ def relabel_sex(df):
     return df
 
 
-def generate_expectations_codes(codelist, incidence=0.5):
-    if len(codelist) >= 10:
-        expectations = {str(x): (1 - incidence) / 10 for x in codelist[0:10]}
-    else:
-        expectations = {str(x): (1 - incidence) / len(codelist) for x in codelist}
-
-    expectations[None] = incidence
-    return expectations
-
-
 def save_to_json(d, filename: str):
     """Saves dictionary to json file"""
     with open(filename, "w") as f:

From dcde620affc37fed52428a75c968eb16d1d39bd2 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 10:20:35 +0100
Subject: [PATCH 03/22] Generate variables with count of matches for each code
 in each codelist

---
 .../templates/v2/analysis/code_variables.py   | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 interactive_templates/templates/v2/analysis/code_variables.py

diff --git a/interactive_templates/templates/v2/analysis/code_variables.py b/interactive_templates/templates/v2/analysis/code_variables.py
new file mode 100644
index 00000000..3a3cc03a
--- /dev/null
+++ b/interactive_templates/templates/v2/analysis/code_variables.py
@@ -0,0 +1,43 @@
+from cohortextractor import codelist, patients
+
+
+def generate_code_variables(
+    code_list_1, codelist_1_type, code_list_2, codelist_2_type, start_date, end_date
+):
+    """Build one `count_<code>` variable for every code in both codelists.
+
+    Each variable counts how often that code was recorded between `start_date`
+    and `end_date`, using `patients.with_these_clinical_events` for an "event"
+    codelist and `patients.with_these_medications` for a "medication" one.
+    Returns a dict of variable name -> definition; any other codelist type
+    raises ValueError rather than an opaque TypeError from `dict.update(None)`.
+    """
+    query_functions = {
+        "event": patients.with_these_clinical_events,
+        "medication": patients.with_these_medications,
+    }
+
+    def make_variable(code, codelist_type, start_date, end_date):
+        if codelist_type not in query_functions:
+            raise ValueError(f"Unknown codelist type: {codelist_type!r}")
+        return {
+            f"count_{code}": query_functions[codelist_type](
+                codelist([code], system="snomed"),
+                between=[start_date, end_date],
+                returning="number_of_matches_in_period",
+                return_expectations={
+                    "incidence": 0.1,
+                    "int": {"distribution": "normal", "mean": 3, "stddev": 1},
+                },
+            )
+        }
+
+    variables = {}
+    # Codelist 1 is processed first; a code in both lists keeps its codelist 2 query.
+    for codes, codelist_type in (
+        (code_list_1, codelist_1_type),
+        (code_list_2, codelist_2_type),
+    ):
+        for code in codes:
+            variables.update(make_variable(code, codelist_type, start_date, end_date))
+    return variables

From cc3e81226af15ad8db2b34f578a72adffd5b03e3 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 10:30:57 +0100
Subject: [PATCH 04/22] use the new variables in the ethnicity study definition.
 This extracts variables once. In doing so, name this study definition more
 generally.

---
 ...n_ethnicity.py => study_definition_end.py} | 20 +++++++++++++++++--
 .../templates/v2/project.yaml.tmpl            | 10 +++++-----
 2 files changed, 23 insertions(+), 7 deletions(-)
 rename interactive_templates/templates/v2/analysis/{study_definition_ethnicity.py => study_definition_end.py} (78%)

diff --git a/interactive_templates/templates/v2/analysis/study_definition_ethnicity.py b/interactive_templates/templates/v2/analysis/study_definition_end.py
similarity index 78%
rename from interactive_templates/templates/v2/analysis/study_definition_ethnicity.py
rename to interactive_templates/templates/v2/analysis/study_definition_end.py
index a3eb9b4a..5d5dd872 100644
--- a/interactive_templates/templates/v2/analysis/study_definition_ethnicity.py
+++ b/interactive_templates/templates/v2/analysis/study_definition_end.py
@@ -1,7 +1,7 @@
+from code_variables import generate_code_variables
 from cohortextractor import StudyDefinition, codelist_from_csv, patients
 from config import CONFIG
 
-
 ethnicity_codes = codelist_from_csv(
     filename="codelists/opensafely-ethnicity-snomed-0removed.csv",
     column="snomedcode",
@@ -9,9 +9,22 @@
     category_column="Grouping_6",
 )
 
-
+codelist_1_path = CONFIG["codelist_1"]["path"]
+codelist_1_type = CONFIG["codelist_1"]["type"]
+codelist_2_path = CONFIG["codelist_2"]["path"]
+codelist_2_type = CONFIG["codelist_2"]["type"]
+start_date = CONFIG["start_date"]
 end_date = CONFIG["end_date"]
 
+codelist_1 = codelist_from_csv(codelist_1_path, system="snomed", column="code")
+
+codelist_2 = codelist_from_csv(
+    codelist_2_path,
+    system="snomed",
+    column="code",
+)
+
+
 study = StudyDefinition(
     index_date=end_date,
     default_expectations={
@@ -64,4 +77,7 @@
             },
         ),
     ),
+    **generate_code_variables(
+        codelist_1, codelist_1_type, codelist_2, codelist_2_type, start_date, end_date
+    ),
 )
diff --git a/interactive_templates/templates/v2/project.yaml.tmpl b/interactive_templates/templates/v2/project.yaml.tmpl
index b334b947..f195877a 100644
--- a/interactive_templates/templates/v2/project.yaml.tmpl
+++ b/interactive_templates/templates/v2/project.yaml.tmpl
@@ -5,13 +5,13 @@ expectations:
 
 actions:
 
-  generate_study_population_ethnicity_{{ id }}:
+  generate_study_population_end_{{ id }}:
     run: cohortextractor:latest generate_cohort
-      --study-definition study_definition_ethnicity
+      --study-definition study_definition_end
       --output-dir output/{{ id }} --output-format=feather
     outputs:
       highly_sensitive:
-        cohort: output/{{ id }}/input_ethnicity.feather
+        cohort: output/{{ id }}/input_end.feather
 
   generate_study_population_weekly_{{ id }}:
     run: cohortextractor:latest generate_cohort
@@ -40,9 +40,9 @@ actions:
     run: >
       cohort-joiner:v0.0.38
         --lhs output/{{ id }}/input_20*.feather
-        --rhs output/{{ id }}/input_ethnicity.feather
+        --rhs output/{{ id }}/input_end.feather
         --output-dir output/{{ id }}/joined
-    needs: [generate_study_population_{{ id }}, generate_study_population_ethnicity_{{ id }}]
+    needs: [generate_study_population_{{ id }}, generate_study_population_end_{{ id }}]
     outputs:
       highly_sensitive:
         cohort: output/{{ id }}/joined/input_20*.feather

From e058dc7baa5c49645616a8b8f03cd695e25d8b84 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 10:31:38 +0100
Subject: [PATCH 05/22] update top 5 to work with new variables

---
 .../templates/v2/analysis/top_5.py            | 86 ++++++++-----------
 .../templates/v2/project.yaml.tmpl            |  2 +-
 2 files changed, 36 insertions(+), 52 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/top_5.py b/interactive_templates/templates/v2/analysis/top_5.py
index 80e580ce..3d976457 100644
--- a/interactive_templates/templates/v2/analysis/top_5.py
+++ b/interactive_templates/templates/v2/analysis/top_5.py
@@ -84,8 +84,7 @@ def create_top_5_code_table(
     """
 
     event_counts = group_low_values(df, "num", code_column, low_count_threshold)
-
-    # round
+    event_counts = event_counts.copy()
 
     event_counts["num"] = event_counts["num"].apply(
         lambda x: round_values(x, rounding_base)
@@ -93,6 +92,7 @@ def create_top_5_code_table(
 
     # calculate % makeup of each code
     total_events = event_counts["num"].sum()
+
     event_counts["Proportion of codes (%)"] = round(
         (event_counts["num"] / total_events) * 100, 2
     )
@@ -105,10 +105,8 @@ def create_top_5_code_table(
 
     event_counts = event_counts.set_index(code_column).join(code_df).reset_index()
 
-    # set description of "Other column" to something readable
     event_counts.loc[event_counts[code_column] == "Other", "Description"] = "-"
 
-    # Rename the code column to something consistent
     event_counts.rename(columns={code_column: "Code"}, inplace=True)
 
     # sort by proportion of codes
@@ -118,7 +116,6 @@ def create_top_5_code_table(
 
     event_counts_with_counts = event_counts.copy()
 
-    # drop events column
     event_counts = event_counts.loc[
         :, ["Code", "Description", "Proportion of codes (%)"]
     ]
@@ -147,56 +144,43 @@ def main():
     args = parse_args()
     codelist_1_path = args.codelist_1_path
     codelist_2_path = args.codelist_2_path
-    measure_df = pd.read_csv(args.output_dir / "measure_all.csv")
-
-    code_df = measure_df.loc[measure_df["group"] == "event_1_code", :]
-    codelist = pd.read_csv(codelist_1_path, dtype={"code": str})
-
-    events_per_code = (
-        code_df.groupby("group_value")[["event_measure"]].sum().reset_index()
-    )
-    events_per_code.columns = ["code", "num"]
-
-    top_5_code_table, top_5_code_table_with_counts = create_top_5_code_table(
-        df=events_per_code,
-        code_df=codelist,
-        code_column="code",
-        term_column="term",
-        low_count_threshold=7,
-        rounding_base=7,
-    )
-    top_5_code_table.to_csv(args.output_dir / "top_5_code_table_1.csv", index=False)
 
     Path(args.output_dir / "for_checking").mkdir(parents=True, exist_ok=True)
 
-    top_5_code_table_with_counts.to_csv(
-        args.output_dir / "for_checking/top_5_code_table_with_counts_1.csv",
-        index=False,
-    )
+    input_df = pd.read_feather(args.output_dir / "input_end.feather")
 
-    code_df_2 = measure_df.loc[measure_df["group"] == "event_2_code", :]
-
-    # TODO: support vpids?
-    codelist_2 = pd.read_csv(codelist_2_path, dtype={"code": str})
-    events_per_code = (
-        code_df_2.groupby("group_value")[["event_measure"]].sum().reset_index()
-    )
-    events_per_code.columns = ["code", "num"]
-
-    top_5_code_table, top_5_code_table_with_counts = create_top_5_code_table(
-        df=events_per_code,
-        code_df=codelist_2,
-        code_column="code",
-        term_column="term",
-        low_count_threshold=7,
-        rounding_base=7,
-    )
-
-    top_5_code_table.to_csv(args.output_dir / "top_5_code_table_2.csv", index=False)
-    top_5_code_table_with_counts.to_csv(
-        args.output_dir / "for_checking" / "top_5_code_table_with_counts_2.csv",
-        index=False,
-    )
+    use_cols = [col for col in input_df.columns if col.startswith("count")] + [
+        "patient_id"
+    ]
+    input_df = input_df.loc[:, use_cols]
+    code_counts = input_df.sum().reset_index()
+    code_counts.columns = ["code", "num"]
+
+    code_counts["code"] = code_counts["code"].str.replace("count_", "")
+
+    for codelist_path in [codelist_1_path, codelist_2_path]:
+        codelist = pd.read_csv(codelist_path, dtype={"code": str})
+        codes = codelist["code"].to_list()
+        code_counts_subset = code_counts.loc[code_counts["code"].isin(codes), :]
+
+        top_5_code_table, top_5_code_table_with_counts = create_top_5_code_table(
+            df=code_counts_subset,
+            code_df=codelist,
+            code_column="code",
+            term_column="term",
+            low_count_threshold=7,
+            rounding_base=7,
+        )
+        codelist_number = codelist_path.split("/")[-1].split(".")[0].split("_")[-1]
+        top_5_code_table.to_csv(
+            args.output_dir / f"top_5_code_table_{codelist_number}.csv", index=False
+        )
+
+        top_5_code_table_with_counts.to_csv(
+            args.output_dir
+            / f"for_checking/top_5_code_table_with_counts_{codelist_number}.csv",
+            index=False,
+        )
 
 
 if __name__ == "__main__":
diff --git a/interactive_templates/templates/v2/project.yaml.tmpl b/interactive_templates/templates/v2/project.yaml.tmpl
index f195877a..8dde9f2b 100644
--- a/interactive_templates/templates/v2/project.yaml.tmpl
+++ b/interactive_templates/templates/v2/project.yaml.tmpl
@@ -68,7 +68,7 @@ actions:
       --codelist-1-path="{{ codelist_1.path }}"
       --codelist-2-path="{{ codelist_2.path }}"
       --output-dir="output/{{ id }}"
-    needs: [generate_measures_{{ id }}]
+    needs: [generate_study_population_end_{{ id }}]
     outputs:
       moderately_sensitive:
         table_1: output/{{ id }}/top_5_code_table_1.csv

From 546252d0cf72ef8bbbfd42deb932a602c11fe3c7 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 10:38:55 +0100
Subject: [PATCH 06/22] update description of top 5 in report

---
 .../templates/v2/analysis/report_template.html               | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/report_template.html b/interactive_templates/templates/v2/analysis/report_template.html
index 4442aba9..2abb568a 100644
--- a/interactive_templates/templates/v2/analysis/report_template.html
+++ b/interactive_templates/templates/v2/analysis/report_template.html
@@ -208,9 +208,8 @@ <h2>Most common codes</h2>
                     The tables below show the most common codes recorded within both the {{ codelist_1_name }} and the
                     {{ codelist_2_name }} codelists. For each code within each codelist, the number of times it was recorded
                     in the period between {{ start_date }} and {{ end_date }} is calculated. For each code, the percentage
-                    makeup of the total number of events is then calculated. Note that the code recorded is the latest code
-                    recorded for a given patient in the given period. Where a patient has had multiple codes recorded from
-                    the specified codelists, only the latest code will contribute to the counts in this table.
+                    makeup of the total number of events is then calculated. Note that this includes instances of these codes
+                    that may not contribute to the measure of interest.
                 </p>
                 <table>
                     <caption><strong>Table 2</strong>. The top 5 most common codes recorded for codelist 1 ({{ codelist_1_name }}).</caption>

From fc5da047f04570ab23b3bd3b3a526660a1d2b98f Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 10:55:59 +0100
Subject: [PATCH 07/22] handle very high or very low code proportions. See
 opensafely-core/interactive-templates/issues/149

---
 .../templates/v2/analysis/top_5.py                | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/interactive_templates/templates/v2/analysis/top_5.py b/interactive_templates/templates/v2/analysis/top_5.py
index 3d976457..a4b99ca3 100644
--- a/interactive_templates/templates/v2/analysis/top_5.py
+++ b/interactive_templates/templates/v2/analysis/top_5.py
@@ -109,6 +109,19 @@ def create_top_5_code_table(
 
     event_counts.rename(columns={code_column: "Code"}, inplace=True)
 
+    # because of rounding, some codes can appear as either 0.00% or 100.00%. If they are
+    # not truly 0.00% or 100.00%, we want to display them as <0.001% or >99.99% respectively
+    event_counts.loc[
+        (event_counts["Proportion of codes (%)"] == 0) & (event_counts["num"] > 0),
+        "Proportion of codes (%)",
+    ] = "<0.001"
+
+    event_counts.loc[
+        (event_counts["Proportion of codes (%)"] == 100)
+        & (event_counts["num"] < total_events),
+        "Proportion of codes (%)",
+    ] = ">99.99"
+
     # sort by proportion of codes
     event_counts = event_counts.sort_values(
         ascending=False, by="Proportion of codes (%)"
@@ -120,7 +133,7 @@ def create_top_5_code_table(
         :, ["Code", "Description", "Proportion of codes (%)"]
     ]
     # return top n rows
-    return event_counts.head(5), event_counts_with_counts
+    return event_counts.head(nrows), event_counts_with_counts
 
 
 def parse_args():

From 84e40fbcfb873b25ef1df2b85c16da0e34311341 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 11:07:01 +0100
Subject: [PATCH 08/22] refactor `create_top_5_code_table` to make it easier to
 test

---
 .../templates/v2/analysis/top_5.py            | 86 +++++++++----------
 1 file changed, 41 insertions(+), 45 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/top_5.py b/interactive_templates/templates/v2/analysis/top_5.py
index a4b99ca3..df2dc006 100644
--- a/interactive_templates/templates/v2/analysis/top_5.py
+++ b/interactive_templates/templates/v2/analysis/top_5.py
@@ -66,73 +66,69 @@ def round_values(x, base=5):
     return rounded
 
 
-def create_top_5_code_table(
-    df, code_df, code_column, term_column, low_count_threshold, rounding_base, nrows=5
-):
-    """Creates a table of the top 5 codes recorded with the number of events and % makeup of each code.
-    Args:
-        df: A measure table.
-        code_df: A codelist table.
-        code_column: The name of the code column in the codelist table.
-        term_column: The name of the term column in the codelist table.
-        measure: The measure ID.
-        low_count_threshold: Value to use as threshold for disclosure control.
-        rounding_base: Base to round to.
-        nrows: The number of rows to display.
-    Returns:
-        A table of the top `nrows` codes.
-    """
-
-    event_counts = group_low_values(df, "num", code_column, low_count_threshold)
-    event_counts = event_counts.copy()
-
+def apply_rounding(event_counts, rounding_base):
     event_counts["num"] = event_counts["num"].apply(
         lambda x: round_values(x, rounding_base)
     )
+    return event_counts
 
-    # calculate % makeup of each code
-    total_events = event_counts["num"].sum()
 
+def calculate_proportion(event_counts):
+    total_events = event_counts["num"].sum()
     event_counts["Proportion of codes (%)"] = round(
         (event_counts["num"] / total_events) * 100, 2
     )
+    return event_counts
 
-    # Gets the human-friendly description of the code for the given row
-    # e.g. "Systolic blood pressure".
+
+def add_description(event_counts, code_df, code_column, term_column):
     code_df = code_df.set_index(code_column).rename(
         columns={term_column: "Description"}
     )
-
     event_counts = event_counts.set_index(code_column).join(code_df).reset_index()
-
     event_counts.loc[event_counts[code_column] == "Other", "Description"] = "-"
+    return event_counts
+
 
+def handle_edge_case_percentages(event_counts):
+    """Relabel proportions that only look like 0% or 100% because of rounding.
+
+    Proportions are rounded to 2 d.p., so a non-zero share shown as 0.00 is
+    below 0.01 and a partial share shown as 100.00 is above 99.99. Modifies and
+    returns `event_counts`; relabelled cells hold strings, not floats.
+    """
+    column = "Proportion of codes (%)"
+    counts, shares = event_counts["num"], event_counts[column]
+    zero_condition = (shares == 0) & (counts > 0)
+    hundred_condition = (shares == 100) & (counts < counts.sum())
+    shares = shares.mask(zero_condition, "<0.01").mask(hundred_condition, ">99.99")
+    event_counts[column] = shares
+    return event_counts
+
+
+def create_top_5_code_table(
+    df, code_df, code_column, term_column, low_count_threshold, rounding_base, nrows=5
+):
+    """Return (top `nrows` codes by proportion, full sorted table including counts)."""
+
+    event_counts = group_low_values(df, "num", code_column, low_count_threshold)
+    event_counts = event_counts.copy()
+
+    event_counts = apply_rounding(event_counts, rounding_base)
+    event_counts = calculate_proportion(event_counts)
+    event_counts = add_description(event_counts, code_df, code_column, term_column)
     event_counts.rename(columns={code_column: "Code"}, inplace=True)
+    # Sort before relabelling edge cases: the labels are strings, which break sorting.
 
-    # because of rounding, some codes can appear as either 0.00% or 100.00%. If they are
-    # not truly 0.00% or 100.00%, we want to display them as <0.001% or >99.99% respectively
-    event_counts.loc[
-        (event_counts["Proportion of codes (%)"] == 0) & (event_counts["num"] > 0),
-        "Proportion of codes (%)",
-    ] = "<0.001"
-
-    event_counts.loc[
-        (event_counts["Proportion of codes (%)"] == 100)
-        & (event_counts["num"] < total_events),
-        "Proportion of codes (%)",
-    ] = ">99.99"
-
-    # sort by proportion of codes
-    event_counts = event_counts.sort_values(
+    event_counts_sorted = event_counts.sort_values(
         ascending=False, by="Proportion of codes (%)"
     )
 
-    event_counts_with_counts = event_counts.copy()
-
-    event_counts = event_counts.loc[
+    event_counts_with_counts = handle_edge_case_percentages(event_counts_sorted)
+    event_counts = event_counts_with_counts.loc[
         :, ["Code", "Description", "Proportion of codes (%)"]
     ]
-    # return top n rows
+
     return event_counts.head(nrows), event_counts_with_counts
 
 

From 20de052e06dac26c30f56d67c6134137f732bb26 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 13:12:25 +0100
Subject: [PATCH 09/22] test `group_low_values`

---
 .../templates/v2/tests/test_top_5.py          | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 interactive_templates/templates/v2/tests/test_top_5.py

diff --git a/interactive_templates/templates/v2/tests/test_top_5.py b/interactive_templates/templates/v2/tests/test_top_5.py
new file mode 100644
index 00000000..9c3ce169
--- /dev/null
+++ b/interactive_templates/templates/v2/tests/test_top_5.py
@@ -0,0 +1,54 @@
+from analysis.top_5 import (
+    add_description,
+    apply_rounding,
+    calculate_proportion,
+    create_top_5_code_table,
+    group_low_values,
+    handle_edge_case_percentages,
+    round_values,
+)
+from hypothesis import given
+from hypothesis import strategies as st
+from hypothesis.extra.pandas import column, data_frames
+
+
+codes_strategy = st.text(min_size=1, max_size=1).map(str)
+count_strategy = st.integers(min_value=0, max_value=100)
+
+df_strategy = data_frames(
+    [
+        column("code", elements=codes_strategy, unique=True),
+        column("num", elements=count_strategy),
+    ]
+)
+
+
+class TestGroupLowValues:
+    @given(df=df_strategy)
+    def test_values_above_threshold(self, df):
+        result = group_low_values(df, "num", "code", 5)
+        assert (
+            result["num"] > 5
+        ).all(), "Values below the threshold were not redacted."
+
+    @given(df=df_strategy)
+    def test_redacted_rows_grouped_into_other(self, df):
+        result = group_low_values(df, "num", "code", 5)
+        if (result["code"] == "Other").any():
+            assert (
+                result.loc[result["code"] == "Other", "num"].sum() >= 5
+            ), "'Other' row sum is below threshold."
+
+    @given(df=df_strategy)
+    def test_all_zero_values_are_suppressed(self, df):
+        if df["num"].sum() == 0:
+            result = group_low_values(df, "num", "code", 5)
+            assert result.empty, "Zero values were not suppressed."
+
+    @given(df=df_strategy)
+    def test_no_redaction_when_all_values_above_threshold(self, df):
+        if not df.empty and (df["num"] > 5).all():  # compare each value, then reduce
+            result = group_low_values(df, "num", "code", 5)
+            assert result.equals(
+                df
+            ), "Redaction happened when all values were above the threshold."

From 69fae9a972a693445889ed8928aa58ec7fd70579 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 13:17:05 +0100
Subject: [PATCH 10/22] test `round_values`

---
 .../templates/v2/tests/test_top_5.py          | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/interactive_templates/templates/v2/tests/test_top_5.py b/interactive_templates/templates/v2/tests/test_top_5.py
index 9c3ce169..8564eecf 100644
--- a/interactive_templates/templates/v2/tests/test_top_5.py
+++ b/interactive_templates/templates/v2/tests/test_top_5.py
@@ -1,3 +1,5 @@
+import numpy as np
+
 from analysis.top_5 import (
     add_description,
     apply_rounding,
@@ -52,3 +54,28 @@ def test_no_redaction_when_all_values_above_threshold(self, df):
             assert result.equals(
                 df
             ), "Redaction happened when all values were above the threshold."
+
+
+class TestRoundValues:
+    @given(x=st.floats(allow_nan=True, allow_infinity=False), base=st.integers(1, 10))
+    def test_rounding_floats(self, x, base):
+        result = round_values(x, base)
+
+        if np.isnan(x):
+            assert np.isnan(result), f"Expected NaN but got {result} for input {x}"
+        else:
+            expected = int(base * round(x / base))
+            assert (
+                result == expected
+            ), f"Expected {expected} but got {result} for input {x}"
+
+    @given(x=st.integers(min_value=0, max_value=100_000_000), base=st.integers(1, 10))
+    def test_rounding_integers(self, x, base):
+        result = round_values(x, base)
+        expected = int(base * round(x / base))
+        assert result == expected, f"Expected {expected} but got {result} for input {x}"
+
+    @given(x=st.text())
+    def test_non_numeric_input(self, x):
+        result = round_values(x)
+        assert result == x, f"Expected {x} but got {result} for non-numeric input"

From 56736f06a559eae35a1168cc3399eedb5173c749 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 13:18:45 +0100
Subject: [PATCH 11/22] test `apply_rounding` and `calculate_proportion`. As
 part of this, handle 0 total events in top 5

---
 .../templates/v2/analysis/top_5.py            | 12 ++++++--
 .../templates/v2/tests/test_top_5.py          | 28 +++++++++++++++++++
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/top_5.py b/interactive_templates/templates/v2/analysis/top_5.py
index df2dc006..e38248ce 100644
--- a/interactive_templates/templates/v2/analysis/top_5.py
+++ b/interactive_templates/templates/v2/analysis/top_5.py
@@ -75,9 +75,15 @@ def apply_rounding(event_counts, rounding_base):
 
 def calculate_proportion(event_counts):
     total_events = event_counts["num"].sum()
-    event_counts["Proportion of codes (%)"] = round(
-        (event_counts["num"] / total_events) * 100, 2
-    )
+
+    # ensure total events is not 0
+    if total_events == 0:
+        event_counts["Proportion of codes (%)"] = np.nan
+
+    else:
+        event_counts["Proportion of codes (%)"] = round(
+            (event_counts["num"] / total_events) * 100, 2
+        )
     return event_counts
 
 
diff --git a/interactive_templates/templates/v2/tests/test_top_5.py b/interactive_templates/templates/v2/tests/test_top_5.py
index 8564eecf..b92a8fef 100644
--- a/interactive_templates/templates/v2/tests/test_top_5.py
+++ b/interactive_templates/templates/v2/tests/test_top_5.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 
 from analysis.top_5 import (
     add_description,
@@ -79,3 +80,30 @@ def test_rounding_integers(self, x, base):
     def test_non_numeric_input(self, x):
         result = round_values(x)
         assert result == x, f"Expected {x} but got {result} for non-numeric input"
+
+
+@given(df=df_strategy, rounding_base=st.integers(1, 10))
+def test_apply_rounding(df, rounding_base):
+    result_df = apply_rounding(df.copy(), rounding_base)
+
+    # All numbers should be rounded to the nearest multiple of rounding_base
+    for num in result_df["num"]:
+        assert num % rounding_base == 0, f"{num} not rounded to nearest {rounding_base}"
+
+
+@given(df=df_strategy)
+def test_calculate_proportion(df):
+    result_df = calculate_proportion(df.copy())
+
+    total = result_df["num"].sum()
+
+    if total == 0:
+        assert all(
+            pd.isna(result_df["Proportion of codes (%)"])
+        ), "Proportion should be NaN when total is 0."
+    else:
+        for _, row in result_df.iterrows():
+            expected_proportion = round((row["num"] / total) * 100, 2)
+            assert (
+                row["Proportion of codes (%)"] == expected_proportion
+            ), f"Expected {expected_proportion} but got {row['Proportion of codes (%)']} for count {row['num']}"

From d9e78abbba43821def959970b1298bacc5959f42 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 13:20:59 +0100
Subject: [PATCH 12/22] test `add_description`

---
 .../templates/v2/tests/test_top_5.py          | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/interactive_templates/templates/v2/tests/test_top_5.py b/interactive_templates/templates/v2/tests/test_top_5.py
index b92a8fef..e8e09efa 100644
--- a/interactive_templates/templates/v2/tests/test_top_5.py
+++ b/interactive_templates/templates/v2/tests/test_top_5.py
@@ -25,6 +25,15 @@
     ]
 )
 
+description_strategy = st.text(min_size=1, max_size=20).map(str)
+
+code_df_strategy = data_frames(
+    [
+        column("code", elements=codes_strategy, unique=True),
+        column("term", elements=description_strategy),
+    ]
+)
+
 
 class TestGroupLowValues:
     @given(df=df_strategy)
@@ -107,3 +116,26 @@ def test_calculate_proportion(df):
             assert (
                 row["Proportion of codes (%)"] == expected_proportion
             ), f"Expected {expected_proportion} but got {row['Proportion of codes (%)']} for count {row['num']}"
+
+
+@given(event_counts=df_strategy, code_df=code_df_strategy)
+def test_add_description(event_counts, code_df):
+    result = add_description(event_counts, code_df, "code", "term")
+
+    # Ensure that the Description column exists
+    assert "Description" in result.columns
+
+    # Ensure that the 'Description' column is filled correctly
+    for _, row in result.iterrows():
+        if row["code"] == "Other":
+            assert row["Description"] == "-"
+        elif row["code"] in code_df["code"].values:
+            assert (
+                row["Description"]
+                == code_df[code_df["code"] == row["code"]]["term"].iloc[0]
+            )
+        else:
+            assert row["Description"] == "-"
+
+    # Ensure that no rows were lost
+    assert len(result) == len(event_counts)

From 7e4c4c2bf2122ef62a5a4af4fa34b6cc2a000607 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 13:23:37 +0100
Subject: [PATCH 13/22] test `handle_edge_case_percentages` As part of this
 handle descriptions without match top 5

---
 .../templates/v2/analysis/top_5.py              |  9 +++++++++
 .../templates/v2/tests/test_top_5.py            | 17 +++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/interactive_templates/templates/v2/analysis/top_5.py b/interactive_templates/templates/v2/analysis/top_5.py
index e38248ce..262f113b 100644
--- a/interactive_templates/templates/v2/analysis/top_5.py
+++ b/interactive_templates/templates/v2/analysis/top_5.py
@@ -88,11 +88,20 @@ def calculate_proportion(event_counts):
 
 
 def add_description(event_counts, code_df, code_column, term_column):
+    if code_df.empty:
+        event_counts["Description"] = "-"
+        return event_counts
+
     code_df = code_df.set_index(code_column).rename(
         columns={term_column: "Description"}
     )
+
     event_counts = event_counts.set_index(code_column).join(code_df).reset_index()
     event_counts.loc[event_counts[code_column] == "Other", "Description"] = "-"
+
+    # For codes that did not find a match in code_df set a default value
+    event_counts["Description"].fillna("-", inplace=True)
+
     return event_counts
 
 
diff --git a/interactive_templates/templates/v2/tests/test_top_5.py b/interactive_templates/templates/v2/tests/test_top_5.py
index e8e09efa..b6146e8b 100644
--- a/interactive_templates/templates/v2/tests/test_top_5.py
+++ b/interactive_templates/templates/v2/tests/test_top_5.py
@@ -139,3 +139,20 @@ def test_add_description(event_counts, code_df):
 
     # Ensure that no rows were lost
     assert len(result) == len(event_counts)
+
+
+@given(df=df_strategy)
+def test_handle_edge_case_percentages(df):
+    df_with_proportions = calculate_proportion(df.copy())
+    result_df = handle_edge_case_percentages(df_with_proportions.copy())
+
+    for _, row in result_df.iterrows():
+        if (row["Proportion of codes (%)"] == 0) and (row["num"] > 0):
+            assert (
+                row["Proportion of codes (%)"] == "<0.001"
+            ), f"Expected '<0.001' but got {row['Proportion of codes (%)']} for num {row['num']}"
+
+        if (row["Proportion of codes (%)"] == 100) and (row["num"] < df["num"].sum()):
+            assert (
+                row["Proportion of codes (%)"] == ">99.99"
+            ), f"Expected '>99.99' but got {row['Proportion of codes (%)']} for num {row['num']}"

From 4954afcc90a05c7de916c98267b24c6ae6db9b82 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 13:24:17 +0100
Subject: [PATCH 14/22] test `create_top_5_code_table` As part of this order
 columns in the two versions of top 5 consistently

---
 .../templates/v2/analysis/top_5.py            |  3 ++
 .../templates/v2/tests/test_top_5.py          | 41 +++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/interactive_templates/templates/v2/analysis/top_5.py b/interactive_templates/templates/v2/analysis/top_5.py
index 262f113b..7a403644 100644
--- a/interactive_templates/templates/v2/analysis/top_5.py
+++ b/interactive_templates/templates/v2/analysis/top_5.py
@@ -143,6 +143,9 @@ def create_top_5_code_table(
     event_counts = event_counts_sorted.loc[
         :, ["Code", "Description", "Proportion of codes (%)"]
     ]
+    event_counts_with_counts = event_counts_with_counts.loc[
+        :, ["Code", "num", "Description", "Proportion of codes (%)"]
+    ]
 
     return event_counts.head(nrows), event_counts_with_counts
 
diff --git a/interactive_templates/templates/v2/tests/test_top_5.py b/interactive_templates/templates/v2/tests/test_top_5.py
index b6146e8b..acba62e4 100644
--- a/interactive_templates/templates/v2/tests/test_top_5.py
+++ b/interactive_templates/templates/v2/tests/test_top_5.py
@@ -156,3 +156,44 @@ def test_handle_edge_case_percentages(df):
             assert (
                 row["Proportion of codes (%)"] == ">99.99"
             ), f"Expected '>99.99' but got {row['Proportion of codes (%)']} for num {row['num']}"
+
+
+@given(
+    df=df_strategy,
+    code_df=code_df_strategy,
+    code_column=st.just("code"),
+    term_column=st.just("term"),
+    low_count_threshold=st.integers(min_value=1, max_value=10),
+    rounding_base=st.integers(min_value=1, max_value=10),
+    nrows=st.integers(min_value=1, max_value=10),
+)
+def test_create_top_5_code_table(
+    df, code_df, code_column, term_column, low_count_threshold, rounding_base, nrows
+):
+    top_5, top_5_with_counts = create_top_5_code_table(
+        df, code_df, code_column, term_column, low_count_threshold, rounding_base, nrows
+    )
+
+    assert len(top_5) <= nrows
+
+    # Make sure that the order is correct based on proportion
+    if not top_5.empty:
+        assert list(top_5["Proportion of codes (%)"]) == sorted(
+            top_5["Proportion of codes (%)"], reverse=True
+        )
+
+    # Ensure the table with counts never has more rows than the input
+    assert len(top_5_with_counts) <= len(df)
+
+    # The two results should share the same sorted order based on "Proportion of codes (%)"
+    if not top_5.empty and not top_5_with_counts.empty:
+        assert list(top_5["Code"]) == list(top_5_with_counts["Code"].head(len(top_5)))
+
+        # Ensure they have the expected columns
+        assert list(top_5.columns) == ["Code", "Description", "Proportion of codes (%)"]
+        assert list(top_5_with_counts.columns) == [
+            "Code",
+            "num",
+            "Description",
+            "Proportion of codes (%)",
+        ]

From 86c1305648d612374d449c38b7e01416475b30b6 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 15:28:41 +0100
Subject: [PATCH 15/22] fix ruff errors

---
 .../templates/v2/analysis/study_definition_end.py                | 1 +
 interactive_templates/templates/v2/tests/test_top_5.py           | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/interactive_templates/templates/v2/analysis/study_definition_end.py b/interactive_templates/templates/v2/analysis/study_definition_end.py
index 5d5dd872..0ff4b3c5 100644
--- a/interactive_templates/templates/v2/analysis/study_definition_end.py
+++ b/interactive_templates/templates/v2/analysis/study_definition_end.py
@@ -2,6 +2,7 @@
 from cohortextractor import StudyDefinition, codelist_from_csv, patients
 from config import CONFIG
 
+
 ethnicity_codes = codelist_from_csv(
     filename="codelists/opensafely-ethnicity-snomed-0removed.csv",
     column="snomedcode",
diff --git a/interactive_templates/templates/v2/tests/test_top_5.py b/interactive_templates/templates/v2/tests/test_top_5.py
index acba62e4..a0e63f07 100644
--- a/interactive_templates/templates/v2/tests/test_top_5.py
+++ b/interactive_templates/templates/v2/tests/test_top_5.py
@@ -1,6 +1,5 @@
 import numpy as np
 import pandas as pd
-
 from analysis.top_5 import (
     add_description,
     apply_rounding,

From 47130d8a32842c26976282c19041345ea98a376f Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 15:54:50 +0100
Subject: [PATCH 16/22] figure captions above figures

---
 .../templates/v2/analysis/report_template.html              | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/report_template.html b/interactive_templates/templates/v2/analysis/report_template.html
index 2abb568a..87cc5120 100644
--- a/interactive_templates/templates/v2/analysis/report_template.html
+++ b/interactive_templates/templates/v2/analysis/report_template.html
@@ -161,12 +161,12 @@ <h2>Population level rate</h2>
                     patients for the measure described above.
                 </p>
                 <figure>
-                    {{ display_image(population_plot.path, population_plot.data) }}
                     <figcaption>
                         <strong>Figure 1</strong>. The monthly rate per 1000 registered patients
                         in the selected population for the specified measure between
                         {{ start_date }} and {{ end_date }}.
                     </figcaption>
+                    {{ display_image(population_plot.path, population_plot.data) }}
                 </figure>
             </section>
 
@@ -193,12 +193,12 @@ <h2>Practice level variation</h2>
 
 
                 <figure>
-                    {{ display_image(decile.path, decile.data) }}
                     <figcaption>
                         <strong>Figure 2</strong>. Practice level decile chart showing
                         practice level variation in the rate per 1000 registered patients who satisfy the
                         specified measure between {{ start_date }} and {{ end_date }}.
                     </figcaption>
+                    {{ display_image(decile.path, decile.data) }}
                 </figure>
             </section>
 
@@ -291,13 +291,13 @@ <h3>Breakdown by {{ b.title }}</h3>
 
                     {% if b.figure.exists %}
                     <figure>
-                        {{ display_image(b.figure.path, b.figure.data) }}
                         <figcaption>
                             <strong>Figure {{ i.value }}</strong>. The rate
                             per 1000 patients in the selected population for
                             the measure of interest, broken down by
                             {{ b.title }}
                         </figcaption>
+                        {{ display_image(b.figure.path, b.figure.data) }}
                         {% set i.value = i.value +1 %}
                     </figure>
                     {% else %}

From 8540bb5b56d3ea072f74909d9dbc5d6101240641 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 15:59:51 +0100
Subject: [PATCH 17/22] add missing link for deciles explanation

---
 .../templates/v2/analysis/report_template.html                 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/interactive_templates/templates/v2/analysis/report_template.html b/interactive_templates/templates/v2/analysis/report_template.html
index 87cc5120..2f4e0b80 100644
--- a/interactive_templates/templates/v2/analysis/report_template.html
+++ b/interactive_templates/templates/v2/analysis/report_template.html
@@ -180,7 +180,8 @@ <h2>Practice level variation</h2>
                     For each month, the practices are then ranked by their rate. From this, the median
                     (5th decile) practice level rate is calculated as well as the rate for the 10th
                     percentile etc. The wider the gap between the deciles, the more practice level
-                    variability there is. You can read more about how we use deciles here.
+                    variability there is. You can read more about how we use deciles
+                    <a href="https://www.bennett.ox.ac.uk/blog/2019/04/communicating-variation-in-prescribing-why-we-use-deciles/">here</a>.
 
                 </p>
 

From 92c12af51945dcec13341c20eedd98d3a9e1a75d Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 16:04:20 +0100
Subject: [PATCH 18/22] set ylim for figures to 1.1 x max val

---
 interactive_templates/templates/v2/analysis/report_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/interactive_templates/templates/v2/analysis/report_utils.py b/interactive_templates/templates/v2/analysis/report_utils.py
index f79ee07a..53e1ddff 100644
--- a/interactive_templates/templates/v2/analysis/report_utils.py
+++ b/interactive_templates/templates/v2/analysis/report_utils.py
@@ -136,7 +136,7 @@ def plot_measures(
             0,
             1000
             if df[column_to_plot].isnull().values.all()
-            else df[column_to_plot].max(),
+            else df[column_to_plot].max() * 1.1,
         ),
     )
 

From 963d051d3094d9490de3371a9f3449f7e68ef132 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 16:07:29 +0100
Subject: [PATCH 19/22] increase line width plots

---
 interactive_templates/templates/v2/analysis/report_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/interactive_templates/templates/v2/analysis/report_utils.py b/interactive_templates/templates/v2/analysis/report_utils.py
index 53e1ddff..a5646ef0 100644
--- a/interactive_templates/templates/v2/analysis/report_utils.py
+++ b/interactive_templates/templates/v2/analysis/report_utils.py
@@ -111,6 +111,7 @@ def plot_measures(
                     palette=palette,
                     ax=ax,
                     label=unique_category,
+                    linewidth=1.5,
                 )
 
         else:
@@ -124,10 +125,11 @@ def plot_measures(
                     palette=palette,
                     ax=ax,
                     label=unique_category,
+                    linewidth=1.5,
                 )
 
     else:
-        ax.plot(df["date"], df[column_to_plot])
+        ax.plot(df["date"], df[column_to_plot], linewidth=1.5)
 
     ax.set(
         ylabel=y_label,

From 59948825bb89b1798eab52e92f6afb34b73a123b Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 16:25:07 +0100
Subject: [PATCH 20/22] replace `number of events`. refer to measure instead

---
 .../v2/analysis/report_template.html          | 29 ++++++++++---------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/report_template.html b/interactive_templates/templates/v2/analysis/report_template.html
index 2f4e0b80..908b77e8 100644
--- a/interactive_templates/templates/v2/analysis/report_template.html
+++ b/interactive_templates/templates/v2/analysis/report_template.html
@@ -102,7 +102,7 @@ <h2>Analysis limitations</h2>
                             </li>
                             <li>
                                 If the chosen codelists represent <strong>vaccinations, referrals, maternity records,
-                                or activities not directly occurring in primary care</strong> , note that some events
+                                or activities not directly occurring in primary care</strong>, note that some events
                                 may be recorded in way that are not fully captured by this analysis, and/or are subject
                                 to limited recording in primary care.
                             </li>
@@ -119,27 +119,28 @@ <h2>Analysis limitations</h2>
             <section id="measure-summary">
                 <h2>Measure summary</h2>
                 <p>The table below shows the total number of times the measure of interest
-                    occurred and the number of unique patients experiencing the event between
-                    {{ start_date }} and {{ end_date }}. The number of events in the latest
-                    complete month and latest week is also shown. An event is defined as a
+                    occurred and the number of unique patients with the measure between
+                    {{ start_date }} and {{ end_date }}. The number of patients with the measure of interest
+                    in the latest complete month and latest week is also shown. The measure is defined as a
                     patient having a code recorded from both codelist 1 and codelist 2 that
-                    satisfies the specified measure logic. A patient can have multiple events,
-                    but a maximum of 1 event per patient is counted each month.
+                    satisfies the specified measure logic. The measure can be satisfied multiple times by
+                    the same patient, but here a maximum of 1 instance per patient is counted each month.
                 </p>
                 <table>
                     <caption>
                         <strong>Table 1</strong>. Summary table showing the total number of patients
                         who meet the measure criteria at some point during the study period, the total
-                        number of event throughout the study period (up to one event, per patient, per month),
-                        and the number of events in the latest complete month and complete week.
+                        number of instances the measure is recorded throughout the study period (up to one
+                        instance, per patient, per month), and the number of times the measure was recorded
+                        in the latest complete month and complete week.
                     </caption>
                     <thead>
                         <tr>
-                            <th>Total events</th>
-                            <th>Total patients</th>
-                            <th>Total patients with events</th>
-                            <th>Events in latest month ({{ summary_table_data.latest_month }})</th>
-                            <th>Events in latest week ({{ summary_table_data.latest_week }})</th>
+                            <th>Total instances of measure</th>
+                            <th>Total patients in study population</th>
+                            <th>Total patients with measure</th>
+                            <th>Patients with measure in latest month ({{ summary_table_data.latest_month }})</th>
+                            <th>Patients with measure in latest week ({{ summary_table_data.latest_week }})</th>
                         </tr>
                     </thead>
                     <tbody>
@@ -209,7 +210,7 @@ <h2>Most common codes</h2>
                     The tables below show the most common codes recorded within both the {{ codelist_1_name }} and the
                     {{ codelist_2_name }} codelists. For each code within each codelist, the number of times it was recorded
                     in the period between {{ start_date }} and {{ end_date }} is calculated. For each code, the percentage
-                    makeup of the total number of events is then calculated. Note that this includes instances of these codes
+                    makeup is then calculated. Note that this includes instances of these codes
                     that may not contribute to the measure of interest.
                 </p>
                 <table>

From e81d92b33e2a1f1a643780ab0f807f12eda89b71 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 16:50:02 +0100
Subject: [PATCH 21/22] put legend above plots This gives consistent width to
 the plots. The legend settings here work for the possible set of legends we
 have but may need to be adapted if these change in the future.

---
 .../templates/v2/analysis/report_utils.py        | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/interactive_templates/templates/v2/analysis/report_utils.py b/interactive_templates/templates/v2/analysis/report_utils.py
index a5646ef0..6a1eed42 100644
--- a/interactive_templates/templates/v2/analysis/report_utils.py
+++ b/interactive_templates/templates/v2/analysis/report_utils.py
@@ -96,7 +96,7 @@ def plot_measures(
     if category:
         df[category] = df[category].fillna("Missing")
 
-    _, ax = plt.subplots(figsize=(15, 8))
+    _, ax = plt.subplots(figsize=(15, 12))
     palette = sns.color_palette("tab10")
 
     if category:
@@ -150,9 +150,10 @@ def plot_measures(
 
     if category:
         ax.legend(
-            bbox_to_anchor=(1.04, 1),
-            loc="upper left",
+            bbox_to_anchor=(0.5, 1.2),
+            loc="upper center",
             fontsize=20,
+            ncol=len(df[category]) if len(df[category]) < 4 else 4,
         )
 
     ax.margins(x=0)
@@ -255,7 +256,7 @@ def deciles_chart(df, filename, period_column=None, column=None, title="", ylabe
         ylabel: the label of the y-axis of the chart
     """
 
-    fig, ax = plt.subplots(figsize=(15, 8))
+    fig, ax = plt.subplots(figsize=(15, 12))
 
     linestyles = {
         "decile": {"line": "b--", "linewidth": 1, "label": "Decile"},
@@ -317,12 +318,13 @@ def deciles_chart(df, filename, period_column=None, column=None, title="", ylabe
     plt.xticks(sorted(df[period_column].unique()), rotation=90)
     ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
     ax.legend(
-        bbox_to_anchor=(1.1, 0.8),
-        loc="center left",
-        ncol=1,
+        bbox_to_anchor=(0.5, 1.2),
+        loc="upper center",
+        ncol=3,
         fontsize=20,
         borderaxespad=0.0,
     )
+
     plt.tight_layout()
     plt.savefig(filename)
     plt.clf()

From 3982b118dd8f7a842de869fcec590bddd70df557 Mon Sep 17 00:00:00 2001
From: Louis Fisher <louis.fisher1919@gmail.com>
Date: Thu, 10 Aug 2023 17:05:28 +0100
Subject: [PATCH 22/22] indicate the rate is per 1000

---
 .../templates/v2/analysis/report_template.html                  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/interactive_templates/templates/v2/analysis/report_template.html b/interactive_templates/templates/v2/analysis/report_template.html
index 908b77e8..d69e6d0c 100644
--- a/interactive_templates/templates/v2/analysis/report_template.html
+++ b/interactive_templates/templates/v2/analysis/report_template.html
@@ -52,7 +52,7 @@ <h2>Measure description</h2>
 
                 <p>
                     This measure is calculated for <strong>{{ population }}</strong> using the OpenSAFELY-TPP
-                    dataset, which covers ~40% of England. The monthly rate of this measure is shown below.
+                    dataset, which covers ~40% of England. The monthly rate per 1000 patients of this measure is shown below.
                     {% if breakdowns|length >0 %}
                     A breakdown of this measure by
                     {% for b in breakdowns %}