def add_description(event_counts, code_df, code_column, term_column):
    """Attach a ``Description`` column to ``event_counts``.

    Descriptions are looked up in ``code_df`` by matching on ``code_column``.
    Rows with no match, and the row whose code is the literal ``"Other"``,
    get the placeholder ``"-"``.

    Args:
        event_counts: DataFrame containing a ``code_column`` column.
        code_df: DataFrame containing ``code_column`` and ``term_column``.
            May be empty, in which case every row gets ``"-"``.
        code_column: Name of the column holding the codes in both frames.
        term_column: Name of the column in ``code_df`` holding the text
            that becomes ``Description``.

    Returns:
        A new DataFrame with a ``Description`` column; the ``event_counts``
        object passed in is not modified.
    """
    if code_df.empty:
        # Nothing to look up. ``assign`` returns a new frame, so the caller's
        # frame is left untouched, matching the non-empty path below.
        return event_counts.assign(Description="-")

    lookup = code_df.set_index(code_column).rename(
        columns={term_column: "Description"}
    )

    # DataFrame.join defaults to a left join, so every row of event_counts
    # is kept and unmatched codes come back with a missing Description.
    event_counts = event_counts.set_index(code_column).join(lookup).reset_index()
    event_counts.loc[event_counts[code_column] == "Other", "Description"] = "-"

    # For codes that did not find a match in code_df set a default value.
    # Assign the result back rather than calling fillna(inplace=True) on the
    # column selection: that chained form may operate on a temporary copy
    # and leave the frame unchanged (it always does under Copy-on-Write).
    event_counts["Description"] = event_counts["Description"].fillna("-")

    return event_counts