From a2a3f7b52d881c32f13c288e23b5ca7edc9c291a Mon Sep 17 00:00:00 2001 From: Louis Fisher Date: Thu, 10 Aug 2023 13:24:17 +0100 Subject: [PATCH] test `create_top_5_code_table` As part of this order columns in the two versions of top 5 consistently --- .../templates/v2/analysis/top_5.py | 3 ++ .../templates/v2/tests/test_top_5.py | 41 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/interactive_templates/templates/v2/analysis/top_5.py b/interactive_templates/templates/v2/analysis/top_5.py index 262f113b..7a403644 100644 --- a/interactive_templates/templates/v2/analysis/top_5.py +++ b/interactive_templates/templates/v2/analysis/top_5.py @@ -143,6 +143,9 @@ def create_top_5_code_table( event_counts = event_counts_sorted.loc[ :, ["Code", "Description", "Proportion of codes (%)"] ] + event_counts_with_counts = event_counts_with_counts.loc[ + :, ["Code", "num", "Description", "Proportion of codes (%)"] + ] return event_counts.head(nrows), event_counts_with_counts diff --git a/interactive_templates/templates/v2/tests/test_top_5.py b/interactive_templates/templates/v2/tests/test_top_5.py index dbb8dd72..a601105f 100644 --- a/interactive_templates/templates/v2/tests/test_top_5.py +++ b/interactive_templates/templates/v2/tests/test_top_5.py @@ -156,3 +156,44 @@ def test_handle_edge_case_percentages(df): assert ( row["Proportion of codes (%)"] == ">99.99" ), f"Expected '>99.99' but got {row['Proportion of codes (%)']} for num {row['num']}" + + +@given( + df=df_strategy, + code_df=code_df_strategy, + code_column=st.just("code"), + term_column=st.just("term"), + low_count_threshold=st.integers(min_value=1, max_value=10), + rounding_base=st.integers(min_value=1, max_value=10), + nrows=st.integers(min_value=1, max_value=10), +) +def test_create_top_5_code_table( + df, code_df, code_column, term_column, low_count_threshold, rounding_base, nrows +): + top_5, top_5_with_counts = create_top_5_code_table( + df, code_df, code_column, term_column, low_count_threshold, 
rounding_base, nrows + ) + + assert len(top_5) <= nrows + + # Make sure that the order is correct based on proportion + if not top_5.empty: + assert list(top_5["Proportion of codes (%)"]) == sorted( + top_5["Proportion of codes (%)"], reverse=True + ) + + # Ensure the table with counts has no more rows than the input data + assert len(top_5_with_counts) <= len(df) + + # The two results should share the same sorted order based on "Proportion of codes (%)" + if not top_5.empty and not top_5_with_counts.empty: + assert list(top_5["Code"]) == list(top_5_with_counts["Code"].head(len(top_5))) + + # Ensure they have the expected columns + assert list(top_5.columns) == ["Code", "Description", "Proportion of codes (%)"] + assert list(top_5_with_counts.columns) == [ + "Code", + "num", + "Description", + "Proportion of codes (%)", + ]