Skip to content

Commit

Permalink
test create_top_5_code_table
Browse files Browse the repository at this point in the history
As part of this, order the columns consistently in the two versions of the top 5 table
  • Loading branch information
LFISHER7 committed Aug 10, 2023
1 parent a4bebda commit 64dbadd
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
3 changes: 3 additions & 0 deletions interactive_templates/templates/v2/analysis/top_5.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ def create_top_5_code_table(
event_counts = event_counts_sorted.loc[
:, ["Code", "Description", "Proportion of codes (%)"]
]
event_counts_with_counts = event_counts_with_counts.loc[
:, ["Code", "num", "Description", "Proportion of codes (%)"]
]

return event_counts.head(nrows), event_counts_with_counts

Expand Down
41 changes: 41 additions & 0 deletions interactive_templates/templates/v2/tests/test_top_5.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,44 @@ def test_handle_edge_case_percentages(df):
assert (
row["Proportion of codes (%)"] == ">99.99"
), f"Expected '>99.99' but got {row['Proportion of codes (%)']} for num {row['num']}"


@given(
    df=df_strategy,
    code_df=code_df_strategy,
    code_column=st.just("code"),
    term_column=st.just("term"),
    low_count_threshold=st.integers(min_value=1, max_value=10),
    rounding_base=st.integers(min_value=1, max_value=10),
    nrows=st.integers(min_value=1, max_value=10),
)
def test_create_top_5_code_table(
    df, code_df, code_column, term_column, low_count_threshold, rounding_base, nrows
):
    """Property test for the two tables returned by create_top_5_code_table.

    Checks, for generated inputs, that:
      * the truncated table has at most ``nrows`` rows;
      * its proportion column is in descending order;
      * the table with counts has no more rows than the input ``df``;
      * both tables list codes in the same order over the shared prefix;
      * both tables expose the expected columns in the expected order.
    """
    proportion_col = "Proportion of codes (%)"

    truncated, with_counts = create_top_5_code_table(
        df,
        code_df,
        code_column,
        term_column,
        low_count_threshold,
        rounding_base,
        nrows,
    )

    # The truncated table must respect the requested row limit.
    assert len(truncated) <= nrows

    # Rows must be ordered from the largest proportion to the smallest.
    if not truncated.empty:
        observed = list(truncated[proportion_col])
        # Iterate the column a second time (rather than reusing `observed`)
        # so the comparison is made between independently built sequences.
        expected = sorted(truncated[proportion_col], reverse=True)
        assert observed == expected

    # The table with counts can never have more rows than the input frame.
    assert len(with_counts) <= len(df)

    # The leading rows of the table with counts must list the same codes,
    # in the same order, as the truncated table.
    if not (truncated.empty or with_counts.empty):
        leading_codes = with_counts["Code"].head(len(truncated))
        assert list(truncated["Code"]) == list(leading_codes)

    # Both tables must expose exactly these columns, in this order.
    assert list(truncated.columns) == ["Code", "Description", proportion_col]
    assert list(with_counts.columns) == [
        "Code",
        "num",
        "Description",
        proportion_col,
    ]

0 comments on commit 64dbadd

Please sign in to comment.