diff --git a/src/nested_pandas/utils/utils.py b/src/nested_pandas/utils/utils.py index a1b0a9f..0f980d2 100644 --- a/src/nested_pandas/utils/utils.py +++ b/src/nested_pandas/utils/utils.py @@ -34,8 +34,9 @@ def count_nested(df, nested, by=None, join=True) -> NestedFrame: ) else: # this may be able to be sped up using tolists() as well - counts = df[nested].apply(lambda x: x[by].value_counts()) + counts = df[nested].apply(lambda x: x[by].value_counts(sort=False)) counts = counts.rename(columns={colname: f"n_{nested}_{colname}" for colname in counts.columns}) + counts = counts.reindex(sorted(counts.columns), axis=1) if join: return df.join(counts) # else just return the counts NestedFrame diff --git a/tests/nested_pandas/utils/test_utils.py b/tests/nested_pandas/utils/test_utils.py index e1403a8..009a9c2 100644 --- a/tests/nested_pandas/utils/test_utils.py +++ b/tests/nested_pandas/utils/test_utils.py @@ -27,9 +27,15 @@ def test_count_nested(join): # Test count by label_counts = count_nested(base, "nested", by="label", join=join) + assert all(label_counts["n_nested_a"].values == [2, 2, 1]) assert all(label_counts["n_nested_b"].values == [1, 1, 2]) + # Make sure the ordering is alphabetical + # https://github.com/lincc-frameworks/nested-pandas/issues/109 + assert label_counts.columns[-1] == "n_nested_b" + assert label_counts.columns[-2] == "n_nested_a" + # Test join behavior if join: assert total_counts.columns.tolist() == base.columns.tolist() + ["n_nested"]