From de46a67c6645c5b856f5009f7ce3c8345c6534f2 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 6 Jun 2024 13:25:04 -0700 Subject: [PATCH] allow dropna on layer columns --- src/nested_pandas/nestedframe/core.py | 13 +++++++------ tests/nested_pandas/nestedframe/test_nestedframe.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 6142238..b3be03e 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -201,7 +201,6 @@ def _resolve_dropna_target(self, on_nested, subset): """resolves the target layer for a given set of dropna kwargs""" nested_cols = self.nested_columns - columns = self.columns # first check the subset kwarg input subset_target = [] @@ -210,13 +209,15 @@ def _resolve_dropna_target(self, on_nested, subset): subset = [subset] for col in subset: - col = col.split(".")[0] - if col in nested_cols: - subset_target.append(col) - elif col in columns: + # Without a ".", always assume base layer + if "." not in col: subset_target.append("base") else: - raise ValueError(f"Column name {col} not found in any base or nested columns") + layer, col = col.split(".") + if layer in nested_cols: + subset_target.append(layer) + else: + raise ValueError(f"layer '{layer}' not found in the base columns") # Check for 1 target subset_target = np.unique(subset_target) diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index b9afca9..66931cc 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -3,6 +3,7 @@ import pyarrow as pa import pytest from nested_pandas import NestedFrame +from nested_pandas.datasets import generate_data from pandas.testing import assert_frame_equal @@ -203,6 +204,15 @@ def test_dropna(): assert len(dn_hierarchical["nested"].nest.to_flat() == 8) +def test_dropna_layer_as_base_column(): + """Test that a nested column still works as a top level column for dropna""" + nf = generate_data(10, 100, seed=1).query("nested.t>19.75") + nf = nf.dropna(subset=["nested"]) + + # make sure rows have been dropped as expected + assert len(nf) == 6 + + def test_dropna_inplace_base(): """Test in-place behavior of dropna"""