From 7f438af58f8b89d1e74b0f32179e1803c8f87998 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Jan 2025 23:51:04 +0100 Subject: [PATCH 1/5] feat: add duckdb dataframe drop_nulls --- narwhals/_dask/dataframe.py | 14 +++++++------- narwhals/_duckdb/dataframe.py | 9 +++++++++ narwhals/_pandas_like/dataframe.py | 14 +++++++------- tests/frame/drop_nulls_test.py | 7 +------ 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 89e1bc482..2f15eaafb 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -347,13 +347,13 @@ def join_asof( self, other: Self, *, - left_on: str | None = None, - right_on: str | None = None, - on: str | None = None, - by_left: str | list[str] | None = None, - by_right: str | list[str] | None = None, - by: str | list[str] | None = None, - strategy: Literal["backward", "forward", "nearest"] = "backward", + left_on: str | None, + right_on: str | None, + on: str | None, + by_left: str | list[str] | None, + by_right: str | list[str] | None, + by: str | list[str] | None, + strategy: Literal["backward", "forward", "nearest"], ) -> Self: plx = self.__native_namespace__() return self._from_native_frame( diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 98eca0bdb..5a680e5bb 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -321,3 +321,12 @@ def sort( ) ) return self._from_native_frame(result) + + def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: + import duckdb + + rel = self._native_frame + subset_ = subset if subset is not None else rel.columns + keep_condition = " and ".join(f"{col} is not null" for col in subset_) + query = f"""select * from rel where {keep_condition}""" # noqa: S608 + return self._from_native_frame(duckdb.sql(query)) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 47c43a69a..1d96a2f7d 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -654,13 +654,13 @@ def join_asof( self, other: Self, *, - left_on: str | None = None, - right_on: str | None = None, - on: str | None = None, - by_left: str | list[str] | None = None, - by_right: str | list[str] | None = None, - by: str | list[str] | None = None, - strategy: Literal["backward", "forward", "nearest"] = "backward", + left_on: str | None, + right_on: str | None, + on: str | None, + by_left: str | list[str] | None, + by_right: str | list[str] | None, + by: str | list[str] | None, + strategy: Literal["backward", "forward", "nearest"], ) -> Self: plx = self.__native_namespace__() return self._from_native_frame( diff --git a/tests/frame/drop_nulls_test.py b/tests/frame/drop_nulls_test.py index 368ad6ba0..451b1cbd7 100644 --- a/tests/frame/drop_nulls_test.py +++ b/tests/frame/drop_nulls_test.py @@ -12,9 +12,7 @@ } -def test_drop_nulls(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) +def test_drop_nulls(constructor: Constructor) -> None: result = nw.from_native(constructor(data)).drop_nulls() expected = { "a": [2.0, 4.0], @@ -35,9 +33,6 @@ def test_drop_nulls_subset( constructor: Constructor, subset: str | list[str], expected: dict[str, float], - request: pytest.FixtureRequest, ) -> None: - if "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) result = nw.from_native(constructor(data)).drop_nulls(subset=subset) assert_equal_data(result, expected) From 5880e60c2b9408225ee2501aba1d3700fea21dc1 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Jan 2025 23:53:50 +0100 Subject: [PATCH 2/5] rollback --- narwhals/_dask/dataframe.py | 14 +++++++------- narwhals/_pandas_like/dataframe.py | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 2f15eaafb..89e1bc482 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -347,13 +347,13 @@ def join_asof( self, other: Self, *, - left_on: str | None, - right_on: str | None, - on: str | None, - by_left: str | list[str] | None, - by_right: str | list[str] | None, - by: str | list[str] | None, - strategy: Literal["backward", "forward", "nearest"], + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, + by_left: str | list[str] | None = None, + by_right: str | list[str] | None = None, + by: str | list[str] | None = None, + strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: plx = self.__native_namespace__() return self._from_native_frame( diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 1d96a2f7d..47c43a69a 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -654,13 +654,13 @@ def join_asof( self, other: Self, *, - left_on: str | None, - right_on: str | None, - on: str | None, - by_left: str | list[str] | None, - by_right: str | list[str] | None, - by: str | list[str] | None, - strategy: Literal["backward", "forward", "nearest"], + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, + by_left: str | list[str] | None = None, + by_right: str | list[str] | None = None, + by: str | list[str] | None = None, + strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: plx = self.__native_namespace__() return self._from_native_frame( From f1feb939215911d4f990c068ba6712dfe0cf3135 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 16 Jan 2025 09:23:31 +0100 Subject: [PATCH 3/5] test columns with spaces --- narwhals/_duckdb/dataframe.py | 10 ++++++++-- tests/frame/drop_nulls_test.py | 14 +++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 5a680e5bb..c446351f1 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -326,7 +326,13 @@ def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: import duckdb rel = self._native_frame - subset_ = subset if subset is not None else rel.columns - keep_condition = " and ".join(f"{col} is not null" for col in subset_) + subset_ = ( + [subset] + if isinstance(subset, str) + else rel.columns + if subset is None + else subset + ) + keep_condition = " and ".join(f'"{col}" is not null' for col in subset_) query = f"""select * from rel where {keep_condition}""" # noqa: S608 return self._from_native_frame(duckdb.sql(query)) diff --git a/tests/frame/drop_nulls_test.py b/tests/frame/drop_nulls_test.py index 451b1cbd7..c49b17126 100644 --- a/tests/frame/drop_nulls_test.py +++ b/tests/frame/drop_nulls_test.py @@ -7,16 +7,16 @@ from tests.utils import assert_equal_data data = { - "a": [1.0, 2.0, None, 4.0], - "b": [None, 3.0, None, 5.0], + "alpha": [1.0, 2.0, None, 4.0], + "beta gamma": [None, 3.0, None, 5.0], } def test_drop_nulls(constructor: Constructor) -> None: result = nw.from_native(constructor(data)).drop_nulls() expected = { - "a": [2.0, 4.0], - "b": [3.0, 5.0], + "alpha": [2.0, 4.0], + "beta gamma": [3.0, 5.0], } assert_equal_data(result, expected) @@ -24,9 +24,9 @@ def test_drop_nulls(constructor: Constructor) -> None: @pytest.mark.parametrize( ("subset", "expected"), [ - ("a", {"a": [1, 2.0, 4.0], "b": [None, 3.0, 5.0]}), - (["a"], {"a": [1, 2.0, 4.0], "b": [None, 3.0, 5.0]}), - (["a", "b"], {"a": [2.0, 4.0], "b": [3.0, 5.0]}), + ("alpha", {"alpha": [1, 2.0, 4.0], "beta gamma": [None, 3.0, 5.0]}), + (["alpha"], {"alpha": [1, 2.0, 4.0], "beta gamma": [None, 3.0, 5.0]}), + (["alpha", "beta gamma"], {"alpha": [2.0, 4.0], "beta gamma": [3.0, 5.0]}), ], ) def test_drop_nulls_subset( From 30836eea6d5af8694be8d92449557f1a89b7fe9b Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 16 Jan 2025 09:25:57 +0100 Subject: [PATCH 4/5] single double quote --- narwhals/_duckdb/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index c446351f1..3ae94de74 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -334,5 +334,5 @@ def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: else subset ) keep_condition = " and ".join(f'"{col}" is not null' for col in subset_) - query = f"""select * from rel where {keep_condition}""" # noqa: S608 + query = f"select * from rel where {keep_condition}" # noqa: S608 return self._from_native_frame(duckdb.sql(query)) From a43e5c26240791fd1ad3ba0b8dd52a5080ceacf4 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 16 Jan 2025 09:34:05 +0100 Subject: [PATCH 5/5] push list conversion to narwhals BaseFrame level --- narwhals/_arrow/dataframe.py | 3 +-- narwhals/_dask/dataframe.py | 3 +-- narwhals/_duckdb/dataframe.py | 10 ++-------- narwhals/_pandas_like/dataframe.py | 3 +-- narwhals/_spark_like/dataframe.py | 2 +- narwhals/dataframe.py | 3 ++- 6 files changed, 8 insertions(+), 16 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index ed738647c..96a8ef717 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -395,10 +395,9 @@ def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001 ) return self._from_native_frame(self._native_frame.drop(to_drop)) - def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: + def drop_nulls(self: Self, subset: list[str] | None) -> Self: if subset is None: return self._from_native_frame(self._native_frame.drop_null()) - subset = [subset] if isinstance(subset, str) else subset plx = self.__narwhals_namespace__() return self.filter(~plx.any_horizontal(plx.col(*subset).is_null())) diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 89e1bc482..1a8efc446 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -149,10 +149,9 @@ def select( ) return self._from_native_frame(df) - def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: + def drop_nulls(self: Self, subset: list[str] | None) -> Self: if subset is None: return self._from_native_frame(self._native_frame.dropna()) - subset = [subset] if isinstance(subset, str) else subset plx = self.__narwhals_namespace__() return self.filter(~plx.any_horizontal(plx.col(*subset).is_null())) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 3ae94de74..a3c2798b1 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -322,17 +322,11 @@ def sort( ) return self._from_native_frame(result) - def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: + def drop_nulls(self: Self, subset: list[str] | None) -> Self: import duckdb rel = self._native_frame - subset_ = ( - [subset] - if isinstance(subset, str) - else rel.columns - if subset is None - else subset - ) + subset_ = subset if subset is not None else rel.columns keep_condition = " and ".join(f'"{col}" is not null' for col in subset_) query = f"select * from rel where {keep_condition}" # noqa: S608 return self._from_native_frame(duckdb.sql(query)) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 47c43a69a..fdd53b4a6 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -371,10 +371,9 @@ def select( ) return self._from_native_frame(df) - def drop_nulls(self, subset: str | list[str] | None) -> Self: + def drop_nulls(self, subset: list[str] | None) -> Self: if subset is None: return self._from_native_frame(self._native_frame.dropna(axis=0)) - subset = [subset] if isinstance(subset, str) else subset plx = self.__narwhals_namespace__() return self.filter(~plx.any_horizontal(plx.col(*subset).is_null())) diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py index 7e7f41ddb..34da43fde 100644 --- a/narwhals/_spark_like/dataframe.py +++ b/narwhals/_spark_like/dataframe.py @@ -183,7 +183,7 @@ def sort( sort_cols = [sort_f(col) for col, sort_f in zip(flat_by, sort_funcs)] return self._from_native_frame(self._native_frame.sort(*sort_cols)) - def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: + def drop_nulls(self: Self, subset: list[str] | None) -> Self: return self._from_native_frame(self._native_frame.dropna(subset=subset)) def rename(self: Self, mapping: dict[str, str]) -> Self: diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 1ae43028c..e45041679 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -107,7 +107,8 @@ def with_row_index(self, name: str = "index") -> Self: self._compliant_frame.with_row_index(name), ) - def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: + def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: + subset = [subset] if isinstance(subset, str) else subset return self._from_compliant_dataframe( self._compliant_frame.drop_nulls(subset=subset), )