From 03e4ca3ca5e22190008f04dbc12a0fc01e05fff7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 24 Dec 2024 17:52:54 +0000 Subject: [PATCH 1/3] chore: use __narwhals_namespace__ more internally --- narwhals/_arrow/dataframe.py | 2 +- narwhals/_arrow/namespace.py | 4 +--- narwhals/_dask/dataframe.py | 6 ++---- narwhals/_dask/namespace.py | 4 +--- narwhals/_pandas_like/dataframe.py | 3 +-- narwhals/_pandas_like/namespace.py | 8 +------- narwhals/_spark_like/dataframe.py | 8 ++------ 7 files changed, 9 insertions(+), 26 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 34758bd82..16669719e 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -504,7 +504,7 @@ def filter(self: Self, *predicates: IntoArrowExpr, **constraints: Any) -> Self: predicates, (plx.col(name) == v for name, v in constraints.items()) ) ) - # Safety: all_horizontal's expression only returns a single column. + # `[0]` is safe as all_horizontal's expression only returns a single column mask = expr._call(self)[0]._native_series return self._from_native_frame(self._native_frame.filter(mask)) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index cb59d5b71..884bc8f08 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -461,11 +461,9 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]: import pyarrow as pa import pyarrow.compute as pc - from narwhals._arrow.namespace import ArrowNamespace from narwhals._expression_parsing import parse_into_expr - plx = ArrowNamespace(backend_version=self._backend_version, version=self._version) - + plx = df.__narwhals_namespace__() condition = parse_into_expr(self._condition, namespace=plx)(df)[0] try: value_series = parse_into_expr(self._then_value, namespace=plx)(df)[0] diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 7a79a2d36..0e762b1fa 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -105,13 +105,11 @@ def filter(self, *predicates: DaskExpr, **constraints: Any) -> Self: ) raise NotImplementedError(msg) - from narwhals._dask.namespace import DaskNamespace - - plx = DaskNamespace(backend_version=self._backend_version, version=self._version) + plx = self.__narwhals_namespace__() expr = plx.all_horizontal( *chain(predicates, (plx.col(name) == v for name, v in constraints.items())) ) - # Safety: all_horizontal's expression only returns a single column. + # `[0]` is safe as all_horizontal's expression only returns a single column mask = expr._call(self)[0] return self._from_native_frame(self._native_frame.loc[mask]) diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 7c0a6b7eb..38cd16a87 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -416,11 +416,9 @@ def __init__( self._version = version def __call__(self, df: DaskLazyFrame) -> Sequence[dask_expr.Series]: - from narwhals._dask.namespace import DaskNamespace from narwhals._expression_parsing import parse_into_expr - plx = DaskNamespace(backend_version=self._backend_version, version=self._version) - + plx = df.__narwhals_namespace__() condition = parse_into_expr(self._condition, namespace=plx)(df)[0] condition = cast("dask_expr.Series", condition) try: diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 6e640c98b..6f8706ae4 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -412,7 +412,7 @@ def filter(self, *predicates: IntoPandasLikeExpr, **constraints: Any) -> Self: predicates, (plx.col(name) == v for name, v in constraints.items()) ) ) - # Safety: all_horizontal's expression only returns a single column. + # `[0]` is safe as all_horizontal's expression only returns a single column mask = expr._call(self)[0] _mask = validate_dataframe_comparand(self._native_frame.index, mask) return self._from_native_frame(self._native_frame.loc[_mask]) @@ -1006,7 +1006,6 @@ def explode(self: Self, columns: str | Sequence[str], *more_columns: str) -> Sel ] plx = self.__native_namespace__() - return self._from_native_frame( plx.concat([exploded_frame, *exploded_series], axis=1)[original_columns] ) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 3aa4015a0..0b060708b 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -503,15 +503,9 @@ def __init__( def __call__(self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: from narwhals._expression_parsing import parse_into_expr - from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals._pandas_like.utils import broadcast_align_and_extract_native - plx = PandasLikeNamespace( - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - ) - + plx = df.__narwhals_namespace__() condition = parse_into_expr(self._condition, namespace=plx)(df)[0] try: value_series = parse_into_expr(self._then_value, namespace=plx)(df)[0] diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py index d488ed7f2..28cefd6fd 100644 --- a/narwhals/_spark_like/dataframe.py +++ b/narwhals/_spark_like/dataframe.py @@ -104,8 +104,6 @@ def select( return self._from_native_frame(self._native_frame.select(*new_columns_list)) def filter(self, *predicates: SparkLikeExpr) -> Self: - from narwhals._spark_like.namespace import SparkLikeNamespace - if ( len(predicates) == 1 and isinstance(predicates[0], list) @@ -113,11 +111,9 @@ def filter(self, *predicates: SparkLikeExpr) -> Self: ): msg = "`LazyFrame.filter` is not supported for PySpark backend with boolean masks." raise NotImplementedError(msg) - plx = SparkLikeNamespace( - backend_version=self._backend_version, version=self._version - ) + plx = self.__narwhals_namespace__() expr = plx.all_horizontal(*predicates) - # Safety: all_horizontal's expression only returns a single column. + # `[0]` is safe as all_horizontal's expression only returns a single column condition = expr._call(self)[0] spark_df = self._native_frame.where(condition) return self._from_native_frame(spark_df) From 396ea0a153b4c9b3f066a8186be8ff402a0e9677 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 24 Dec 2024 13:28:56 +0000 Subject: [PATCH 2/3] polars 1.18 compat --- narwhals/_duckdb/dataframe.py | 14 +++++--------- tests/dtypes_test.py | 9 +-------- tests/frame/concat_test.py | 10 ++++++++-- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 339fca137..4f5803905 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -104,16 +104,12 @@ def select( *exprs: Any, **named_exprs: Any, ) -> Self: - if named_exprs or not all(isinstance(x, str) for x in exprs): # pragma: no cover - msg = ( - "`select`-ing not by name is not supported for DuckDB backend.\n\n" - "If you would like to see this kind of object better supported in " - "Narwhals, please open a feature request " - "at https://github.com/narwhals-dev/narwhals/issues." + new_columns_map = parse_exprs_and_named_exprs(self, *exprs, **named_exprs) + return self._from_native_frame( + self._native_frame.select( + *(val.alias(col) for col, val in new_columns_map.items()) ) - raise NotImplementedError(msg) - - return self._from_native_frame(self._native_frame.select(*exprs)) + ) def __getattr__(self, attr: str) -> Any: if attr == "schema": diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index 0624352c1..aa497785a 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -203,7 +203,7 @@ def test_pandas_fixed_offset_1302() -> None: def test_huge_int() -> None: df = pl.DataFrame({"a": [1, 2, 3]}) if POLARS_VERSION >= (1, 18): # pragma: no cover - result = nw.from_native(df).schema + result = nw.from_native(df.select(pl.col("a").cast(pl.Int128))).schema assert result["a"] == nw.Int128 else: # pragma: no cover # Int128 was not available yet @@ -221,13 +221,6 @@ def test_huge_int() -> None: result = nw.from_native(rel).schema assert result["a"] == nw.UInt128 - if POLARS_VERSION >= (1, 18): # pragma: no cover - result = nw.from_native(df).schema - assert result["a"] == nw.UInt128 - else: # pragma: no cover - # UInt128 was not available yet - pass - # TODO(unassigned): once other libraries support Int128/UInt128, # add tests for them too diff --git a/tests/frame/concat_test.py b/tests/frame/concat_test.py index 567cb4cac..26bbd2e62 100644 --- a/tests/frame/concat_test.py +++ b/tests/frame/concat_test.py @@ -44,9 +44,15 @@ def test_concat_vertical(constructor: Constructor) -> None: with pytest.raises(ValueError, match="No items"): nw.concat([], how="vertical") - with pytest.raises((Exception, TypeError), match="unable to vstack"): + with pytest.raises( + (Exception, TypeError), + match="unable to vstack|inputs should all have the same schema", + ): nw.concat([df_left, df_right.rename({"d": "i"})], how="vertical").collect() - with pytest.raises((Exception, TypeError), match="unable to vstack|unable to append"): + with pytest.raises( + (Exception, TypeError), + match="unable to vstack|unable to append|inputs should all have the same schema", + ): nw.concat([df_left, df_left.select("d")], how="vertical").collect() From 7b30df7ca29fc0252ec11c52d580cc83906548ec Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 24 Dec 2024 18:10:09 +0000 Subject: [PATCH 3/3] fix --- narwhals/_duckdb/dataframe.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 4f5803905..339fca137 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -104,12 +104,16 @@ def select( *exprs: Any, **named_exprs: Any, ) -> Self: - new_columns_map = parse_exprs_and_named_exprs(self, *exprs, **named_exprs) - return self._from_native_frame( - self._native_frame.select( - *(val.alias(col) for col, val in new_columns_map.items()) + if named_exprs or not all(isinstance(x, str) for x in exprs): # pragma: no cover + msg = ( + "`select`-ing not by name is not supported for DuckDB backend.\n\n" + "If you would like to see this kind of object better supported in " + "Narwhals, please open a feature request " + "at https://github.com/narwhals-dev/narwhals/issues." ) - ) + raise NotImplementedError(msg) + + return self._from_native_frame(self._native_frame.select(*exprs)) def __getattr__(self, attr: str) -> Any: if attr == "schema":