From 321c6e62d896060c24ff63b68c75dc92c4acd005 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 23 Mar 2024 11:28:52 +0000 Subject: [PATCH 1/2] wip str namespace --- narwhals/expression.py | 14 ++++++++++++++ narwhals/pandas_like/expr.py | 22 +++------------------- tpch/q2.py | 2 +- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/narwhals/expression.py b/narwhals/expression.py index b786b2493..440ee14e8 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -164,6 +164,20 @@ def sample( ) ) + @property + def str(self) -> ExprStringNamespace: + return ExprStringNamespace(self) + + +class ExprStringNamespace: + def __init__(self, expr: Expr) -> None: + self._expr = expr + + def ends_with(self, suffix: str) -> Expr: + return self._expr.__class__( + lambda plx: self._expr._call(plx).str.ends_with(suffix) + ) + def col(*names: str | Iterable[str]) -> Expr: return Expr(lambda plx: plx.col(*names)) diff --git a/narwhals/pandas_like/expr.py b/narwhals/pandas_like/expr.py index b2b64d2cf..e32feed65 100644 --- a/narwhals/pandas_like/expr.py +++ b/narwhals/pandas_like/expr.py @@ -204,11 +204,11 @@ def alias(self, name: str) -> Self: ) @property - def str(self) -> ExprStringNamespace: - return ExprStringNamespace(self) + def str(self) -> PandasExprStringNamespace: + return PandasExprStringNamespace(self) -class ExprStringNamespace: +class PandasExprStringNamespace: def __init__(self, expr: PandasExpr) -> None: self._expr = expr @@ -229,19 +229,3 @@ def ends_with(self, suffix: str) -> PandasExpr: output_names=self._expr._output_names, implementation=self._expr._implementation, ) - - def strip_chars(self, characters: str = " ") -> PandasExpr: - return PandasExpr( - lambda df: [ - PandasSeries( - series._series.str.strip(characters), - implementation=df._implementation, - ) - for series in self._expr._call(df) - ], - depth=self._expr._depth + 1, - function_name=f"{self._expr._function_name}->str.strip_chars", - root_names=self._expr._root_names, - output_names=self._expr._output_names, - implementation=self._expr._implementation, - ) diff --git a/tpch/q2.py b/tpch/q2.py index 933f96391..47dbe36bf 100644 --- a/tpch/q2.py +++ b/tpch/q2.py @@ -35,7 +35,7 @@ def q2( .filter(nw.col("p_size") == var_1) .filter(nw.col("p_type").str.ends_with(var_2)) .filter(nw.col("r_name") == var_3) - ).cache() + ) final_cols = [ "s_acctbal", From 9b0918628fed4d9f012f979d309a17243ddae114 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 23 Mar 2024 11:48:36 +0000 Subject: [PATCH 2/2] fixup --- tests/test_group_by.py | 38 ++++++++++++++++++++++++++++++++++++++ tests/test_str.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 tests/test_group_by.py create mode 100644 tests/test_str.py diff --git a/tests/test_group_by.py b/tests/test_group_by.py new file mode 100644 index 000000000..95e45c7ec --- /dev/null +++ b/tests/test_group_by.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import pandas as pd +import polars as pl +import pytest + +import narwhals as nw +from tests.utils import compare_dicts + +df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) +df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) + + +def test_group_by_complex() -> None: + df = nw.LazyFrame(df_pandas) + with pytest.warns(UserWarning, match="complex group-by"): + result = nw.to_native( + df.group_by("a").agg((nw.col("b") - nw.col("z").mean()).mean()).sort("a") + ) + expected = {"a": [1, 2, 3], "b": [-3.0, -3.0, -4.0]} + compare_dicts(result, expected) + + df = nw.LazyFrame(df_lazy) + result = nw.to_native( + df.group_by("a").agg((nw.col("b") - nw.col("z").mean()).mean()).sort("a") + ) + expected = {"a": [1, 2, 3], "b": [-3.0, -3.0, -4.0]} + compare_dicts(result, expected) + + +def test_invalid_group_by() -> None: + df = nw.LazyFrame(df_pandas) + with pytest.raises(RuntimeError, match="does your"): + df.group_by("a").agg(nw.col("b")) + with pytest.raises( + ValueError, match=r"Anonymous expressions are not supported in group_by\.agg" + ): + df.group_by("a").agg(nw.all().mean()) diff --git a/tests/test_str.py b/tests/test_str.py new file mode 100644 index 000000000..55c3819b0 --- /dev/null +++ b/tests/test_str.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import os +import warnings +from typing import Any + +import pandas as pd +import polars as pl +import pytest + +import narwhals as nw +from tests.utils import compare_dicts + +df_pandas = pd.DataFrame({"a": ["fdas", "edfas"]}) +df_polars = pl.LazyFrame({"a": ["fdas", "edfas"]}) + +if os.environ.get("CI", None): + import modin.pandas as mpd + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + df_mpd = mpd.DataFrame({"a": ["fdas", "edfas"]}) +else: + df_mpd = df_pandas.copy() + + +@pytest.mark.parametrize( + "df_raw", + [df_pandas, df_polars, df_mpd], +) +def test_ends_with(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.select(nw.col("a").str.ends_with("das")) + result_native = nw.to_native(result) + expected = { + "a": [True, False], + } + compare_dicts(result_native, expected)