Skip to content

Commit

Permalink
Merge pull request #26 from MarcoGorelli/str
Browse files Browse the repository at this point in the history
Str namespace
  • Loading branch information
MarcoGorelli authored Mar 23, 2024
2 parents 7881416 + 9b09186 commit e193455
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 20 deletions.
14 changes: 14 additions & 0 deletions narwhals/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,20 @@ def sample(
)
)

@property
def str(self) -> ExprStringNamespace:
    """Return the string-operations namespace bound to this expression.

    A new ``ExprStringNamespace`` wrapping ``self`` is created on every access.
    """
    namespace = ExprStringNamespace(self)
    return namespace


class ExprStringNamespace:
    """String-specific operations for an expression, wrapping the given ``Expr``."""

    def __init__(self, expr: Expr) -> None:
        # Keep the parent expression; methods build new expressions on top of it.
        self._expr = expr

    def ends_with(self, suffix: str) -> Expr:
        """Return an expression that applies ``.str.ends_with(suffix)``.

        The result is an instance of the same class as the wrapped expression.
        Its deferred call evaluates the wrapped expression against ``plx`` and
        then calls ``.str.ends_with(suffix)`` on whatever that returns.
        """
        make_expr = self._expr.__class__
        return make_expr(lambda plx: self._expr._call(plx).str.ends_with(suffix))


def col(*names: str | Iterable[str]) -> Expr:
    """Create an expression that defers to ``plx.col(*names)``.

    ``names`` is forwarded unchanged to the ``col`` function of whatever
    ``plx`` object the expression is eventually called with.
    """
    column_expr = Expr(lambda plx: plx.col(*names))
    return column_expr
Expand Down
22 changes: 3 additions & 19 deletions narwhals/pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,11 +204,11 @@ def alias(self, name: str) -> Self:
)

@property
def str(self) -> ExprStringNamespace:
return ExprStringNamespace(self)
def str(self) -> PandasExprStringNamespace:
return PandasExprStringNamespace(self)


class ExprStringNamespace:
class PandasExprStringNamespace:
def __init__(self, expr: PandasExpr) -> None:
self._expr = expr

Expand All @@ -229,19 +229,3 @@ def ends_with(self, suffix: str) -> PandasExpr:
output_names=self._expr._output_names,
implementation=self._expr._implementation,
)

def strip_chars(self, characters: str = " ") -> PandasExpr:
    """Return an expression that strips ``characters`` from each string value.

    Every series produced by the wrapped expression has
    ``.str.strip(characters)`` called on its underlying native series, and the
    result is re-wrapped in a ``PandasSeries`` that carries the dataframe's
    implementation. Root and output names are passed through unchanged; the
    depth is incremented by one and the function name gains a
    ``->str.strip_chars`` suffix.
    """
    source = self._expr
    chained_name = f"{source._function_name}->str.strip_chars"
    return PandasExpr(
        # The wrapped expression is looked up at call time, as in the eager
        # metadata below it is read once while building the new expression.
        lambda df: [
            PandasSeries(
                column._series.str.strip(characters),
                implementation=df._implementation,
            )
            for column in self._expr._call(df)
        ],
        depth=source._depth + 1,
        function_name=chained_name,
        root_names=source._root_names,
        output_names=source._output_names,
        implementation=source._implementation,
    )
38 changes: 38 additions & 0 deletions tests/test_group_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from __future__ import annotations

import pandas as pd
import polars as pl
import pytest

import narwhals as nw
from tests.utils import compare_dicts

# Shared input data: the same three rows are loaded into an eager pandas
# frame and a lazy polars frame so each test can run against both backends.
_data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df_pandas = pd.DataFrame(_data)
df_lazy = pl.LazyFrame(_data)


def test_group_by_complex() -> None:
    """Check a non-elementary aggregation in ``group_by(...).agg`` on both inputs.

    The pandas-backed frame is expected to emit a ``UserWarning`` matching
    "complex group-by"; the polars-backed frame is run without that check.
    Both must produce the same sorted result.
    """
    expected = {"a": [1, 2, 3], "b": [-3.0, -3.0, -4.0]}

    pandas_frame = nw.LazyFrame(df_pandas)
    # Everything that might trigger the warning stays inside the context.
    with pytest.warns(UserWarning, match="complex group-by"):
        demeaned = nw.col("b") - nw.col("z").mean()
        aggregated = pandas_frame.group_by("a").agg(demeaned.mean()).sort("a")
        result = nw.to_native(aggregated)
    compare_dicts(result, expected)

    polars_frame = nw.LazyFrame(df_lazy)
    demeaned = nw.col("b") - nw.col("z").mean()
    aggregated = polars_frame.group_by("a").agg(demeaned.mean()).sort("a")
    result = nw.to_native(aggregated)
    compare_dicts(result, expected)


def test_invalid_group_by() -> None:
    """Check that unsupported ``group_by(...).agg`` inputs raise.

    Passing a bare column is expected to raise ``RuntimeError``, and passing
    an anonymous expression (``nw.all().mean()``) is expected to raise
    ``ValueError``.
    """
    frame = nw.LazyFrame(df_pandas)

    with pytest.raises(RuntimeError, match="does your"):
        frame.group_by("a").agg(nw.col("b"))

    anonymous_pattern = r"Anonymous expressions are not supported in group_by\.agg"
    with pytest.raises(ValueError, match=anonymous_pattern):
        frame.group_by("a").agg(nw.all().mean())
38 changes: 38 additions & 0 deletions tests/test_str.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from __future__ import annotations

import os
import warnings
from typing import Any

import pandas as pd
import polars as pl
import pytest

import narwhals as nw
from tests.utils import compare_dicts

# One string column, loaded into each backend the tests are parametrized over.
_strings = {"a": ["fdas", "edfas"]}
df_pandas = pd.DataFrame(_strings)
df_polars = pl.LazyFrame(_strings)

# modin is only imported when the CI environment variable is set to a
# non-empty value; otherwise a copy of the pandas frame stands in for it.
if os.environ.get("CI"):
    import modin.pandas as mpd

    # Ignore UserWarnings raised while the modin frame is constructed.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        df_mpd = mpd.DataFrame(_strings)
else:
    df_mpd = df_pandas.copy()


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_mpd])
def test_ends_with(df_raw: Any) -> None:
    """Check ``str.ends_with`` on column ``a`` for each parametrized input.

    With the values "fdas" and "edfas" and the suffix "das", the expected
    result is ``[True, False]``.
    """
    frame = nw.LazyFrame(df_raw)
    suffix_mask = nw.col("a").str.ends_with("das")
    selected = nw.to_native(frame.select(suffix_mask))
    compare_dicts(selected, {"a": [True, False]})
2 changes: 1 addition & 1 deletion tpch/q2.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def q2(
.filter(nw.col("p_size") == var_1)
.filter(nw.col("p_type").str.ends_with(var_2))
.filter(nw.col("r_name") == var_3)
).cache()
)

final_cols = [
"s_acctbal",
Expand Down

0 comments on commit e193455

Please sign in to comment.