Skip to content

Commit

Permalink
test: add test for nw.Expr|Series.str.contains with literal=True (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
AlessandroMiola authored Dec 28, 2024
1 parent 74e8f95 commit f26b765
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 24 deletions.
23 changes: 19 additions & 4 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4178,15 +4178,17 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
>>> def agnostic_contains(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... default_match=nw.col("pets").str.contains("parrot|Dove"),
Expand All @@ -4196,16 +4198,17 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
... ),
... ).to_native()
We can then pass either pandas or Polars to `func`:
We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_contains`:
>>> my_library_agnostic_function(df_pd)
>>> agnostic_contains(df_pd)
pets default_match case_insensitive_match literal_match
0 cat False False False
1 dog False False False
2 rabbit and parrot True True False
3 dove False True False
4 None None None None
>>> my_library_agnostic_function(df_pl)
>>> agnostic_contains(df_pl)
shape: (5, 4)
┌───────────────────┬───────────────┬────────────────────────┬───────────────┐
│ pets ┆ default_match ┆ case_insensitive_match ┆ literal_match │
Expand All @@ -4218,6 +4221,18 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
│ dove ┆ false ┆ true ┆ false │
│ null ┆ null ┆ null ┆ null │
└───────────────────┴───────────────┴────────────────────────┴───────────────┘
>>> agnostic_contains(df_pa)
pyarrow.Table
pets: string
default_match: bool
case_insensitive_match: bool
literal_match: bool
----
pets: [["cat","dog","rabbit and parrot","dove",null]]
default_match: [[false,false,true,false,null]]
case_insensitive_match: [[false,false,true,true,null]]
literal_match: [[false,false,false,false,null]]
"""
return self._expr.__class__(
lambda plx: self._expr._to_compliant_expr(plx).str.contains(
Expand Down
22 changes: 18 additions & 4 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4137,29 +4137,31 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> SeriesT:
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoSeriesT
>>> pets = ["cat", "dog", "rabbit and parrot", "dove", None]
>>> s_pd = pd.Series(pets)
>>> s_pl = pl.Series(pets)
>>> s_pa = pa.chunked_array([pets])
We define a dataframe-agnostic function:
>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
>>> def agnostic_contains(s_native: IntoSeriesT) -> IntoSeriesT:
... s = nw.from_native(s_native, series_only=True)
... return s.str.contains("parrot|dove").to_native()
We can then pass either pandas or Polars to `func`:
We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_contains`:
>>> my_library_agnostic_function(s_pd)
>>> agnostic_contains(s_pd)
0 False
1 False
2 True
3 True
4 None
dtype: object
>>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE
>>> agnostic_contains(s_pl) # doctest: +NORMALIZE_WHITESPACE
shape: (5,)
Series: '' [bool]
[
Expand All @@ -4169,6 +4171,18 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> SeriesT:
true
null
]
>>> agnostic_contains(s_pa) # doctest: +ELLIPSIS
<pyarrow.lib.ChunkedArray object at ...>
[
[
false,
false,
true,
true,
null
]
]
"""
return self._narwhals_series._from_compliant_series(
self._narwhals_series._compliant_series.str.contains(pattern, literal=literal)
Expand Down
52 changes: 36 additions & 16 deletions tests/expr_and_series/str/contains_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

data = {"pets": ["cat", "dog", "rabbit and parrot", "dove"]}
data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", "Parrot|dove", None]}


def test_contains_case_insensitive(
Expand All @@ -17,12 +17,11 @@ def test_contains_case_insensitive(
request.applymarker(pytest.mark.xfail)

df = nw.from_native(constructor(data))
result = df.with_columns(
nw.col("pets").str.contains("(?i)parrot|Dove").alias("result")
result = df.select(
nw.col("pets").str.contains("(?i)parrot|Dove").alias("case_insensitive_match")
)
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"result": [False, False, True, True],
"case_insensitive_match": [False, False, True, True, True, None],
}
assert_equal_data(result, expected)

Expand All @@ -34,31 +33,52 @@ def test_contains_series_case_insensitive(
request.applymarker(pytest.mark.xfail)

df = nw.from_native(constructor_eager(data), eager_only=True)
result = df.with_columns(
case_insensitive_match=df["pets"].str.contains("(?i)parrot|Dove")
)
result = df.select(case_insensitive_match=df["pets"].str.contains("(?i)parrot|Dove"))
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"case_insensitive_match": [False, False, True, True],
"case_insensitive_match": [False, False, True, True, True, None],
}
assert_equal_data(result, expected)


def test_contains_case_sensitive(constructor: Constructor) -> None:
df = nw.from_native(constructor(data))
result = df.with_columns(nw.col("pets").str.contains("parrot|Dove").alias("result"))
result = df.select(nw.col("pets").str.contains("parrot|Dove").alias("default_match"))
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"result": [False, False, True, False],
"default_match": [False, False, True, False, False, None],
}
assert_equal_data(result, expected)


def test_contains_series_case_sensitive(constructor_eager: ConstructorEager) -> None:
df = nw.from_native(constructor_eager(data), eager_only=True)
result = df.with_columns(case_sensitive_match=df["pets"].str.contains("parrot|Dove"))
result = df.select(default_match=df["pets"].str.contains("parrot|Dove"))
expected = {
"default_match": [False, False, True, False, False, None],
}
assert_equal_data(result, expected)


def test_contains_literal(constructor: Constructor) -> None:
    """Compare `str.contains` with and without `literal=True` on the same pattern.

    The pattern "Parrot|dove" is applied to the `pets` column twice. With the
    default settings the expected result also flags the plain "dove" row,
    whereas with `literal=True` only the row holding the exact text
    "Parrot|dove" is expected to match. Null input is expected to stay null
    in both columns.
    """
    pattern = "Parrot|dove"
    frame = nw.from_native(constructor(data))

    # Build the two expressions separately so the only difference between
    # them (the `literal` flag) is easy to spot.
    default_expr = nw.col("pets").str.contains(pattern).alias("default_match")
    literal_expr = (
        nw.col("pets").str.contains(pattern, literal=True).alias("literal_match")
    )

    result = frame.select(default_expr, literal_expr)
    expected = {
        "default_match": [False, False, False, True, True, None],
        "literal_match": [False, False, False, False, True, None],
    }
    assert_equal_data(result, expected)


def test_contains_series_literal(constructor_eager: ConstructorEager) -> None:
df = nw.from_native(constructor_eager(data), eager_only=True)
result = df.select(
default_match=df["pets"].str.contains("Parrot|dove"),
literal_match=df["pets"].str.contains("Parrot|dove", literal=True),
)
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"case_sensitive_match": [False, False, True, False],
"default_match": [False, False, False, True, True, None],
"literal_match": [False, False, False, False, True, None],
}
assert_equal_data(result, expected)

0 comments on commit f26b765

Please sign in to comment.