Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: add test for nw.Expr|Series.str.contains with literal=True #1670

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4178,15 +4178,17 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
>>> def agnostic_contains(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... default_match=nw.col("pets").str.contains("parrot|Dove"),
Expand All @@ -4196,16 +4198,17 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
... ),
... ).to_native()

We can then pass either pandas or Polars to `func`:
We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_contains`:

>>> my_library_agnostic_function(df_pd)
>>> agnostic_contains(df_pd)
pets default_match case_insensitive_match literal_match
0 cat False False False
1 dog False False False
2 rabbit and parrot True True False
3 dove False True False
4 None None None None
>>> my_library_agnostic_function(df_pl)

>>> agnostic_contains(df_pl)
shape: (5, 4)
┌───────────────────┬───────────────┬────────────────────────┬───────────────┐
│ pets              ┆ default_match ┆ case_insensitive_match ┆ literal_match │
Expand All @@ -4218,6 +4221,18 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
│ dove              ┆ false         ┆ true                   ┆ false         │
│ null              ┆ null          ┆ null                   ┆ null          │
└───────────────────┴───────────────┴────────────────────────┴───────────────┘

>>> agnostic_contains(df_pa)
pyarrow.Table
pets: string
default_match: bool
case_insensitive_match: bool
literal_match: bool
----
pets: [["cat","dog","rabbit and parrot","dove",null]]
default_match: [[false,false,true,false,null]]
case_insensitive_match: [[false,false,true,true,null]]
literal_match: [[false,false,false,false,null]]
"""
return self._expr.__class__(
lambda plx: self._expr._to_compliant_expr(plx).str.contains(
Expand Down
22 changes: 18 additions & 4 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4137,29 +4137,31 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> SeriesT:
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoSeriesT
>>> pets = ["cat", "dog", "rabbit and parrot", "dove", None]
>>> s_pd = pd.Series(pets)
>>> s_pl = pl.Series(pets)
>>> s_pa = pa.chunked_array([pets])

We define a dataframe-agnostic function:

>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
>>> def agnostic_contains(s_native: IntoSeriesT) -> IntoSeriesT:
... s = nw.from_native(s_native, series_only=True)
... return s.str.contains("parrot|dove").to_native()

We can then pass either pandas or Polars to `func`:
We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_contains`:

>>> my_library_agnostic_function(s_pd)
>>> agnostic_contains(s_pd)
0 False
1 False
2 True
3 True
4 None
dtype: object

>>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE
>>> agnostic_contains(s_pl) # doctest: +NORMALIZE_WHITESPACE
shape: (5,)
Series: '' [bool]
[
Expand All @@ -4169,6 +4171,18 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> SeriesT:
true
null
]

>>> agnostic_contains(s_pa) # doctest: +ELLIPSIS
<pyarrow.lib.ChunkedArray object at ...>
[
[
false,
false,
true,
true,
null
]
]
"""
return self._narwhals_series._from_compliant_series(
self._narwhals_series._compliant_series.str.contains(pattern, literal=literal)
Expand Down
52 changes: 36 additions & 16 deletions tests/expr_and_series/str/contains_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

data = {"pets": ["cat", "dog", "rabbit and parrot", "dove"]}
data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", "Parrot|dove", None]}


def test_contains_case_insensitive(
Expand All @@ -17,12 +17,11 @@ def test_contains_case_insensitive(
request.applymarker(pytest.mark.xfail)

df = nw.from_native(constructor(data))
result = df.with_columns(
nw.col("pets").str.contains("(?i)parrot|Dove").alias("result")
result = df.select(
nw.col("pets").str.contains("(?i)parrot|Dove").alias("case_insensitive_match")
)
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"result": [False, False, True, True],
"case_insensitive_match": [False, False, True, True, True, None],
}
assert_equal_data(result, expected)

Expand All @@ -34,31 +33,52 @@ def test_contains_series_case_insensitive(
request.applymarker(pytest.mark.xfail)

df = nw.from_native(constructor_eager(data), eager_only=True)
result = df.with_columns(
case_insensitive_match=df["pets"].str.contains("(?i)parrot|Dove")
)
result = df.select(case_insensitive_match=df["pets"].str.contains("(?i)parrot|Dove"))
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"case_insensitive_match": [False, False, True, True],
"case_insensitive_match": [False, False, True, True, True, None],
}
assert_equal_data(result, expected)


def test_contains_case_sensitive(constructor: Constructor) -> None:
df = nw.from_native(constructor(data))
result = df.with_columns(nw.col("pets").str.contains("parrot|Dove").alias("result"))
result = df.select(nw.col("pets").str.contains("parrot|Dove").alias("default_match"))
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"result": [False, False, True, False],
"default_match": [False, False, True, False, False, None],
}
assert_equal_data(result, expected)


def test_contains_series_case_sensitive(constructor_eager: ConstructorEager) -> None:
df = nw.from_native(constructor_eager(data), eager_only=True)
result = df.with_columns(case_sensitive_match=df["pets"].str.contains("parrot|Dove"))
result = df.select(default_match=df["pets"].str.contains("parrot|Dove"))
expected = {
"default_match": [False, False, True, False, False, None],
}
assert_equal_data(result, expected)


def test_contains_literal(constructor: Constructor) -> None:
df = nw.from_native(constructor(data))
result = df.select(
nw.col("pets").str.contains("Parrot|dove").alias("default_match"),
nw.col("pets").str.contains("Parrot|dove", literal=True).alias("literal_match"),
)
expected = {
"default_match": [False, False, False, True, True, None],
"literal_match": [False, False, False, False, True, None],
}
assert_equal_data(result, expected)


def test_contains_series_literal(constructor_eager: ConstructorEager) -> None:
df = nw.from_native(constructor_eager(data), eager_only=True)
result = df.select(
default_match=df["pets"].str.contains("Parrot|dove"),
literal_match=df["pets"].str.contains("Parrot|dove", literal=True),
)
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"case_sensitive_match": [False, False, True, False],
"default_match": [False, False, False, True, True, None],
"literal_match": [False, False, False, False, True, None],
}
assert_equal_data(result, expected)
Loading