test: add test for nw.Expr|Series.str.contains with literal=True (#1670)
narwhals-dev · Dec 28, 2024 · f26b765 · f26b765
1 parent 74e8f95
commit f26b765
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 24 deletions.
diff --git a/narwhals/expr.py b/narwhals/expr.py
@@ -4178,15 +4178,17 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
+            >>> import pyarrow as pa
             >>> import narwhals as nw
             >>> from narwhals.typing import IntoFrameT
             >>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]}
             >>> df_pd = pd.DataFrame(data)
             >>> df_pl = pl.DataFrame(data)
+            >>> df_pa = pa.table(data)
 
             We define a dataframe-agnostic function:
 
-            >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+            >>> def agnostic_contains(df_native: IntoFrameT) -> IntoFrameT:
             ...     df = nw.from_native(df_native)
             ...     return df.with_columns(
             ...         default_match=nw.col("pets").str.contains("parrot|Dove"),
@@ -4196,16 +4198,17 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
             ...         ),
             ...     ).to_native()
 
-            We can then pass either pandas or Polars to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_contains`:
 
-            >>> my_library_agnostic_function(df_pd)
+            >>> agnostic_contains(df_pd)
                             pets default_match case_insensitive_match literal_match
             0                cat         False                  False         False
             1                dog         False                  False         False
             2  rabbit and parrot          True                   True         False
             3               dove         False                   True         False
             4               None          None                   None          None
-            >>> my_library_agnostic_function(df_pl)
+
+            >>> agnostic_contains(df_pl)
             shape: (5, 4)
             ┌───────────────────┬───────────────┬────────────────────────┬───────────────┐
             │ pets              ┆ default_match ┆ case_insensitive_match ┆ literal_match │
@@ -4218,6 +4221,18 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT:
             │ dove              ┆ false         ┆ true                   ┆ false         │
             │ null              ┆ null          ┆ null                   ┆ null          │
             └───────────────────┴───────────────┴────────────────────────┴───────────────┘
+
+            >>> agnostic_contains(df_pa)
+            pyarrow.Table
+            pets: string
+            default_match: bool
+            case_insensitive_match: bool
+            literal_match: bool
+            ----
+            pets: [["cat","dog","rabbit and parrot","dove",null]]
+            default_match: [[false,false,true,false,null]]
+            case_insensitive_match: [[false,false,true,true,null]]
+            literal_match: [[false,false,false,false,null]]
         """
         return self._expr.__class__(
             lambda plx: self._expr._to_compliant_expr(plx).str.contains(

diff --git a/narwhals/series.py b/narwhals/series.py
@@ -4137,29 +4137,31 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> SeriesT:
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
+            >>> import pyarrow as pa
             >>> import narwhals as nw
             >>> from narwhals.typing import IntoSeriesT
             >>> pets = ["cat", "dog", "rabbit and parrot", "dove", None]
             >>> s_pd = pd.Series(pets)
             >>> s_pl = pl.Series(pets)
+            >>> s_pa = pa.chunked_array([pets])
 
             We define a dataframe-agnostic function:
 
-            >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+            >>> def agnostic_contains(s_native: IntoSeriesT) -> IntoSeriesT:
             ...     s = nw.from_native(s_native, series_only=True)
             ...     return s.str.contains("parrot|dove").to_native()
 
-            We can then pass either pandas or Polars to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_contains`:
 
-            >>> my_library_agnostic_function(s_pd)
+            >>> agnostic_contains(s_pd)
             0    False
             1    False
             2     True
             3     True
             4     None
             dtype: object
 
-            >>> my_library_agnostic_function(s_pl)  # doctest: +NORMALIZE_WHITESPACE
+            >>> agnostic_contains(s_pl)  # doctest: +NORMALIZE_WHITESPACE
             shape: (5,)
             Series: '' [bool]
             [
@@ -4169,6 +4171,18 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> SeriesT:
                true
                null
             ]
+
+            >>> agnostic_contains(s_pa)  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                false,
+                false,
+                true,
+                true,
+                null
+              ]
+            ]
         """
         return self._narwhals_series._from_compliant_series(
             self._narwhals_series._compliant_series.str.contains(pattern, literal=literal)

diff --git a/tests/expr_and_series/str/contains_test.py b/tests/expr_and_series/str/contains_test.py
@@ -7,7 +7,7 @@
 from tests.utils import ConstructorEager
 from tests.utils import assert_equal_data
 
-data = {"pets": ["cat", "dog", "rabbit and parrot", "dove"]}
+data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", "Parrot|dove", None]}
 
 
 def test_contains_case_insensitive(
@@ -17,12 +17,11 @@ def test_contains_case_insensitive(
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor(data))
-    result = df.with_columns(
-        nw.col("pets").str.contains("(?i)parrot|Dove").alias("result")
+    result = df.select(
+        nw.col("pets").str.contains("(?i)parrot|Dove").alias("case_insensitive_match")
     )
     expected = {
-        "pets": ["cat", "dog", "rabbit and parrot", "dove"],
-        "result": [False, False, True, True],
+        "case_insensitive_match": [False, False, True, True, True, None],
     }
     assert_equal_data(result, expected)
 
@@ -34,31 +33,52 @@ def test_contains_series_case_insensitive(
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor_eager(data), eager_only=True)
-    result = df.with_columns(
-        case_insensitive_match=df["pets"].str.contains("(?i)parrot|Dove")
-    )
+    result = df.select(case_insensitive_match=df["pets"].str.contains("(?i)parrot|Dove"))
     expected = {
-        "pets": ["cat", "dog", "rabbit and parrot", "dove"],
-        "case_insensitive_match": [False, False, True, True],
+        "case_insensitive_match": [False, False, True, True, True, None],
     }
     assert_equal_data(result, expected)
 
 
 def test_contains_case_sensitive(constructor: Constructor) -> None:
     df = nw.from_native(constructor(data))
-    result = df.with_columns(nw.col("pets").str.contains("parrot|Dove").alias("result"))
+    result = df.select(nw.col("pets").str.contains("parrot|Dove").alias("default_match"))
     expected = {
-        "pets": ["cat", "dog", "rabbit and parrot", "dove"],
-        "result": [False, False, True, False],
+        "default_match": [False, False, True, False, False, None],
     }
     assert_equal_data(result, expected)
 
 
 def test_contains_series_case_sensitive(constructor_eager: ConstructorEager) -> None:
     df = nw.from_native(constructor_eager(data), eager_only=True)
-    result = df.with_columns(case_sensitive_match=df["pets"].str.contains("parrot|Dove"))
+    result = df.select(default_match=df["pets"].str.contains("parrot|Dove"))
+    expected = {
+        "default_match": [False, False, True, False, False, None],
+    }
+    assert_equal_data(result, expected)
+
+
+def test_contains_literal(constructor: Constructor) -> None:
+    df = nw.from_native(constructor(data))
+    result = df.select(
+        nw.col("pets").str.contains("Parrot|dove").alias("default_match"),
+        nw.col("pets").str.contains("Parrot|dove", literal=True).alias("literal_match"),
+    )
+    expected = {
+        "default_match": [False, False, False, True, True, None],
+        "literal_match": [False, False, False, False, True, None],
+    }
+    assert_equal_data(result, expected)
+
+
+def test_contains_series_literal(constructor_eager: ConstructorEager) -> None:
+    df = nw.from_native(constructor_eager(data), eager_only=True)
+    result = df.select(
+        default_match=df["pets"].str.contains("Parrot|dove"),
+        literal_match=df["pets"].str.contains("Parrot|dove", literal=True),
+    )
     expected = {
-        "pets": ["cat", "dog", "rabbit and parrot", "dove"],
-        "case_sensitive_match": [False, False, True, False],
+        "default_match": [False, False, False, True, True, None],
+        "literal_match": [False, False, False, False, True, None],
     }
     assert_equal_data(result, expected)