narwhals-dev · camriddell · Dec 19, 2024 · Dec 20, 2024 · Dec 27, 2024 · Dec 27, 2024
diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md
@@ -33,6 +33,7 @@
         - is_in
         - is_last_distinct
         - is_null
+        - is_nan
         - is_unique
         - len
         - map_batches

diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md
@@ -39,6 +39,7 @@
         - is_in
         - is_last_distinct
         - is_null
+        - is_nan
         - is_sorted
         - is_unique
         - item

diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
@@ -312,6 +312,9 @@ def null_count(self: Self) -> Self:
     def is_null(self: Self) -> Self:
         return reuse_series_implementation(self, "is_null")
 
+    def is_nan(self: Self) -> Self:  # presumably runs the series-level is_nan
+        return reuse_series_implementation(self, "is_nan")
+
     def is_between(self: Self, lower_bound: Any, upper_bound: Any, closed: str) -> Self:
         return reuse_series_implementation(
             self, "is_between", lower_bound, upper_bound, closed

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
@@ -514,6 +514,12 @@ def is_null(self: Self) -> Self:
         ser = self._native_series
         return self._from_native_series(ser.is_null())
 
+    def is_nan(self: Self) -> Self:
+        """Run ``pyarrow.compute.is_nan`` on the native series."""
+        import pyarrow.compute as pc
+
+        return self._from_native_series(pc.is_nan(self._native_series))
+
     def cast(self: Self, dtype: DType) -> Self:
         import pyarrow.compute as pc
 

diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
@@ -689,6 +689,25 @@ def is_null(self: Self) -> Self:
             returns_scalar=False,
         )
 
+    def is_nan(self: Self) -> Self:
+        """Flag NaN entries; only float columns can contain them."""
+
+        def func(_input: dask_expr.Series) -> dask_expr.Series:
+            dtype = native_to_narwhals_dtype(_input, self._version, Implementation.DASK)
+            dtypes = import_dtypes_module(self._version)
+            # Only float dtypes can hold NaN, and NaN alone compares unequal to itself.
+            if dtype in (dtypes.Float32, dtypes.Float64):
+                return _input != _input  # noqa: PLR0124
+
+            import dask_expr as dx
+
+            # Presumably broadcasts the scalar False over the input's index.
+            return dx.new_collection(
+                dx.expr.ScalarToSeries(frame=False, index=_input.index)
+            )
+
+        return self._from_call(func, "is_nan", returns_scalar=False)
+
     def len(self: Self) -> Self:
         return self._from_call(
             lambda _input: _input.size,

diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
@@ -306,6 +306,9 @@ def is_between(
     def is_null(self) -> Self:
         return reuse_series_implementation(self, "is_null")
 
+    def is_nan(self) -> Self:  # presumably runs the series-level is_nan
+        return reuse_series_implementation(self, "is_nan")
+
     def fill_null(
         self,
         value: Any | None = None,

diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
@@ -654,6 +654,17 @@ def is_null(self) -> PandasLikeSeries:
         ser = self._native_series
         return self._from_native_series(ser.isna())
 
+    def is_nan(self) -> PandasLikeSeries:
+        """Flag NaN values; non-float series yield an all-False result."""
+        ser = self._native_series
+        dtypes = import_dtypes_module(self._version)
+        # Only float dtypes can hold NaN, and NaN alone is unequal to itself.
+        if self.dtype in (dtypes.Float32, dtypes.Float64):
+            return self._from_native_series(ser != ser)  # noqa: PLR0124
+        native_namespace = self._implementation.to_native_namespace()
+        all_false = native_namespace.Series(data=False, index=ser.index, name=ser.name)
+        return self._from_native_series(all_false)
+
     def fill_null(
         self,
         value: Any | None = None,

diff --git a/narwhals/expr.py b/narwhals/expr.py
@@ -1921,6 +1921,78 @@ def is_null(self) -> Self:
         """
         return self.__class__(lambda plx: self._to_compliant_expr(plx).is_null())
 
+    def is_nan(self) -> Self:
+        """Indicate which values are NaN.
+
+        Returns:
+            A new expression.
+
+        Notes:
+            pandas, Polars and PyArrow handle null values differently. Polars and PyArrow
+            distinguish between NaN and Null, whereas pandas doesn't.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> from narwhals.typing import IntoFrameT
+            >>> df_pd = pd.DataFrame(
+            ...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+            ... ).astype({"a": "Int64"})
+            >>> df_pl = pl.DataFrame(
+            ...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+            ... )
+            >>> df_pa = pa.table(
+            ...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+            ... )
+
+            Let's define a dataframe-agnostic function:
+
+            >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+            ...     df = nw.from_native(df_native)
+            ...     return df.with_columns(
+            ...         a_is_nan=nw.col("a").is_nan(), b_is_nan=nw.col("b").is_nan()
+            ...     ).to_native()
+
+            We can then pass pandas, Polars, or PyArrow to `my_library_agnostic_function`:
+
+            >>> my_library_agnostic_function(df_pd)
+                  a    b  a_is_nan  b_is_nan
+            0     2  2.0     False     False
+            1     4  4.0     False     False
+            2  <NA>  NaN     False      True
+            3     3  3.0     False     False
+            4     5  5.0     False     False
+
+            >>> my_library_agnostic_function(df_pl)  # nan != null for polars
+            shape: (5, 4)
+            ┌──────┬─────┬──────────┬──────────┐
+            │ a    ┆ b   ┆ a_is_nan ┆ b_is_nan │
+            │ ---  ┆ --- ┆ ---      ┆ ---      │
+            │ i64  ┆ f64 ┆ bool     ┆ bool     │
+            ╞══════╪═════╪══════════╪══════════╡
+            │ 2    ┆ 2.0 ┆ false    ┆ false    │
+            │ 4    ┆ 4.0 ┆ false    ┆ false    │
+            │ null ┆ NaN ┆ false    ┆ true     │
+            │ 3    ┆ 3.0 ┆ false    ┆ false    │
+            │ 5    ┆ 5.0 ┆ false    ┆ false    │
+            └──────┴─────┴──────────┴──────────┘
+
+            >>> my_library_agnostic_function(df_pa)  # nan != null for pyarrow
+            pyarrow.Table
+            a: int64
+            b: double
+            a_is_nan: bool
+            b_is_nan: bool
+            ----
+            a: [[2,4,null,3,5]]
+            b: [[2,4,nan,3,5]]
+            a_is_nan: [[false,false,null,false,false]]
+            b_is_nan: [[false,false,true,false,false]]
+        """
+        return self.__class__(lambda plx: self._to_compliant_expr(plx).is_nan())
+
     def arg_true(self) -> Self:
         """Find elements where boolean expression is True.
 

diff --git a/narwhals/series.py b/narwhals/series.py
@@ -1915,6 +1915,46 @@ def is_null(self) -> Self:
         """
         return self._from_compliant_series(self._compliant_series.is_null())
 
+    def is_nan(self) -> Self:
+        """Returns a boolean Series indicating which values are NaN.
+
+        Notes:
+            pandas and Polars handle NaN values differently. Polars distinguishes
+            between NaN and Null, whereas pandas doesn't.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> from narwhals.typing import IntoSeriesT
+            >>> s = [1.0, 2.0, float("nan")]
+            >>> s_pd = pd.Series(s, dtype="float64")
+            >>> s_pl = pl.Series(s)
+
+            We define a dataframe-agnostic function:
+
+            >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+            ...     s = nw.from_native(s_native, series_only=True)
+            ...     return s.is_nan().to_native()
+
+            We can then pass either pandas or Polars to `my_library_agnostic_function`:
+
+            >>> my_library_agnostic_function(s_pd)
+            0    False
+            1    False
+            2     True
+            dtype: bool
+            >>> my_library_agnostic_function(s_pl)  # doctest: +NORMALIZE_WHITESPACE
+            shape: (3,)
+            Series: '' [bool]
+            [
+               false
+               false
+               true
+            ]
+        """
+        return self._from_compliant_series(self._compliant_series.is_nan())
+
     def fill_null(
         self,
         value: Any | None = None,

diff --git a/tests/expr_and_series/is_nan_test.py b/tests/expr_and_series/is_nan_test.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+import narwhals.stable.v1 as nw
+from tests.conftest import dask_lazy_p2_constructor
+from tests.conftest import pandas_constructor
+from tests.utils import Constructor
+from tests.utils import ConstructorEager
+from tests.utils import assert_equal_data
+
+NON_NULLABLE_CONSTRUCTORS = [pandas_constructor, dask_lazy_p2_constructor]
+
+
+def test_nan(constructor: Constructor) -> None:
+    """Check `Expr.is_nan` on 0/0 (NaN), 1/1 and a missing value."""
+    self_ratio = nw.col("a") / nw.col("a")
+    frame = nw.from_native(constructor({"a": [0, 1, None]})).select(self_ratio)
+    result = frame.select(nw.col("a").is_nan())
+
+    # Non-nullable datatypes coerce null to NaN, so the missing value is flagged;
+    # nullable datatypes preserve the null, which must stay distinct from NaN.
+    null_becomes_nan = any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS)
+    expected = {"a": [True, False, True if null_becomes_nan else None]}
+
+    assert_equal_data(result, expected)
+
+
+def test_nan_series(constructor_eager: ConstructorEager) -> None:
+    """Check `Series.is_nan` on 0/0 (NaN), 1/1 and a missing value."""
+    native = constructor_eager({"a": [0, 1, None]})
+    self_ratio = nw.col("a") / nw.col("a")
+    frame = nw.from_native(native, eager_only=True).select(self_ratio)
+    result = {"a": frame["a"].is_nan()}
+
+    # Non-nullable datatypes coerce null to NaN, so the missing value is
+    # flagged as NaN; nullable datatypes preserve the null instead, and it
+    # shows up as a missing entry in the result.
+    null_becomes_nan = any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS)
+    expected = {"a": [True, False, True if null_becomes_nan else None]}
+
+    assert_equal_data(result, expected)
-Original file line number
+Diff line change
@@ Expand Up / @@ -33,6 +33,7 @@ @@
             - is_in
             - is_last_distinct
             - is_null
+            - is_nan
             - is_unique
             - len
             - map_batches
@@ Expand Down @@