narwhals-dev · MarcoGorelli · Jan 3, 2025 · Dec 19, 2024 · Dec 20, 2024 · Dec 27, 2024
diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md
@@ -32,6 +32,7 @@
         - is_first_distinct
         - is_in
         - is_last_distinct
+        - is_nan
         - is_null
         - is_unique
         - len

diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md
@@ -38,6 +38,7 @@
         - is_first_distinct
         - is_in
         - is_last_distinct
+        - is_nan
         - is_null
         - is_sorted
         - is_unique

diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
@@ -317,6 +317,9 @@ def null_count(self: Self) -> Self:
     def is_null(self: Self) -> Self:
         return reuse_series_implementation(self, "is_null")
 
+    def is_nan(self: Self) -> Self:
+        # Forwards to the shared helper with the method name "is_nan"; presumably
+        # the helper applies the Series-level `is_nan` to each evaluated column —
+        # confirm against `reuse_series_implementation`.
+        return reuse_series_implementation(self, "is_nan")
+
     def is_between(self: Self, lower_bound: Any, upper_bound: Any, closed: str) -> Self:
         return reuse_series_implementation(
             self,

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
@@ -520,6 +520,11 @@ def is_null(self: Self) -> Self:
         ser = self._native_series
         return self._from_native_series(ser.is_null())
 
+    def is_nan(self: Self) -> Self:
+        """Return a boolean series computed by `pyarrow.compute.is_nan`."""
+        import pyarrow.compute as pc
+
+        nan_mask = pc.is_nan(self._native_series)
+        return self._from_native_series(nan_mask)
+
     def cast(self: Self, dtype: DType) -> Self:
         import pyarrow.compute as pc
 

diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
@@ -16,6 +16,7 @@
 from narwhals._pandas_like.utils import calculate_timestamp_datetime
 from narwhals._pandas_like.utils import native_to_narwhals_dtype
 from narwhals.exceptions import ColumnNotFoundError
+from narwhals.exceptions import InvalidOperationError
 from narwhals.typing import CompliantExpr
 from narwhals.utils import Implementation
 from narwhals.utils import generate_temporary_column_name
@@ -706,6 +707,20 @@ def is_null(self: Self) -> Self:
             returns_scalar=self._returns_scalar,
         )
 
+    def is_nan(self: Self) -> Self:
+        """Flag NaN values of a numeric Dask series.
+
+        Returns:
+            A new expression wrapping the self-inequality mask.
+
+        Raises:
+            InvalidOperationError: Raised by the wrapped function when it is
+                called on a series whose dtype is not numeric.
+        """
+
+        def func(_input: dask_expr.Series) -> dask_expr.Series:
+            dtype = native_to_narwhals_dtype(_input, self._version, self._implementation)
+            if dtype.is_numeric():
+                # A float NaN is the value that compares unequal to itself.
+                return _input != _input  # noqa: PLR0124
+            msg = f"`.is_nan` only supported for numeric dtypes and not {dtype}, did you mean `.is_null`?"
+            raise InvalidOperationError(msg)
+
+        # The second argument labels the operation (presumably used to build the
+        # expression's function name — confirm in `_from_call`); it must name
+        # this method, not the neighbouring `is_null`.
+        return self._from_call(
+            func,
+            "is_nan",
+            returns_scalar=self._returns_scalar,
+        )
+
     def len(self: Self) -> Self:
         return self._from_call(
             lambda _input: _input.size,

diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
@@ -311,6 +311,9 @@ def is_between(
     def is_null(self) -> Self:
         return reuse_series_implementation(self, "is_null")
 
+    def is_nan(self) -> Self:
+        # Forwards to the shared helper with the method name "is_nan"; presumably
+        # the helper applies the Series-level `is_nan` to each evaluated column —
+        # confirm against `reuse_series_implementation`.
+        return reuse_series_implementation(self, "is_nan")
+
     def fill_null(
         self,
         value: Any | None = None,

diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
@@ -20,6 +20,7 @@
 from narwhals._pandas_like.utils import set_index
 from narwhals._pandas_like.utils import to_datetime
 from narwhals.dependencies import is_numpy_scalar
+from narwhals.exceptions import InvalidOperationError
 from narwhals.typing import CompliantSeries
 from narwhals.utils import Implementation
 from narwhals.utils import import_dtypes_module
@@ -623,8 +624,6 @@ def mean(self) -> Any:
         return ser.mean()
 
     def median(self) -> Any:
-        from narwhals.exceptions import InvalidOperationError
-
         if not self.dtype.is_numeric():
             msg = "`median` operation not supported for non-numeric input type."
             raise InvalidOperationError(msg)
@@ -663,6 +662,13 @@ def is_null(self) -> PandasLikeSeries:
         ser = self._native_series
         return self._from_native_series(ser.isna())
 
+    def is_nan(self) -> PandasLikeSeries:
+        """Return a boolean series marking the NaN entries of a numeric series.
+
+        Raises:
+            InvalidOperationError: If the series dtype is not numeric.
+        """
+        if not self.dtype.is_numeric():
+            msg = f"`.is_nan` only supported for numeric dtype and not {self.dtype}, did you mean `.is_null`?"
+            raise InvalidOperationError(msg)
+        native = self._native_series
+        # Self-inequality: a float NaN is the value that does not equal itself.
+        return self._from_native_series(native != native)  # noqa: PLR0124
+
     def fill_null(
         self,
         value: Any | None = None,

diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py
@@ -79,6 +79,15 @@ def ewm_mean(
             )
         return self._from_native_expr(native_expr)
 
+    def is_nan(self: Self) -> Self:
+        """Return an expression flagging NaN values.
+
+        For Polars versions below 1.18 the native `is_nan` is wrapped so that
+        it is only evaluated on non-null rows.
+        """
+        native = self._native_expr
+        if self._backend_version < (1, 18):  # pragma: no cover
+            import polars as pl
+
+            # `when/then` without `otherwise` leaves rows failing the condition
+            # (here: null inputs) as null.
+            null_aware = pl.when(native.is_not_null()).then(native.is_nan())
+            return self._from_native_expr(null_aware)
+        return self._from_native_expr(native.is_nan())
+
     def rolling_var(
         self: Self,
         window_size: int,

diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py
@@ -220,6 +220,19 @@ def __rpow__(self: Self, other: PolarsSeries | Any) -> Self:
     def __invert__(self: Self) -> Self:
         return self._from_native_series(self._native_series.__invert__())
 
+    def is_nan(self: Self) -> Self:
+        """Return a boolean series flagging NaN values.
+
+        For Polars versions below 1.18 the native `is_nan` is wrapped so that
+        it is only evaluated on non-null rows.
+        """
+        import polars as pl
+
+        series = self._native_series
+        if self._backend_version < (1, 18):  # pragma: no cover
+            # `when/then` without `otherwise` leaves rows failing the condition
+            # (here: null inputs) as null; the result column is then pulled
+            # back out of the one-column frame by the series' name.
+            null_aware = pl.when(series.is_not_null()).then(series.is_nan())
+            masked = pl.select(null_aware)[series.name]
+            return self._from_native_series(masked)
+        return self._from_native_series(series.is_nan())
+
     def median(self: Self) -> Any:
         from narwhals.exceptions import InvalidOperationError
 

diff --git a/narwhals/expr.py b/narwhals/expr.py
@@ -1925,6 +1925,70 @@ def is_null(self) -> Self:
         """
         return self.__class__(lambda plx: self._to_compliant_expr(plx).is_null())
 
+    def is_nan(self) -> Self:
+        """Indicate which values are NaN.
+
+        Returns:
+            A new expression.
+
+        Notes:
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../pandas_like_concepts/null_handling.md/)
+            for reference.
+
+            The pandas-like and Dask implementations raise narwhals'
+            `InvalidOperationError` when the column is not numeric; the other
+            backends surface their own native error in that case.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> from narwhals.typing import IntoFrameT
+            >>> data = {"orig": [0.0, None, 2.0]}
+            >>> df_pd = pd.DataFrame(data).astype({"orig": "Float64"})
+            >>> df_pl = pl.DataFrame(data)
+            >>> df_pa = pa.table(data)
+
+            Let's define a dataframe-agnostic function:
+
+            >>> def agnostic_self_div_is_nan(df_native: IntoFrameT) -> IntoFrameT:
+            ...     df = nw.from_native(df_native)
+            ...     return df.with_columns(
+            ...         divided=nw.col("orig") / nw.col("orig"),
+            ...         divided_is_nan=(nw.col("orig") / nw.col("orig")).is_nan(),
+            ...     ).to_native()
+
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_self_div_is_nan`:
+
+            >>> print(agnostic_self_div_is_nan(df_pd))
+               orig  divided  divided_is_nan
+            0   0.0      NaN            True
+            1  <NA>     <NA>            <NA>
+            2   2.0      1.0           False
+
+            >>> print(agnostic_self_div_is_nan(df_pl))
+            shape: (3, 3)
+            ┌──────┬─────────┬────────────────┐
+            │ orig ┆ divided ┆ divided_is_nan │
+            │ ---  ┆ ---     ┆ ---            │
+            │ f64  ┆ f64     ┆ bool           │
+            ╞══════╪═════════╪════════════════╡
+            │ 0.0  ┆ NaN     ┆ true           │
+            │ null ┆ null    ┆ null           │
+            │ 2.0  ┆ 1.0     ┆ false          │
+            └──────┴─────────┴────────────────┘
+
+            >>> print(agnostic_self_div_is_nan(df_pa))
+            pyarrow.Table
+            orig: double
+            divided: double
+            divided_is_nan: bool
+            ----
+            orig: [[0,null,2]]
+            divided: [[nan,null,1]]
+            divided_is_nan: [[true,null,false]]
+
+        """
+        return self.__class__(lambda plx: self._to_compliant_expr(plx).is_nan())
+
     def arg_true(self) -> Self:
         """Find elements where boolean expression is True.
 

diff --git a/narwhals/series.py b/narwhals/series.py
@@ -2437,6 +2437,59 @@ def is_null(self) -> Self:
         """
         return self._from_compliant_series(self._compliant_series.is_null())
 
+    def is_nan(self) -> Self:
+        """Returns a boolean Series indicating which values are NaN.
+
+        Returns:
+            A boolean Series indicating which values are NaN.
+
+        Notes:
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../pandas_like_concepts/null_handling.md/)
+            for reference.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> from narwhals.typing import IntoSeriesT
+
+            >>> data = [0.0, None, 2.0]
+            >>> s_pd = pd.Series(data, dtype="Float64")
+            >>> s_pl = pl.Series(data)
+            >>> s_pa = pa.chunked_array([data], type=pa.float64())
+
+            Let's define a library-agnostic function:
+
+            >>> def agnostic_is_nan(s_native: IntoSeriesT) -> IntoSeriesT:
+            ...     s = nw.from_native(s_native, series_only=True)
+            ...     return s.is_nan().to_native()
+
+            >>> print(agnostic_is_nan(s_pd))
+            0    False
+            1     <NA>
+            2    False
+            dtype: boolean
+
+            >>> print(agnostic_is_nan(s_pl))  # doctest: +NORMALIZE_WHITESPACE
+            shape: (3,)
+            Series: '' [bool]
+            [
+                    false
+                    null
+                    false
+            ]
+
+            >>> print(agnostic_is_nan(s_pa))  # doctest: +NORMALIZE_WHITESPACE
+            [
+              [
+                false,
+                null,
+                false
+              ]
+            ]
+        """
+        return self._from_compliant_series(self._compliant_series.is_nan())
+
     def fill_null(
         self,
         value: Any | None = None,

diff --git a/tests/expr_and_series/is_nan_test.py b/tests/expr_and_series/is_nan_test.py
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+import narwhals.stable.v1 as nw
+from tests.conftest import dask_lazy_p1_constructor
+from tests.conftest import dask_lazy_p2_constructor
+from tests.conftest import modin_constructor
+from tests.conftest import pandas_constructor
+from tests.utils import Constructor
+from tests.utils import ConstructorEager
+from tests.utils import assert_equal_data
+
+NON_NULLABLE_CONSTRUCTORS = [
+    pandas_constructor,
+    dask_lazy_p1_constructor,
+    dask_lazy_p2_constructor,
+    modin_constructor,
+]
+
+
+def test_nan(constructor: Constructor) -> None:
+    """Check expression-level `is_nan` on int, float and 0/0-derived columns."""
+    frame = nw.from_native(constructor({"int": [0, 1, None]})).with_columns(
+        float=nw.col("int").cast(nw.Float64),
+        float_na=nw.col("int") / nw.col("int"),
+    )
+    result = frame.select(
+        **{name: nw.col(name).is_nan() for name in ("int", "float", "float_na")}
+    )
+
+    # Non-nullable backends coerce the null to NaN, so the last row reports True;
+    # nullable backends keep it as null, which must stay distinguishable (None).
+    coerces_null_to_nan = any(constructor is c for c in NON_NULLABLE_CONSTRUCTORS)
+    last_row: Any = True if coerces_null_to_nan else None
+    expected: dict[str, list[Any]] = {
+        "int": [False, False, last_row],
+        "float": [False, False, last_row],
+        "float_na": [True, False, last_row],
+    }
+
+    assert_equal_data(result, expected)
+
+
+def test_nan_series(constructor_eager: ConstructorEager) -> None:
+    """Check Series-level `is_nan` on int, float and 0/0-derived columns."""
+    frame = nw.from_native(
+        constructor_eager({"int": [0, 1, None]}), eager_only=True
+    ).with_columns(
+        float=nw.col("int").cast(nw.Float64),
+        float_na=nw.col("int") / nw.col("int"),
+    )
+    result = {name: frame[name].is_nan() for name in ("int", "float", "float_na")}
+
+    # Non-nullable backends coerce the null to NaN, so the last row reports True;
+    # nullable backends keep it as null, which must stay distinguishable (None).
+    coerces_null_to_nan = any(
+        constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS
+    )
+    last_row: Any = True if coerces_null_to_nan else None
+    expected: dict[str, list[Any]] = {
+        "int": [False, False, last_row],
+        "float": [False, False, last_row],
+        "float_na": [True, False, last_row],
+    }
+
+    assert_equal_data(result, expected)
+
+
+def test_nan_non_float(constructor: Constructor) -> None:
+    """`is_nan` on a string column must raise the backend-appropriate error."""
+    from polars.exceptions import InvalidOperationError as PlInvalidOperationError
+    from pyarrow.lib import ArrowNotImplementedError
+
+    from narwhals.exceptions import InvalidOperationError as NwInvalidOperationError
+
+    frame = nw.from_native(constructor({"a": ["x", "y"]}))
+
+    # Polars and PyArrow surface their own native errors; every other backend
+    # goes through narwhals' explicit dtype check.
+    constructor_id = str(constructor)
+    expected_exc: type[Exception]
+    if "polars" in constructor_id:
+        expected_exc = PlInvalidOperationError
+    elif "pyarrow_table" in constructor_id:
+        expected_exc = ArrowNotImplementedError
+    else:
+        expected_exc = NwInvalidOperationError
+
+    with pytest.raises(expected_exc):
+        frame.select(nw.col("a").is_nan()).lazy().collect()
+
+
+def test_nan_non_float_series(constructor_eager: ConstructorEager) -> None:
+    """Series `is_nan` on a string column must raise the backend-appropriate error."""
+    from polars.exceptions import InvalidOperationError as PlInvalidOperationError
+    from pyarrow.lib import ArrowNotImplementedError
+
+    from narwhals.exceptions import InvalidOperationError as NwInvalidOperationError
+
+    frame = nw.from_native(constructor_eager({"a": ["x", "y"]}), eager_only=True)
+
+    # Polars and PyArrow surface their own native errors; every other backend
+    # goes through narwhals' explicit dtype check.
+    constructor_id = str(constructor_eager)
+    expected_exc: type[Exception]
+    if "polars" in constructor_id:
+        expected_exc = PlInvalidOperationError
+    elif "pyarrow_table" in constructor_id:
+        expected_exc = ArrowNotImplementedError
+    else:
+        expected_exc = NwInvalidOperationError
+
+    with pytest.raises(expected_exc):
+        frame["a"].is_nan()
-Original file line number
+Diff line change
@@ Expand Up / @@ -32,6 +32,7 @@ @@
             - is_first_distinct
             - is_in
             - is_last_distinct
+            - is_nan
             - is_null
             - is_unique
             - len
@@ Expand Down @@