From 89fe3f4c370210537aff0deffaf6a90626fbbf69 Mon Sep 17 00:00:00 2001
From: FBruzzesi
Date: Fri, 1 Nov 2024 19:13:28 +0100
Subject: [PATCH] is-finite for eager

---
 docs/api-reference/expr.md      |  1 +
 docs/api-reference/series.md    |  1 +
 narwhals/_arrow/expr.py         |  3 ++
 narwhals/_arrow/series.py       |  5 ++++
 narwhals/_pandas_like/expr.py   |  3 ++
 narwhals/_pandas_like/series.py |  5 ++++
 narwhals/expr.py                | 51 ++++++++++++++++++++++++++++++++++
 narwhals/series.py              | 46 ++++++++++++++++++++++++++++++
 8 files changed, 115 insertions(+)

diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md
index 7188b2c36..99cbd8762 100644
--- a/docs/api-reference/expr.md
+++ b/docs/api-reference/expr.md
@@ -21,6 +21,7 @@
         - clip
         - is_between
         - is_duplicated
+        - is_finite
         - is_first_distinct
         - is_in
         - is_last_distinct
diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md
index e8572dda8..2a8dba1af 100644
--- a/docs/api-reference/series.md
+++ b/docs/api-reference/series.md
@@ -26,6 +26,7 @@
         - is_between
         - is_duplicated
         - is_empty
+        - is_finite
         - is_first_distinct
         - is_in
         - is_last_distinct
diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
index 35e936d72..3dab97678 100644
--- a/narwhals/_arrow/expr.py
+++ b/narwhals/_arrow/expr.py
@@ -372,6 +372,9 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
     def mode(self: Self) -> Self:
         return reuse_series_implementation(self, "mode")
 
+    def is_finite(self: Self) -> Self:
+        return reuse_series_implementation(self, "is_finite")
+
     @property
     def dt(self: Self) -> ArrowExprDateTimeNamespace:
         return ArrowExprDateTimeNamespace(self)
diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index 70009df43..c20276b1b 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -721,6 +721,11 @@ def mode(self: Self) -> ArrowSeries:
             plx.col(col_token) == plx.col(col_token).max()
         )[self.name]
 
+    def is_finite(self: Self) -> Self:
+        import pyarrow.compute as pc  # ignore-banned-import
+
+        return self._from_native_series(pc.is_finite(self._native_series))
+
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()
 
diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
index a58597eea..f5512219d 100644
--- a/narwhals/_pandas_like/expr.py
+++ b/narwhals/_pandas_like/expr.py
@@ -387,6 +387,9 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
     def mode(self: Self) -> Self:
         return reuse_series_implementation(self, "mode")
 
+    def is_finite(self: Self) -> Self:
+        return reuse_series_implementation(self, "is_finite")
+
     @property
     def str(self: Self) -> PandasLikeExprStringNamespace:
         return PandasLikeExprStringNamespace(self)
diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
index 35df78e2f..c2c525786 100644
--- a/narwhals/_pandas_like/series.py
+++ b/narwhals/_pandas_like/series.py
@@ -686,6 +686,11 @@ def mode(self: Self) -> Self:
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()
 
+    def is_finite(self: Self) -> Self:
+        import numpy as np  # ignore-banned-import
+
+        return self._from_native_series(np.isfinite(self._native_series))
+
     @property
     def str(self) -> PandasLikeSeriesStringNamespace:
         return PandasLikeSeriesStringNamespace(self)
diff --git a/narwhals/expr.py b/narwhals/expr.py
index 6c2d28962..077e4841d 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -2313,6 +2313,57 @@ def mode(self: Self) -> Self:
         """
         return self.__class__(lambda plx: self._call(plx).mode())
 
+    def is_finite(self: Self) -> Self:
+        """
+        Returns boolean values indicating which values are finite.
+
+        Returns:
+            Expression of `Boolean` data type.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = {
+            ...     "a": [1.0, 2],
+            ...     "b": [3.0, float("inf")],
+            ... }
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(df):
+            ...     return df.select(nw.all().is_finite())
+
+            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+
+            >>> func(pd.DataFrame(data))
+                  a      b
+            0  True   True
+            1  True  False
+
+            >>> func(pl.DataFrame(data))
+            shape: (2, 2)
+            ┌──────┬───────┐
+            │ a    ┆ b     │
+            │ ---  ┆ ---   │
+            │ bool ┆ bool  │
+            ╞══════╪═══════╡
+            │ true ┆ true  │
+            │ true ┆ false │
+            └──────┴───────┘
+
+            >>> func(pa.table(data))
+            pyarrow.Table
+            a: bool
+            b: bool
+            ----
+            a: [[true,true]]
+            b: [[true,false]]
+        """
+        return self.__class__(lambda plx: self._call(plx).is_finite())
+
     @property
     def str(self: Self) -> ExprStringNamespace[Self]:
         return ExprStringNamespace(self)
diff --git a/narwhals/series.py b/narwhals/series.py
index 6f5223202..79460408a 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -2525,6 +2525,52 @@ def mode(self: Self) -> Self:
         """
         return self._from_compliant_series(self._compliant_series.mode())
 
+    def is_finite(self: Self) -> Self:
+        """
+        Returns a boolean Series indicating which values are finite.
+
+        Returns:
+            Series of `Boolean` data type.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = [1.0, float("inf")]
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(s):
+            ...     return s.is_finite()
+
+            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+
+            >>> func(pd.Series(data))
+            0     True
+            1    False
+            dtype: bool
+
+            >>> func(pl.Series(data))  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [bool]
+            [
+               true
+               false
+            ]
+
+            >>> func(pa.chunked_array([data]))  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                true,
+                false
+              ]
+            ]
+        """
+        return self._from_compliant_series(self._compliant_series.is_finite())
+
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._compliant_series.__iter__()
 