diff --git a/narwhals/expr.py b/narwhals/expr.py index 84646cf8b..119189b59 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -17,6 +17,7 @@ from narwhals.expr_str import ExprStringNamespace from narwhals.utils import _validate_rolling_arguments from narwhals.utils import flatten +from narwhals.utils import issue_deprecation_warning if TYPE_CHECKING: from typing_extensions import Self @@ -1690,90 +1691,27 @@ def replace_strict( def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: """Sort this column. Place null values first. + !!! warning + `Expr.sort` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').sort())`, use + `df.select(nw.col('a')).sort()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: descending: Sort in descending order. nulls_last: Place null values last instead of first. Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [5, None, 1, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define dataframe-agnostic functions: - - >>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").sort()).to_native() - - >>> def agnostic_sort_descending(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").sort(descending=True)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sort` and `agnostic_sort_descending`: - - >>> agnostic_sort(df_pd) - a - 1 NaN - 2 1.0 - 3 2.0 - 0 5.0 - - >>> agnostic_sort(df_pl) - shape: (4, 1) - ┌──────┐ - │ a │ - │ --- │ - │ i64 │ - ╞══════╡ - │ null │ - │ 1 │ - │ 2 │ - │ 5 │ - └──────┘ - - >>> agnostic_sort(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[null,1,2,5]] - - >>> agnostic_sort_descending(df_pd) - a - 1 NaN - 0 5.0 - 3 2.0 - 2 1.0 - - >>> agnostic_sort_descending(df_pl) - shape: (4, 1) - ┌──────┐ - │ a │ - │ --- │ - │ i64 │ - ╞══════╡ - │ null │ - │ 5 │ - │ 2 │ - │ 1 │ - └──────┘ - - >>> agnostic_sort_descending(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[null,5,2,1]] """ + msg = ( + "`Expr.sort` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').sort())`, use `df.select(nw.col('a')).sort()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__( lambda plx: self._to_compliant_expr(plx).sort( descending=descending, nulls_last=nulls_last @@ -2394,6 +2332,13 @@ def sample( ) -> Self: """Sample randomly from this expression. + !!! warning + `Expr.sample` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').sample())`, use + `df.select(nw.col('a')).sample()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: n: Number of items to return. Cannot be used with fraction. fraction: Fraction of items to return. Cannot be used with n. @@ -2403,54 +2348,14 @@ def sample( Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_sample(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").sample(fraction=1.0, with_replacement=True) - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sample`: - - >>> agnostic_sample(df_pd) # doctest: +SKIP - a - 2 3 - 0 1 - 2 3 - - >>> agnostic_sample(df_pl) # doctest: +SKIP - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2 │ - │ 3 │ - │ 3 │ - └─────┘ - - >>> agnostic_sample(df_pa) # doctest: +SKIP - pyarrow.Table - a: int64 - ---- - a: [[1,3,3]] """ + msg = ( + "`Expr.sample` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').sample())`, use `df.select(nw.col('a')).sample()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__( lambda plx: self._to_compliant_expr(plx).sample( n, fraction=fraction, with_replacement=with_replacement, seed=seed @@ -2907,113 +2812,51 @@ def quantile( def head(self, n: int = 10) -> Self: r"""Get the first `n` rows. + !!! warning + `Expr.head` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').head())`, use + `df.select(nw.col('a')).head()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: n: Number of rows to return. Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": list(range(10))} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that returns the first 3 rows: - - >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").head(3)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_head`: - - >>> agnostic_head(df_pd) - a - 0 0 - 1 1 - 2 2 - - >>> agnostic_head(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 0 │ - │ 1 │ - │ 2 │ - └─────┘ - - >>> agnostic_head(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[0,1,2]] """ + msg = ( + "`Expr.head` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').head())`, use `df.select(nw.col('a')).head()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n)) def tail(self, n: int = 10) -> Self: r"""Get the last `n` rows. + !!! warning + `Expr.tail` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').tail())`, use + `df.select(nw.col('a')).tail()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: n: Number of rows to return. Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": list(range(10))} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that returns the last 3 rows: - - >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").tail(3)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_tail`: - - >>> agnostic_tail(df_pd) - a - 7 7 - 8 8 - 9 9 - - >>> agnostic_tail(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 7 │ - │ 8 │ - │ 9 │ - └─────┘ - - >>> agnostic_tail(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[7,8,9]] """ + msg = ( + "`Expr.tail` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').tail())`, use `df.select(nw.col('a')).tail()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n)) def round(self, decimals: int = 0) -> Self: @@ -3141,57 +2984,27 @@ def len(self) -> Self: def gather_every(self: Self, n: int, offset: int = 0) -> Self: r"""Take every nth value in the Series and return as new Series. + !!! warning + `Expr.gather_every` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').gather_every())`, use + `df.select(nw.col('a')).gather_every()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: n: Gather every *n*-th row. offset: Starting index. Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which gather every 2 rows, - starting from a offset of 1: - - >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_gather_every`: - - >>> agnostic_gather_every(df_pd) - a - 1 2 - 3 4 - - >>> agnostic_gather_every(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 4 │ - └─────┘ - - >>> agnostic_gather_every(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,4]] """ + msg = ( + "`Expr.gather_every` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').gather_every())`, use `df.select(nw.col('a')).gather_every()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__( lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset) ) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 171c52912..b4b3540ab 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -836,6 +836,91 @@ def rolling_std( ddof=ddof, ) + def head(self, n: int = 10) -> Self: + r"""Get the first `n` rows. + + Arguments: + n: Number of rows to return. + + Returns: + A new expression. 
+ """ + return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n)) + + def tail(self, n: int = 10) -> Self: + r"""Get the last `n` rows. + + Arguments: + n: Number of rows to return. + + Returns: + A new expression. + """ + return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n)) + + def gather_every(self: Self, n: int, offset: int = 0) -> Self: + r"""Take every nth value in the Series and return as new Series. + + Arguments: + n: Gather every *n*-th row. + offset: Starting index. + + Returns: + A new expression. + """ + return self.__class__( + lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset) + ) + + def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: + """Sort this column. Place null values first. + + Arguments: + descending: Sort in descending order. + nulls_last: Place null values last instead of first. + + Returns: + A new expression. + """ + return self.__class__( + lambda plx: self._to_compliant_expr(plx).sort( + descending=descending, nulls_last=nulls_last + ) + ) + + def sample( + self: Self, + n: int | None = None, + *, + fraction: float | None = None, + with_replacement: bool = False, + seed: int | None = None, + ) -> Self: + """Sample randomly from this expression. + + !!! warning + `Expr.sample` is deprecated in the main `narwhals` namespace only. + Hint: instead of `df.select(nw.col('a').sample())`, use + `df.select(nw.col('a')).sample()`. + Note: this method remains available here in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + + Arguments: + n: Number of items to return. Cannot be used with fraction. + fraction: Fraction of items to return. Cannot be used with n. + with_replacement: Allow values to be sampled more than once. + seed: Seed for the random number generator. If set to None (default), a random + seed is generated for each sample operation. + + Returns: + A new expression.
+ """ + return self.__class__( + lambda plx: self._to_compliant_expr(plx).sample( + n, fraction=fraction, with_replacement=with_replacement, seed=seed + ) + ) + class Schema(NwSchema): """Ordered mapping of column names to their data type. diff --git a/narwhals/utils.py b/narwhals/utils.py index 73f0e20ab..1a353079a 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -981,7 +981,7 @@ def validate_strict_and_pass_though( msg = ( "`strict` in `from_native` is deprecated, please use `pass_through` instead.\n\n" "Note: `strict` will remain available in `narwhals.stable.v1`.\n" - "See [stable api](../backcompat.md/) for more information.\n" + "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" ) issue_deprecation_warning(msg, _version="1.13.0") pass_through = not strict diff --git a/tests/expr_and_series/gather_every_test.py b/tests/expr_and_series/gather_every_test.py index fdaaac08b..bcda13d83 100644 --- a/tests/expr_and_series/gather_every_test.py +++ b/tests/expr_and_series/gather_every_test.py @@ -2,6 +2,7 @@ import pytest +import narwhals as nw_main import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -21,6 +22,9 @@ def test_gather_every_expr( assert_equal_data(result, expected) + with pytest.deprecated_call(): + df.select(nw_main.col("a").gather_every(n=n, offset=offset)) + @pytest.mark.parametrize("n", [1, 2, 3]) @pytest.mark.parametrize("offset", [1, 2, 3]) diff --git a/tests/expr_and_series/head_test.py b/tests/expr_and_series/head_test.py index 49cc41248..952b3de16 100644 --- a/tests/expr_and_series/head_test.py +++ b/tests/expr_and_series/head_test.py @@ -2,7 +2,8 @@ import pytest -import narwhals as nw +import narwhals as nw_main +import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -18,6 +19,9 @@ def test_head( expected = {"a": [1, 2]} assert_equal_data(result, expected) + with 
pytest.deprecated_call(): + df.select(nw_main.col("a").head(5)) + @pytest.mark.parametrize("n", [2, -1]) def test_head_series(constructor_eager: ConstructorEager, n: int) -> None: diff --git a/tests/expr_and_series/sample_test.py b/tests/expr_and_series/sample_test.py index 8e88ba7c7..eaea6a527 100644 --- a/tests/expr_and_series/sample_test.py +++ b/tests/expr_and_series/sample_test.py @@ -2,6 +2,7 @@ import pytest +import narwhals as nw_main import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -18,21 +19,20 @@ def test_expr_sample(constructor_eager: ConstructorEager) -> None: expected_series = (2,) assert result_series == expected_series + with pytest.deprecated_call(): + df.select(nw_main.col("a").sample(n=2)) -def test_expr_sample_fraction( - constructor_eager: ConstructorEager, request: pytest.FixtureRequest -) -> None: - if "dask" in str(constructor_eager): - request.applymarker(pytest.mark.xfail) + +def test_expr_sample_fraction(constructor_eager: ConstructorEager) -> None: df = nw.from_native( - constructor_eager({"a": [1, 2, 3] * 10, "b": [4, 5, 6] * 10}) - ).lazy() + constructor_eager({"a": [1, 2, 3] * 10, "b": [4, 5, 6] * 10}), eager_only=True + ) - result_expr = df.select(nw.col("a").sample(fraction=0.1)).collect().shape + result_expr = df.select(nw.col("a").sample(fraction=0.1)).shape expected_expr = (3, 1) assert result_expr == expected_expr - result_series = df.collect()["a"].sample(fraction=0.1).shape + result_series = df["a"].sample(fraction=0.1).shape expected_series = (3,) assert result_series == expected_series diff --git a/tests/expr_and_series/sort_test.py b/tests/expr_and_series/sort_test.py index 0d95722d8..e35514cd8 100644 --- a/tests/expr_and_series/sort_test.py +++ b/tests/expr_and_series/sort_test.py @@ -4,6 +4,7 @@ import pytest +import narwhals as nw_main import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ 
-29,6 +30,11 @@ def test_sort_expr( nw.col("b").sort(descending=descending, nulls_last=nulls_last), ) assert_equal_data(result, expected) + with pytest.deprecated_call(): + df.select( + "a", + nw_main.col("b").sort(descending=descending, nulls_last=nulls_last), + ) @pytest.mark.parametrize( diff --git a/tests/expr_and_series/tail_test.py b/tests/expr_and_series/tail_test.py index e04813814..2baabfd9c 100644 --- a/tests/expr_and_series/tail_test.py +++ b/tests/expr_and_series/tail_test.py @@ -2,13 +2,14 @@ import pytest -import narwhals as nw +import narwhals as nw_main +import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @pytest.mark.parametrize("n", [2, -1]) -def test_head( +def test_tail( constructor_eager: ConstructorEager, n: int, request: pytest.FixtureRequest ) -> None: if "polars" in str(constructor_eager) and n < 0: @@ -18,9 +19,12 @@ def test_head( expected = {"a": [2, 3]} assert_equal_data(result, expected) + with pytest.deprecated_call(): + df.select(nw_main.col("a").tail(5)) + @pytest.mark.parametrize("n", [2, -1]) -def test_head_series(constructor_eager: ConstructorEager, n: int) -> None: +def test_tail_series(constructor_eager: ConstructorEager, n: int) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True) result = df.select(df["a"].tail(n)) expected = {"a": [2, 3]}