Skip to content

Commit

Permalink
feat: gather_every methods (#619)
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi authored Jul 25, 2024
1 parent a4ca4e6 commit 21cc4f5
Show file tree
Hide file tree
Showing 15 changed files with 239 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- drop
- drop_nulls
- filter
- gather_every
- get_column
- group_by
- head
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- drop_nulls
- fill_null
- filter
- gather_every
- head
- is_between
- is_duplicated
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/lazyframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- drop
- drop_nulls
- filter
- gather_every
- group_by
- head
- join
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
- dtype
- fill_null
- filter
- gather_every
- head
- is_between
- is_duplicated
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,3 +488,6 @@ def unique(

keep_idx = self.select(*subset).is_unique()
return self.filter(keep_idx)

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Return a new frame holding every `n`-th row, starting at row `offset`.

    Arguments:
        n: Step between gathered rows.
        offset: Index of the first row to gather.
    """
    # `obj[offset::n]` and `obj[slice(offset, None, n)]` are the same indexing call.
    every_nth = slice(offset, None, n)
    return self._from_native_dataframe(self._native_dataframe[every_nth])
3 changes: 3 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ def quantile(
self, "quantile", quantile, interpolation, returns_scalar=True
)

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Expression-level `gather_every`, delegated to the series implementation.

    Arguments:
        n: Step between gathered values.
        offset: Index of the first value to gather.
    """
    forwarded = {"n": n, "offset": offset}
    return reuse_series_implementation(self, "gather_every", **forwarded)

@property
def dt(self: Self) -> ArrowExprDateTimeNamespace:
    """Datetime namespace bound to this expression."""
    namespace = ArrowExprDateTimeNamespace(self)
    return namespace
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,9 @@ def quantile(
0
]

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Return a new series holding every `n`-th value, starting at `offset`.

    Arguments:
        n: Step between gathered values.
        offset: Index of the first value to gather.
    """
    # `obj[offset::n]` and `obj[slice(offset, None, n)]` are the same indexing call.
    every_nth = slice(offset, None, n)
    return self._from_native_series(self._native_series[every_nth])

@property
def shape(self) -> tuple[int]:
    """One-element tuple containing the length of the native series."""
    n_values = len(self._native_series)
    return (n_values,)
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,3 +583,6 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) ->

def clone(self: Self) -> Self:
    """Return a new frame wrapping a copy of the native dataframe."""
    native_copy = self._native_dataframe.copy()
    return self._from_native_dataframe(native_copy)

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Return a new frame holding every `n`-th row, starting at position `offset`.

    Rows are selected positionally through `.iloc`.

    Arguments:
        n: Step between gathered rows.
        offset: Position of the first row to gather.
    """
    # `.iloc[offset::n]` and `.iloc[slice(offset, None, n)]` are the same indexing call.
    every_nth = slice(offset, None, n)
    return self._from_native_dataframe(self._native_dataframe.iloc[every_nth])
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,9 @@ def round(self: Self, decimals: int) -> Self:
def len(self: Self) -> Self:
    """Length expression, delegated to the series-level `len` and flagged as scalar-returning."""
    method_name = "len"
    return reuse_series_implementation(self, method_name, returns_scalar=True)

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Expression-level `gather_every`, delegated to the series implementation.

    Arguments:
        n: Step between gathered values.
        offset: Index of the first value to gather.
    """
    forwarded = {"n": n, "offset": offset}
    return reuse_series_implementation(self, "gather_every", **forwarded)

@property
def str(self: Self) -> PandasLikeExprStringNamespace:
    """String namespace bound to this expression."""
    namespace = PandasLikeExprStringNamespace(self)
    return namespace
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,9 @@ def to_dummies(
backend_version=self._backend_version,
)

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Return a new series holding every `n`-th value, starting at position `offset`.

    Values are selected positionally through `.iloc`.

    Arguments:
        n: Step between gathered values.
        offset: Position of the first value to gather.
    """
    # `.iloc[offset::n]` and `.iloc[slice(offset, None, n)]` are the same indexing call.
    every_nth = slice(offset, None, n)
    return self._from_native_series(self._native_series.iloc[every_nth])

@property
def str(self) -> PandasLikeSeriesStringNamespace:
    """String namespace bound to this series."""
    namespace = PandasLikeSeriesStringNamespace(self)
    return namespace
Expand Down
87 changes: 87 additions & 0 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,11 @@ def join(
def clone(self) -> Self:
    """Clone the compliant frame and wrap the clone in a new frame."""
    cloned = self._compliant_frame.clone()
    return self._from_compliant_dataframe(cloned)

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Gather every `n`-th row of the compliant frame and wrap the result.

    Arguments:
        n: Step between gathered rows.
        offset: Index of the first row to gather.
    """
    gathered = self._compliant_frame.gather_every(n=n, offset=offset)
    return self._from_compliant_dataframe(gathered)


class DataFrame(BaseFrame[FrameT]):
"""
Expand Down Expand Up @@ -1974,6 +1979,47 @@ def clone(self) -> Self:
"""
return super().clone()

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    r"""
    Take every nth row in the DataFrame and return as a new DataFrame.

    Arguments:
        n: Gather every *n*-th row.
        offset: Starting index.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)

        Let's define a dataframe-agnostic function in which we gather every 2 rows,
        starting from an offset of 1:

        >>> @nw.narwhalify
        ... def func(df_any):
        ...     return df_any.gather_every(n=2, offset=1)

        >>> func(df_pd)
           a  b
        1  2  6
        3  4  8

        >>> func(df_pl)
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i64 ┆ i64 │
        ╞═════╪═════╡
        │ 2   ┆ 6   │
        │ 4   ┆ 8   │
        └─────┴─────┘
    """
    gathered = super().gather_every(n=n, offset=offset)
    return gathered


class LazyFrame(BaseFrame[FrameT]):
"""
Expand Down Expand Up @@ -3232,3 +3278,44 @@ def lazy(self) -> Self:
<LazyFrame ...>
"""
return super().lazy() # type: ignore[return-value]

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    r"""
    Take every nth row in the LazyFrame and return as a new LazyFrame.

    Arguments:
        n: Gather every *n*-th row.
        offset: Starting index.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
        >>> df_pd = pd.DataFrame(data)
        >>> lf_pl = pl.LazyFrame(data)

        Let's define a dataframe-agnostic function in which we gather every 2 rows,
        starting from an offset of 1:

        >>> @nw.narwhalify
        ... def func(df_any):
        ...     return df_any.gather_every(n=2, offset=1)

        >>> func(df_pd)
           a  b
        1  2  6
        3  4  8

        >>> func(lf_pl).collect()
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i64 ┆ i64 │
        ╞═════╪═════╡
        │ 2   ┆ 6   │
        │ 4   ┆ 8   │
        └─────┴─────┘
    """
    return super().gather_every(n=n, offset=offset)
43 changes: 43 additions & 0 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1722,6 +1722,49 @@ def len(self) -> Self:
"""
return self.__class__(lambda plx: self._call(plx).len())

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    r"""
    Take every nth value in the Series and return as new Series.

    Arguments:
        n: Gather every *n*-th row.
        offset: Starting index.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)

        Let's define a dataframe-agnostic function in which we gather every 2 rows,
        starting from an offset of 1:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.select(nw.col("a").gather_every(n=2, offset=1))

        >>> func(df_pd)
           a
        1  2
        3  4

        >>> func(df_pl)
        shape: (2, 1)
        ┌─────┐
        │ a   │
        │ --- │
        │ i64 │
        ╞═════╡
        │ 2   │
        │ 4   │
        └─────┘
    """

    def gather(plx):
        # Evaluated lazily: resolve this expression first, then gather from it.
        return self._call(plx).gather_every(n=n, offset=offset)

    return self.__class__(gather)

@property
def str(self: Self) -> ExprStringNamespace:
    """String namespace bound to this expression."""
    namespace = ExprStringNamespace(self)
    return namespace
Expand Down
40 changes: 40 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2047,6 +2047,46 @@ def to_dummies(
level=self._level,
)

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    r"""
    Take every nth value in the Series and return as new Series.

    Arguments:
        n: Gather every *n*-th row.
        offset: Starting index.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = [1, 2, 3, 4]
        >>> s_pd = pd.Series(name="a", data=data)
        >>> s_pl = pl.Series(name="a", values=data)

        Let's define a dataframe-agnostic function in which we gather every 2 rows,
        starting from an offset of 1:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.gather_every(n=2, offset=1)

        >>> func(s_pd)
        1    2
        3    4
        Name: a, dtype: int64

        >>> func(s_pl)  # doctest:+NORMALIZE_WHITESPACE
        shape: (2,)
        Series: 'a' [i64]
        [
           2
           4
        ]
    """
    gathered = self._compliant_series.gather_every(n=n, offset=offset)
    return self._from_compliant_series(gathered)

@property
def str(self) -> SeriesStringNamespace:
    """String namespace bound to this series."""
    namespace = SeriesStringNamespace(self)
    return namespace
Expand Down
30 changes: 30 additions & 0 deletions tests/expr_and_series/gather_every_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts

data = {"a": list(range(10))}


@pytest.mark.parametrize("n", [1, 2, 3])
@pytest.mark.parametrize("offset", [1, 2, 3])
def test_gather_every_expr(constructor_with_lazy: Any, n: int, offset: int) -> None:
    """`Expr.gather_every` must agree with Python slicing `[offset::n]` on column "a"."""
    frame = nw.from_native(constructor_with_lazy(data))
    gathered = frame.select(nw.col("a").gather_every(n=n, offset=offset))

    compare_dicts(gathered, {"a": data["a"][offset::n]})


@pytest.mark.parametrize("n", [1, 2, 3])
@pytest.mark.parametrize("offset", [1, 2, 3])
def test_gather_every_series(constructor_series: Any, n: int, offset: int) -> None:
    """`Series.gather_every` must agree with Python slicing `[offset::n]`."""
    values = data["a"]
    wrapped = nw.from_native(constructor_series(values), series_only=True)

    gathered = wrapped.gather_every(n=n, offset=offset)

    assert gathered.to_list() == values[offset::n]
17 changes: 17 additions & 0 deletions tests/frame/gather_every_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts

data = {"a": list(range(10))}


@pytest.mark.parametrize("n", [1, 2, 3])
@pytest.mark.parametrize("offset", [1, 2, 3])
def test_gather_every(constructor_with_lazy: Any, n: int, offset: int) -> None:
    """Frame-level `gather_every` must agree with Python slicing `[offset::n]`."""
    frame = nw.from_native(constructor_with_lazy(data))
    gathered = frame.gather_every(n=n, offset=offset)
    compare_dicts(gathered, {"a": data["a"][offset::n]})

0 comments on commit 21cc4f5

Please sign in to comment.