Skip to content

Commit

Permalink
feat: dask lazyframe remaining methods (#778)
Browse files Browse the repository at this point in the history
* feat: dask dataframe remaining methods

* gather_every in Expr
  • Loading branch information
FBruzzesi authored Aug 11, 2024
1 parent 75d61e4 commit cb4a583
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 22 deletions.
17 changes: 17 additions & 0 deletions narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,20 @@ def group_by(self, *by: str) -> Any:
from narwhals._dask.group_by import DaskLazyGroupBy

return DaskLazyGroupBy(self, list(by))

def tail(self: Self, n: int) -> Self:
    """Return a new frame wrapping the native frame's ``tail`` result.

    Args:
        n: Row count forwarded unchanged to the native ``tail`` call.

    Returns:
        Whatever ``_from_native_dataframe`` builds from the native result.
    """
    # `compute=False` is passed through to the native call; presumably this
    # keeps the result lazy -- TODO confirm against the Dask documentation.
    # NOTE(review): Dask documents `tail` as only looking at the last
    # partition -- confirm this is acceptable for multi-partition frames.
    native_tail = self._native_dataframe.tail(n=n, compute=False)
    return self._from_native_dataframe(native_tail)

def gather_every(self: Self, n: int, offset: int) -> Self:
    """Keep every ``n``-th row, starting from row position ``offset``.

    A temporary row-index column is added, rows are filtered on it, and the
    column is dropped again before returning.

    Args:
        n: Step between kept rows (used as the modulus on the row index).
        offset: First row position to keep; rows whose index is below it are
            filtered out.

    Returns:
        A frame holding the rows whose index ``i`` satisfies ``i >= offset``
        and ``(i - offset) % n == 0``, without the temporary index column.
    """
    # The temporary column name is generated against the current columns.
    index_name = generate_unique_token(n_bytes=8, columns=self.columns)
    plx = self.__narwhals_namespace__()

    indexed = self.with_row_index(name=index_name)
    selected = indexed.filter(
        plx.col(index_name) >= offset,  # type: ignore[operator]
        (plx.col(index_name) - offset) % n == 0,  # type: ignore[arg-type]
    )
    return selected.drop(index_name)
10 changes: 10 additions & 0 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,16 @@ def func(_input: Any) -> Any:
returns_scalar=False,
)

def tail(self: Self, n: int = 10) -> NoReturn:
    """Reject ``Expr.tail`` for the Dask backend.

    Args:
        n: Accepted and ignored. It is part of the signature so that a caller
            passing a row count reaches the ``NotImplementedError`` below
            instead of failing earlier with a ``TypeError`` for an unexpected
            argument.

    Raises:
        NotImplementedError: Always.
    """
    # We can't (yet?) allow methods which modify the index
    msg = "`Expr.tail` is not supported for the Dask backend. Please use `LazyFrame.tail` instead."
    raise NotImplementedError(msg)

def gather_every(self: Self, n: int, offset: int = 0) -> NoReturn:
    """Reject ``Expr.gather_every`` for the Dask backend.

    Args:
        n: Accepted but unused.
        offset: Accepted but unused.

    Raises:
        NotImplementedError: Always; the message points callers to
            ``LazyFrame.gather_every``.
    """
    # Index-modifying methods are not (yet?) allowed at the expression level.
    message = (
        "`Expr.gather_every` is not supported for the Dask backend. "
        "Please use `LazyFrame.gather_every` instead."
    )
    raise NotImplementedError(message)

@property
def str(self: Self) -> DaskExprStringNamespace:
    """Return a ``DaskExprStringNamespace`` constructed from this expression."""
    namespace = DaskExprStringNamespace(self)
    return namespace
Expand Down
28 changes: 28 additions & 0 deletions tests/expr_and_series/tail_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

from typing import Any

import pytest

import narwhals as nw
from tests.utils import compare_dicts


@pytest.mark.parametrize("n", [2, -1])
def test_head(constructor: Any, n: int, request: Any) -> None:
    """Check that selecting ``nw.col("a").tail(n)`` yields ``[2, 3]``.

    NOTE(review): the function is named ``test_head`` but exercises ``tail``.
    """
    constructor_repr = str(constructor)
    # Dask constructors are always marked xfail here; polars constructors
    # only for negative ``n``.
    if "dask" in constructor_repr:
        request.applymarker(pytest.mark.xfail)
    if "polars" in constructor_repr and n < 0:
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor({"a": [1, 2, 3]}))
    tail_expr = nw.col("a").tail(n)
    compare_dicts(frame.select(tail_expr), {"a": [2, 3]})


@pytest.mark.parametrize("n", [2, -1])
def test_head_series(constructor_eager: Any, n: int) -> None:
    """Check that ``df["a"].tail(n)`` selected back into the frame is ``[2, 3]``.

    NOTE(review): the function is named ``test_head_series`` but exercises
    ``tail``.
    """
    native = constructor_eager({"a": [1, 2, 3]})
    frame = nw.from_native(native, eager_only=True)
    tail_series = frame["a"].tail(n)
    compare_dicts(frame.select(tail_series), {"a": [2, 3]})
4 changes: 1 addition & 3 deletions tests/frame/gather_every_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@

@pytest.mark.parametrize("n", [1, 2, 3])
@pytest.mark.parametrize("offset", [1, 2, 3])
def test_gather_every(constructor: Any, n: int, offset: int, request: Any) -> None:
if "dask" in str(constructor):
request.applymarker(pytest.mark.xfail)
def test_gather_every(constructor: Any, n: int, offset: int) -> None:
df = nw.from_native(constructor(data))
result = df.gather_every(n=n, offset=offset)
expected = {"a": data["a"][offset::n]}
Expand Down
6 changes: 1 addition & 5 deletions tests/frame/tail_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,11 @@

from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts


def test_tail(constructor: Any, request: Any) -> None:
if "dask" in str(constructor):
request.applymarker(pytest.mark.xfail)
def test_tail(constructor: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9]}

Expand Down
14 changes: 0 additions & 14 deletions tests/series_only/tail_test.py

This file was deleted.

0 comments on commit cb4a583

Please sign in to comment.