Skip to content

Commit

Permalink
feat: implement anti-join, str.len_chars, and null_count for DuckDB (#…
Browse files Browse the repository at this point in the history
…1777)


---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
MarcoGorelli and pre-commit-ci[bot] authored Jan 10, 2025
1 parent 8229282 commit 339683c
Show file tree
Hide file tree
Showing 8 changed files with 25 additions and 29 deletions.
2 changes: 1 addition & 1 deletion narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def join(
self: Self,
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"],
how: Literal["left", "inner", "cross", "anti", "semi"],
left_on: str | list[str] | None,
right_on: str | list[str] | None,
suffix: str,
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def join(
self: Self,
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner",
how: Literal["left", "inner", "cross", "anti", "semi"] = "inner",
left_on: str | list[str] | None,
right_on: str | list[str] | None,
suffix: str,
Expand Down
6 changes: 1 addition & 5 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def join(
self: Self,
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner",
how: Literal["left", "inner", "cross", "anti", "semi"] = "inner",
left_on: str | list[str] | None,
right_on: str | list[str] | None,
suffix: str,
Expand All @@ -226,10 +226,6 @@ def join(
right_on = [right_on]
original_alias = self._native_frame.alias

if how not in ("inner", "left", "semi", "cross"):
msg = "Only inner and left join is implemented for DuckDB"
raise NotImplementedError(msg)

if how == "cross":
if self._backend_version < (1, 1, 4):
msg = f"DuckDB>=1.1.4 is required for cross-join, found version: {self._backend_version}"
Expand Down
25 changes: 19 additions & 6 deletions narwhals/_duckdb/expr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import functools
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
Expand Down Expand Up @@ -488,6 +487,15 @@ def min(self) -> Self:
lambda _input: FunctionExpression("min", _input), "min", returns_scalar=True
)

def null_count(self) -> Self:
    """Build a scalar expression that counts the null values of the input.

    Each value is turned into a null flag with ``isnull()``, the flag is
    cast to ``"int"``, and the flags are added up with DuckDB's ``sum``
    function. The resulting expression is registered under the name
    ``"null_count"`` and marked as returning a scalar.
    """
    from duckdb import FunctionExpression

    def _count_nulls(_input: duckdb.Expression) -> duckdb.Expression:
        # Cast the boolean null mask to integers so that it can be summed.
        null_flags = _input.isnull().cast("int")
        return FunctionExpression("sum", null_flags)

    return self._from_call(_count_nulls, "null_count", returns_scalar=True)

def is_null(self) -> Self:
return self._from_call(
lambda _input: _input.isnull(), "is_null", returns_scalar=self._returns_scalar
Expand All @@ -497,11 +505,7 @@ def is_in(self, other: Sequence[Any]) -> Self:
from duckdb import ConstantExpression

return self._from_call(
lambda _input: functools.reduce(
lambda x, y: x | _input.isin(ConstantExpression(y)),
other[1:],
_input.isin(ConstantExpression(other[0])),
),
lambda _input: _input.isin(*[ConstantExpression(x) for x in other]),
"is_in",
returns_scalar=self._returns_scalar,
)
Expand Down Expand Up @@ -619,6 +623,15 @@ def func(_input: duckdb.Expression) -> duckdb.Expression:
func, "slice", returns_scalar=self._compliant_expr._returns_scalar
)

def len_chars(self) -> DuckDBExpr:
    """Build an expression applying DuckDB's ``length`` function to the input.

    The call is delegated to the wrapped compliant expression, and the
    result keeps that expression's ``_returns_scalar`` flag.
    """
    from duckdb import FunctionExpression

    def _length(_input: duckdb.Expression) -> duckdb.Expression:
        return FunctionExpression("length", _input)

    compliant = self._compliant_expr
    return compliant._from_call(
        _length,
        "len_chars",
        returns_scalar=compliant._returns_scalar,
    )

def to_lowercase(self) -> DuckDBExpr:
from duckdb import FunctionExpression

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def join(
self,
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner",
how: Literal["left", "inner", "cross", "anti", "semi"] = "inner",
left_on: str | list[str] | None,
right_on: str | list[str] | None,
suffix: str,
Expand Down
8 changes: 1 addition & 7 deletions tests/expr_and_series/null_count_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

import pytest

import narwhals.stable.v1 as nw
from tests.utils import Constructor
from tests.utils import ConstructorEager
Expand All @@ -13,11 +11,7 @@
}


def test_null_count_expr(
constructor: Constructor, request: pytest.FixtureRequest
) -> None:
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
def test_null_count_expr(constructor: Constructor) -> None:
df = nw.from_native(constructor(data))
result = df.select(nw.col("a", "b").null_count())
expected = {
Expand Down
6 changes: 1 addition & 5 deletions tests/expr_and_series/str/len_chars_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

import pytest

import narwhals.stable.v1 as nw
from tests.utils import Constructor
from tests.utils import ConstructorEager
Expand All @@ -10,9 +8,7 @@
data = {"a": ["foo", "foobar", "Café", "345", "東京"]}


def test_str_len_chars(constructor: Constructor, request: pytest.FixtureRequest) -> None:
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
def test_str_len_chars(constructor: Constructor) -> None:
df = nw.from_native(constructor(data))
result = df.select(nw.col("a").str.len_chars())
expected = {
Expand Down
3 changes: 0 additions & 3 deletions tests/frame/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,7 @@ def test_anti_join(
join_key: list[str],
filter_expr: nw.Expr,
expected: dict[str, list[Any]],
request: pytest.FixtureRequest,
) -> None:
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
other = df.filter(filter_expr)
Expand Down

0 comments on commit 339683c

Please sign in to comment.