Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add duckdb dataframe drop_nulls #1811

Merged
merged 5 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,10 +395,9 @@ def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001
)
return self._from_native_frame(self._native_frame.drop(to_drop))

def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
def drop_nulls(self: Self, subset: list[str] | None) -> Self:
if subset is None:
return self._from_native_frame(self._native_frame.drop_null())
subset = [subset] if isinstance(subset, str) else subset
plx = self.__narwhals_namespace__()
return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))

Expand Down
3 changes: 1 addition & 2 deletions narwhals/_dask/dataframe.py
FBruzzesi marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,9 @@ def select(
)
return self._from_native_frame(df)

def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
def drop_nulls(self: Self, subset: list[str] | None) -> Self:
if subset is None:
return self._from_native_frame(self._native_frame.dropna())
subset = [subset] if isinstance(subset, str) else subset
plx = self.__narwhals_namespace__()
return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))

Expand Down
10 changes: 2 additions & 8 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,17 +322,11 @@ def sort(
)
return self._from_native_frame(result)

def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
def drop_nulls(self: Self, subset: list[str] | None) -> Self:
import duckdb

rel = self._native_frame
subset_ = (
[subset]
if isinstance(subset, str)
else rel.columns
if subset is None
else subset
)
subset_ = subset if subset is not None else rel.columns
keep_condition = " and ".join(f'"{col}" is not null' for col in subset_)
query = f"select * from rel where {keep_condition}" # noqa: S608
return self._from_native_frame(duckdb.sql(query))
3 changes: 1 addition & 2 deletions narwhals/_pandas_like/dataframe.py
FBruzzesi marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -371,10 +371,9 @@ def select(
)
return self._from_native_frame(df)

def drop_nulls(self, subset: str | list[str] | None) -> Self:
def drop_nulls(self, subset: list[str] | None) -> Self:
if subset is None:
return self._from_native_frame(self._native_frame.dropna(axis=0))
subset = [subset] if isinstance(subset, str) else subset
plx = self.__narwhals_namespace__()
return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_spark_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def sort(
sort_cols = [sort_f(col) for col, sort_f in zip(flat_by, sort_funcs)]
return self._from_native_frame(self._native_frame.sort(*sort_cols))

def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
def drop_nulls(self: Self, subset: list[str] | None) -> Self:
return self._from_native_frame(self._native_frame.dropna(subset=subset))

def rename(self: Self, mapping: dict[str, str]) -> Self:
Expand Down
3 changes: 2 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ def with_row_index(self, name: str = "index") -> Self:
self._compliant_frame.with_row_index(name),
)

def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wait I think it's fine for narwhals.DataFrame.drop_nulls to default to None? so users can just call df.drop_nulls()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the BaseFrame implementation; the default still remains in DataFrame and LazyFrame and is passed along.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oops sorry! all good then 🙌

subset = [subset] if isinstance(subset, str) else subset
return self._from_compliant_dataframe(
self._compliant_frame.drop_nulls(subset=subset),
)
Expand Down
Loading