Skip to content

Commit

Permalink
duplicated no threads?
Browse files — browse the repository at this point in the history
  • Loading branch information
FBruzzesi committed Dec 27, 2024
1 parent aa63dc8 commit 2a93c53
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
13 changes: 8 additions & 5 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,6 @@ def write_csv(self: Self, file: Any) -> Any:
return pa_csv.write_csv(pa_table, file)

def is_duplicated(self: Self) -> ArrowSeries:
import numpy as np # ignore-banned-import
import pyarrow as pa
import pyarrow.compute as pc

Expand All @@ -608,14 +607,18 @@ def is_duplicated(self: Self) -> ArrowSeries:
columns = self.columns
col_token = generate_temporary_column_name(n_bytes=8, columns=columns)
row_count = (
df.append_column(col_token, pa.array(np.arange(len(self))))
df.append_column(col_token, pa.repeat(pa.scalar(1), len(self)))
.group_by(columns)
.aggregate([(col_token, "count")])
.aggregate([(col_token, "sum")])
)
is_duplicated = pc.greater(
df.join(
row_count, keys=columns, right_keys=columns, join_type="inner"
).column(f"{col_token}_count"),
row_count,
keys=columns,
right_keys=columns,
join_type="inner",
use_threads=False,
).column(f"{col_token}_sum"),
1,
)
return ArrowSeries(
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
)

value_series_native = value_series._native_series
condition_native = condition._native_series.combine_chunks()
condition_native = condition._native_series

if self._otherwise_value is None:
otherwise_native = pa.repeat(
Expand Down

0 comments on commit 2a93c53

Please sign in to comment.