Skip to content

Commit

Permalink
Fix key bug, simplify some implementations
Browse files: browse the repository at this point in the history
  • Loading branch information
mroeschke committed Nov 26, 2024
1 parent 5d6f192 commit 988fdb3
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 30 deletions.
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def hasnans(self):
raise NotImplementedError

@property
def nlevels(self):
def nlevels(self) -> int:
"""
Number of levels.
"""
Expand Down Expand Up @@ -1951,7 +1951,6 @@ def drop_duplicates(
return self._from_columns_like_self(
drop_duplicates(
list(self._columns),
keys=range(len(self._columns)),
keep=keep,
nulls_are_equal=nulls_are_equal,
),
Expand Down
13 changes: 0 additions & 13 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,19 +1014,6 @@ def to_arrow(self):
}
)

@_performance_tracking
def _positions_from_column_names(self, column_names) -> list[int]:
"""Map each column name into their positions in the frame.
The order of indices returned corresponds to the column order in this
Frame.
"""
return [
i
for i, name in enumerate(self._column_names)
if name in set(column_names)
]

@_performance_tracking
def _copy_type_metadata(self: Self, other: Self) -> Self:
"""
Expand Down
27 changes: 12 additions & 15 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3055,21 +3055,21 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self:
return result

def _positions_from_column_names(
self, column_names, offset_by_index_columns=False
):
self,
column_names: set[abc.Hashable],
offset_by_index_columns: bool = True,
) -> list[int]:
"""Map each column name into their positions in the frame.
Return positions of the provided column names, offset by the number of
index columns if `offset_by_index_columns` is True. The order of
indices returned corresponds to the column order in this Frame.
"""
num_index_columns = (
len(self.index._data) if offset_by_index_columns else 0
)
start = self.index.nlevels if offset_by_index_columns else 0
return [
i + num_index_columns
for i, name in enumerate(self._column_names)
if name in set(column_names)
i
for i, name in enumerate(self._column_names, start=start)
if name in column_names
]

def drop_duplicates(
Expand Down Expand Up @@ -4349,9 +4349,7 @@ def _drop_na_rows(self, how="any", subset=None, thresh=None):
cudf.core._internals.stream_compaction.drop_nulls(
[*self.index._columns, *data_columns],
how=how,
keys=self._positions_from_column_names(
subset, offset_by_index_columns=True
),
keys=self._positions_from_column_names(subset),
thresh=thresh,
),
self._column_names,
Expand Down Expand Up @@ -6282,17 +6280,16 @@ def ge(self, other, axis="columns", level=None, fill_value=None): # noqa: D102
other=other, op="__ge__", fill_value=fill_value, can_reindex=True
)

def _preprocess_subset(self, subset):
def _preprocess_subset(self, subset) -> set[abc.Hashable]:
if subset is None:
subset = self._column_names
elif (
not np.iterable(subset)
or isinstance(subset, str)
is_scalar(subset)
or isinstance(subset, tuple)
and subset in self._column_names
):
subset = (subset,)
diff = set(subset) - set(self._data)
diff = set(subset) - set(self._column_names)
if len(diff) != 0:
raise KeyError(f"columns {diff} do not exist")
return subset
Expand Down

0 comments on commit 988fdb3

Please sign in to comment.