diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 74518214fa6..66af2a121dd 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -421,7 +421,7 @@ def hasnans(self):
         raise NotImplementedError
 
     @property
-    def nlevels(self):
+    def nlevels(self) -> int:
         """
         Number of levels.
         """
@@ -1951,7 +1951,6 @@ def drop_duplicates(
         return self._from_columns_like_self(
             drop_duplicates(
                 list(self._columns),
-                keys=range(len(self._columns)),
                 keep=keep,
                 nulls_are_equal=nulls_are_equal,
             ),
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 0c0f271fe6f..3bfa720c5a3 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1014,19 +1014,6 @@ def to_arrow(self):
             }
         )
 
-    @_performance_tracking
-    def _positions_from_column_names(self, column_names) -> list[int]:
-        """Map each column name into their positions in the frame.
-
-        The order of indices returned corresponds to the column order in this
-        Frame.
-        """
-        return [
-            i
-            for i, name in enumerate(self._column_names)
-            if name in set(column_names)
-        ]
-
     @_performance_tracking
     def _copy_type_metadata(self: Self, other: Self) -> Self:
         """
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 9b22a23b772..e947661dda9 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -3055,21 +3055,21 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self:
         return result
 
     def _positions_from_column_names(
-        self, column_names, offset_by_index_columns=False
-    ):
+        self,
+        column_names: set[abc.Hashable],
+        offset_by_index_columns: bool = True,
+    ) -> list[int]:
         """Map each column name into their positions in the frame.
 
         Return positions of the provided column names, offset by the number of
         index columns if `offset_by_index_columns` is True. The order of
         indices returned corresponds to the column order in this Frame.
         """
-        num_index_columns = (
-            len(self.index._data) if offset_by_index_columns else 0
-        )
+        start = self.index.nlevels if offset_by_index_columns else 0
         return [
-            i + num_index_columns
-            for i, name in enumerate(self._column_names)
-            if name in set(column_names)
+            i
+            for i, name in enumerate(self._column_names, start=start)
+            if name in column_names
         ]
 
     def drop_duplicates(
@@ -4349,9 +4349,7 @@ def _drop_na_rows(self, how="any", subset=None, thresh=None):
             cudf.core._internals.stream_compaction.drop_nulls(
                 [*self.index._columns, *data_columns],
                 how=how,
-                keys=self._positions_from_column_names(
-                    subset, offset_by_index_columns=True
-                ),
+                keys=self._positions_from_column_names(subset),
                 thresh=thresh,
             ),
             self._column_names,
@@ -6282,17 +6280,16 @@ def ge(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
             other=other, op="__ge__", fill_value=fill_value, can_reindex=True
         )
 
-    def _preprocess_subset(self, subset):
+    def _preprocess_subset(self, subset) -> set[abc.Hashable]:
         if subset is None:
             subset = self._column_names
         elif (
-            not np.iterable(subset)
-            or isinstance(subset, str)
+            is_scalar(subset)
             or isinstance(subset, tuple)
             and subset in self._column_names
         ):
             subset = (subset,)
-        diff = set(subset) - set(self._data)
+        diff = set(subset) - set(self._column_names)
         if len(diff) != 0:
             raise KeyError(f"columns {diff} do not exist")
         return subset