Skip to content

Commit

Permalink
Replace RangeIndex._start/_stop/_step with _range (#15576)
Browse files Browse the repository at this point in the history
The `._start/_stop/_step` attributes are wholly redundant with the similar attributes on a `range` object, so replacing with those attributes where needed

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15576
  • Loading branch information
mroeschke authored Apr 24, 2024
1 parent 2eb71b2 commit 8b4dc91
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 75 deletions.
128 changes: 54 additions & 74 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
is_integer,
is_list_like,
is_scalar,
is_signed_integer_dtype,
)
from cudf.core._base_index import BaseIndex
from cudf.core._compat import PANDAS_LT_300
Expand Down Expand Up @@ -149,6 +148,15 @@ def _index_from_data(data: MutableMapping, name: Any = no_default):
return index_class_type._from_data(data, name)


def validate_range_arg(arg, arg_name: Literal["start", "stop", "step"]) -> int:
"""Validate start/stop/step argument in RangeIndex.__init__"""
if not is_integer(arg):
raise TypeError(
f"{arg_name} must be an integer, not {type(arg).__name__}"
)
return int(arg)


class RangeIndex(BaseIndex, BinaryOperand):
"""
Immutable Index implementing a monotonic integer range.
Expand Down Expand Up @@ -197,44 +205,29 @@ class RangeIndex(BaseIndex, BinaryOperand):
def __init__(
self, start, stop=None, step=1, dtype=None, copy=False, name=None
):
if step == 0:
raise ValueError("Step must not be zero.")
if not cudf.api.types.is_hashable(name):
raise ValueError("Name must be a hashable value.")
if dtype is not None and not is_signed_integer_dtype(dtype):
self._name = name
if dtype is not None and cudf.dtype(dtype).kind != "i":
raise ValueError(f"{dtype=} must be a signed integer type")

if isinstance(start, range):
therange = start
start = therange.start
stop = therange.stop
step = therange.step
if stop is None:
start, stop = 0, start
if not is_integer(start):
raise TypeError(
f"start must be an integer, not {type(start).__name__}"
)
self._start = int(start)
if not is_integer(stop):
raise TypeError(
f"stop must be an integer, not {type(stop).__name__}"
)
self._stop = int(stop)
if step is not None:
if not is_integer(step):
raise TypeError(
f"step must be an integer, not {type(step).__name__}"
)
self._step = int(step)
self._range = start
else:
self._step = 1
self._index = None
self._name = name
self._range = range(self._start, self._stop, self._step)
# _end is the actual last element of RangeIndex,
# whereas _stop is an upper bound.
self._end = self._start + self._step * (len(self._range) - 1)
if stop is None:
start, stop = 0, start
start = validate_range_arg(start, "start")
stop = validate_range_arg(stop, "stop")
if step is not None:
step = validate_range_arg(step, "step")
else:
step = 1
try:
self._range = range(start, stop, step)
except ValueError as err:
if step == 0:
raise ValueError("Step must not be zero.") from err
raise

def _copy_type_metadata(
self, other: RangeIndex, *, override_dtypes=None
Expand All @@ -251,9 +244,9 @@ def searchsorted(
na_position: Literal["first", "last"] = "last",
):
assert (len(self) <= 1) or (
ascending == (self._step > 0)
ascending == (self.step > 0)
), "Invalid ascending flag"
return search_range(value, self.as_range, side=side)
return search_range(value, self._range, side=side)

@property # type: ignore
@_cudf_nvtx_annotate
Expand All @@ -271,23 +264,23 @@ def start(self):
"""
The value of the `start` parameter (0 if this was not supplied).
"""
return self._start
return self._range.start

@property # type: ignore
@_cudf_nvtx_annotate
def stop(self):
"""
The value of the stop parameter.
"""
return self._stop
return self._range.stop

@property # type: ignore
@_cudf_nvtx_annotate
def step(self):
"""
The value of the step parameter.
"""
return self._step
return self._range.step

@property # type: ignore
@_cudf_nvtx_annotate
Expand Down Expand Up @@ -368,9 +361,7 @@ def copy(self, name=None, deep=False):

name = self.name if name is None else name

return RangeIndex(
start=self._start, stop=self._stop, step=self._step, name=name
)
return RangeIndex(self._range, name=name)

@_cudf_nvtx_annotate
def astype(self, dtype, copy: bool = True):
Expand All @@ -389,8 +380,8 @@ def duplicated(self, keep="first"):
@_cudf_nvtx_annotate
def __repr__(self):
return (
f"{self.__class__.__name__}(start={self._start}, stop={self._stop}"
f", step={self._step}"
f"{self.__class__.__name__}(start={self.start}, stop={self.stop}"
f", step={self.step}"
+ (
f", name={pd.io.formats.printing.default_pprint(self.name)}"
if self.name is not None
Expand All @@ -401,16 +392,16 @@ def __repr__(self):

@_cudf_nvtx_annotate
def __len__(self):
return len(range(self._start, self._stop, self._step))
return len(self._range)

@_cudf_nvtx_annotate
def __getitem__(self, index):
if isinstance(index, slice):
sl_start, sl_stop, sl_step = index.indices(len(self))

lo = self._start + sl_start * self._step
hi = self._start + sl_stop * self._step
st = self._step * sl_step
lo = self.start + sl_start * self.step
hi = self.start + sl_stop * self.step
st = self.step * sl_step
return RangeIndex(start=lo, stop=hi, step=st, name=self._name)

elif isinstance(index, Number):
Expand All @@ -419,18 +410,13 @@ def __getitem__(self, index):
index += len_self
if not (0 <= index < len_self):
raise IndexError("Index out of bounds")
return self._start + index * self._step
return self.start + index * self.step
return self._as_int_index()[index]

@_cudf_nvtx_annotate
def equals(self, other):
if isinstance(other, RangeIndex):
if (self._start, self._stop, self._step) == (
other._start,
other._stop,
other._step,
):
return True
return self._range == other._range
return self._as_int_index().equals(other)

@_cudf_nvtx_annotate
Expand All @@ -442,9 +428,9 @@ def serialize(self):
# We don't need to store the GPU buffer for RangeIndexes
# cuDF only needs to store start/stop and rehydrate
# during de-serialization
header["index_column"]["start"] = self._start
header["index_column"]["stop"] = self._stop
header["index_column"]["step"] = self._step
header["index_column"]["start"] = self.start
header["index_column"]["stop"] = self.stop
header["index_column"]["step"] = self.step
frames = []

header["name"] = pickle.dumps(self.name)
Expand Down Expand Up @@ -484,9 +470,9 @@ def to_pandas(
elif arrow_type:
raise NotImplementedError(f"{arrow_type=} is not implemented.")
return pd.RangeIndex(
start=self._start,
stop=self._stop,
step=self._step,
start=self.start,
stop=self.stop,
step=self.step,
dtype=self.dtype,
name=self.name,
)
Expand All @@ -495,19 +481,15 @@ def to_pandas(
def is_unique(self):
return True

@cached_property
def as_range(self):
return range(self._start, self._stop, self._step)

@cached_property # type: ignore
@_cudf_nvtx_annotate
def is_monotonic_increasing(self):
return self._step > 0 or len(self) <= 1
return self.step > 0 or len(self) <= 1

@cached_property # type: ignore
@_cudf_nvtx_annotate
def is_monotonic_decreasing(self):
return self._step < 0 or len(self) <= 1
return self.step < 0 or len(self) <= 1

@_cudf_nvtx_annotate
def memory_usage(self, deep=False):
Expand Down Expand Up @@ -590,12 +572,12 @@ def get_indexer(self, target, limit=None, method=None, tolerance=None):
def get_loc(self, key):
if not is_scalar(key):
raise TypeError("Should be a scalar-like")
idx = (key - self._start) / self._step
idx_int_upper_bound = (self._stop - self._start) // self._step
idx = (key - self.start) / self.step
idx_int_upper_bound = (self.stop - self.start) // self.step
if idx > idx_int_upper_bound or idx < 0:
raise KeyError(key)

idx_int = (key - self._start) // self._step
idx_int = (key - self.start) // self.step
if idx_int != idx:
raise KeyError(key)
return idx_int
Expand All @@ -607,9 +589,9 @@ def _union(self, other, sort=None):
# following notation: *_o -> other, *_s -> self,
# and *_r -> result
start_s, step_s = self.start, self.step
end_s = self._end
end_s = self.start + self.step * (len(self) - 1)
start_o, step_o = other.start, other.step
end_o = other._end
end_o = other.start + other.step * (len(other) - 1)
if self.step < 0:
start_s, step_s, end_s = end_s, -step_s, start_s
if other.step < 0:
Expand Down Expand Up @@ -854,9 +836,7 @@ def argsort(
raise ValueError(f"invalid na_position: {na_position}")

indices = cupy.arange(0, len(self))
if (ascending and self._step < 0) or (
not ascending and self._step > 0
):
if (ascending and self.step < 0) or (not ascending and self.step > 0):
indices = indices[::-1]
return indices

Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1606,7 +1606,7 @@ def test_rangeindex_name_not_hashable():
def test_index_rangeindex_search_range():
# step > 0
ridx = RangeIndex(-13, 17, 4)
ri = ridx.as_range
ri = ridx._range
for i in range(len(ridx)):
assert i == search_range(ridx[i], ri, side="left")
assert i + 1 == search_range(ridx[i], ri, side="right")
Expand Down

0 comments on commit 8b4dc91

Please sign in to comment.