Skip to content

Commit

Permalink
Use ColumnAccessor row and column length attributes more consistently (#15857)
Browse files Browse the repository at this point in the history

Also ensures that any calls to `_num_rows` use the cached version.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15857
  • Loading branch information
mroeschke authored May 24, 2024
1 parent 4a3315b commit 81cadb6
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 20 deletions.
29 changes: 14 additions & 15 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1429,7 +1429,7 @@ def __setitem__(self, arg, value):
else:
# disc. with pandas here
# pandas raises key error here
self.insert(len(self._data), arg, value)
self.insert(self._num_columns, arg, value)

elif can_convert_to_column(arg):
mask = arg
Expand Down Expand Up @@ -1846,7 +1846,7 @@ def _clean_renderable_dataframe(self, output):
if lines[-1].startswith("["):
lines = lines[:-1]
lines.append(
"[%d rows x %d columns]" % (len(self), len(self._data.names))
"[%d rows x %d columns]" % (len(self), self._num_columns)
)
return "\n".join(lines)

Expand Down Expand Up @@ -1901,7 +1901,7 @@ def _get_renderable_dataframe(self):
else pd.options.display.width / 2
)

if len(self) <= nrows and len(self._data.names) <= ncols:
if len(self) <= nrows and self._num_columns <= ncols:
output = self.copy(deep=False)
elif self.empty and len(self.index) > 0:
max_seq_items = pd.options.display.max_seq_items
Expand All @@ -1922,15 +1922,15 @@ def _get_renderable_dataframe(self):
else:
output = self.copy(deep=False)
else:
left_cols = len(self._data.names)
left_cols = self._num_columns
right_cols = 0
upper_rows = len(self)
lower_rows = 0
if len(self) > nrows and nrows > 0:
upper_rows = int(nrows / 2.0) + 1
lower_rows = upper_rows + (nrows % 2)
if len(self._data.names) > ncols:
right_cols = len(self._data.names) - int(ncols / 2.0)
if left_cols > ncols:
right_cols = left_cols - int(ncols / 2.0)
# adjust right columns for output if multiindex.
right_cols = (
right_cols - 1
Expand All @@ -1945,11 +1945,11 @@ def _get_renderable_dataframe(self):
else:
# If right_cols is 0 or negative, it means
# self has lesser number of columns than ncols.
# Hence assign len(self._data.names) which
# Hence assign self._num_columns which
# will result in empty `*_right` quadrants.
# This is because `*_left` quadrants will
# contain all columns.
right_cols = len(self._data.names)
right_cols = self._num_columns

upper_left = self.head(upper_rows).iloc[:, :left_cols]
upper_right = self.head(upper_rows).iloc[:, right_cols:]
Expand Down Expand Up @@ -1983,8 +1983,7 @@ def _repr_html_(self):
if lines[-2].startswith("<p>"):
lines = lines[:-2]
lines.append(
"<p>%d rows × %d columns</p>"
% (len(self), len(self._data.names))
"<p>%d rows × %d columns</p>" % (len(self), self._num_columns)
)
lines.append("</div>")
return "\n".join(lines)
Expand Down Expand Up @@ -2660,9 +2659,9 @@ def columns(self, columns):
level_names = (pd_columns.name,)
label_dtype = pd_columns.dtype

if len(pd_columns) != len(self._data.names):
if len(pd_columns) != self._num_columns:
raise ValueError(
f"Length mismatch: expected {len(self._data.names)} elements, "
f"Length mismatch: expected {self._num_columns} elements, "
f"got {len(pd_columns)} elements"
)

Expand All @@ -2683,7 +2682,7 @@ def _set_columns_like(self, other: ColumnAccessor) -> None:
* The possible .columns.dtype
* The .columns.names/name (depending on if it's a MultiIndex)
"""
if len(self._data.names) != len(other.names):
if self._num_columns != len(other.names):
raise ValueError(
f"Length mismatch: expected {len(other)} elements, "
f"got {len(self)} elements"
Expand Down Expand Up @@ -3207,7 +3206,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
if name in self._data:
raise NameError(f"duplicated column name {name}")

num_cols = len(self._data)
num_cols = self._num_columns
if loc < 0:
loc += num_cols + 1

Expand Down Expand Up @@ -5032,7 +5031,7 @@ def info(
)
lines.append(index_summary)

if len(self._data) == 0:
if self._num_columns == 0:
lines.append(f"Empty {type(self).__name__}")
cudf.utils.ioutils.buffer_write_lines(buf, lines)
return
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def _num_columns(self) -> int:

@property
def _num_rows(self) -> int:
return 0 if self._num_columns == 0 else len(self._data.columns[0])
return self._data.nrows

@property
def _column_names(self) -> Tuple[Any, ...]: # TODO: Tuple[str]?
Expand Down
8 changes: 5 additions & 3 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ def __init__(self, data=None, index=None):
@property
def _num_rows(self) -> int:
# Important to use the index because the data may be empty.
# TODO: Remove once DataFrame.__init__ is cleaned up
return len(self.index)

@property
Expand Down Expand Up @@ -448,6 +449,7 @@ def _scan(self, op, axis=None, skipna=True):
def _check_data_index_length_match(self) -> None:
# Validate that the number of rows in the data matches the index if the
# data is not empty. This is a helper for the constructor.
# TODO: Use self._num_rows once DataFrame.__init__ is cleaned up
if self._data.nrows > 0 and self._data.nrows != len(self.index):
raise ValueError(
f"Length of values ({self._data.nrows}) does not "
Expand Down Expand Up @@ -639,7 +641,7 @@ def index(self, value):
new_length = len(value)

# A DataFrame with 0 columns can have an index of arbitrary length.
if len(self._data) > 0 and new_length != old_length:
if self._num_columns > 0 and new_length != old_length:
raise ValueError(
f"Length mismatch: Expected axis has {old_length} elements, "
f"new values have {len(value)} elements"
Expand Down Expand Up @@ -1129,7 +1131,7 @@ def dot(self, other, reflect=False):
common = self._data.to_pandas_index().union(
other.index.to_pandas()
)
if len(common) > len(self._data.names) or len(common) > len(
if len(common) > self._num_columns or len(common) > len(
other.index
):
raise ValueError("matrices are not aligned")
Expand Down Expand Up @@ -2757,7 +2759,7 @@ def sort_index(
out = self[labels]
if ignore_index:
out._data.rangeindex = True
out._data.names = list(range(len(self._data.names)))
out._data.names = list(range(self._num_columns))

return self._mimic_inplace(out, inplace=inplace)

Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ def get_slice_bound(self, label, side, kind=None):
@_cudf_nvtx_annotate
def nlevels(self):
"""Integer number of levels in this MultiIndex."""
return len(self._data)
return self._num_columns

@property # type: ignore
@_cudf_nvtx_annotate
Expand Down

0 comments on commit 81cadb6

Please sign in to comment.