diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 1f530aa3108..acfc2d781a7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1429,7 +1429,7 @@ def __setitem__(self, arg, value): else: # disc. with pandas here # pandas raises key error here - self.insert(len(self._data), arg, value) + self.insert(self._num_columns, arg, value) elif can_convert_to_column(arg): mask = arg @@ -1846,7 +1846,7 @@ def _clean_renderable_dataframe(self, output): if lines[-1].startswith("["): lines = lines[:-1] lines.append( - "[%d rows x %d columns]" % (len(self), len(self._data.names)) + "[%d rows x %d columns]" % (len(self), self._num_columns) ) return "\n".join(lines) @@ -1901,7 +1901,7 @@ def _get_renderable_dataframe(self): else pd.options.display.width / 2 ) - if len(self) <= nrows and len(self._data.names) <= ncols: + if len(self) <= nrows and self._num_columns <= ncols: output = self.copy(deep=False) elif self.empty and len(self.index) > 0: max_seq_items = pd.options.display.max_seq_items @@ -1922,15 +1922,15 @@ def _get_renderable_dataframe(self): else: output = self.copy(deep=False) else: - left_cols = len(self._data.names) + left_cols = self._num_columns right_cols = 0 upper_rows = len(self) lower_rows = 0 if len(self) > nrows and nrows > 0: upper_rows = int(nrows / 2.0) + 1 lower_rows = upper_rows + (nrows % 2) - if len(self._data.names) > ncols: - right_cols = len(self._data.names) - int(ncols / 2.0) + if left_cols > ncols: + right_cols = left_cols - int(ncols / 2.0) # adjust right columns for output if multiindex. right_cols = ( right_cols - 1 @@ -1945,11 +1945,11 @@ def _get_renderable_dataframe(self): else: # If right_cols is 0 or negative, it means # self has lesser number of columns than ncols. - # Hence assign len(self._data.names) which + # Hence assign self._num_columns which # will result in empty `*_right` quadrants. # This is because `*_left` quadrants will # contain all columns. - right_cols = len(self._data.names) + right_cols = self._num_columns upper_left = self.head(upper_rows).iloc[:, :left_cols] upper_right = self.head(upper_rows).iloc[:, right_cols:] @@ -1983,8 +1983,7 @@ def _repr_html_(self): if lines[-2].startswith("

"): lines = lines[:-2] lines.append( - "

%d rows × %d columns

" - % (len(self), len(self._data.names)) + "

%d rows × %d columns

" % (len(self), self._num_columns) ) lines.append("") return "\n".join(lines) @@ -2660,9 +2659,9 @@ def columns(self, columns): level_names = (pd_columns.name,) label_dtype = pd_columns.dtype - if len(pd_columns) != len(self._data.names): + if len(pd_columns) != self._num_columns: raise ValueError( - f"Length mismatch: expected {len(self._data.names)} elements, " + f"Length mismatch: expected {self._num_columns} elements, " f"got {len(pd_columns)} elements" ) @@ -2683,7 +2682,7 @@ def _set_columns_like(self, other: ColumnAccessor) -> None: * The possible .columns.dtype * The .columns.names/name (depending on if it's a MultiIndex) """ - if len(self._data.names) != len(other.names): + if self._num_columns != len(other.names): raise ValueError( f"Length mismatch: expected {len(other)} elements, " f"got {len(self)} elements" @@ -3207,7 +3206,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): if name in self._data: raise NameError(f"duplicated column name {name}") - num_cols = len(self._data) + num_cols = self._num_columns if loc < 0: loc += num_cols + 1 @@ -5032,7 +5031,7 @@ def info( ) lines.append(index_summary) - if len(self._data) == 0: + if self._num_columns == 0: lines.append(f"Empty {type(self).__name__}") cudf.utils.ioutils.buffer_write_lines(buf, lines) return diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 92ca76d6ceb..7b561906afb 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -76,7 +76,7 @@ def _num_columns(self) -> int: @property def _num_rows(self) -> int: - return 0 if self._num_columns == 0 else len(self._data.columns[0]) + return self._data.nrows @property def _column_names(self) -> Tuple[Any, ...]: # TODO: Tuple[str]? diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index b4a689804c7..a31430e1571 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -289,6 +289,7 @@ def __init__(self, data=None, index=None): @property def _num_rows(self) -> int: # Important to use the index because the data may be empty. + # TODO: Remove once DataFrame.__init__ is cleaned up return len(self.index) @property @@ -448,6 +449,7 @@ def _scan(self, op, axis=None, skipna=True): def _check_data_index_length_match(self) -> None: # Validate that the number of rows in the data matches the index if the # data is not empty. This is a helper for the constructor. + # TODO: Use self._num_rows once DataFrame.__init__ is cleaned up if self._data.nrows > 0 and self._data.nrows != len(self.index): raise ValueError( f"Length of values ({self._data.nrows}) does not " @@ -639,7 +641,7 @@ def index(self, value): new_length = len(value) # A DataFrame with 0 columns can have an index of arbitrary length. - if len(self._data) > 0 and new_length != old_length: + if self._num_columns > 0 and new_length != old_length: raise ValueError( f"Length mismatch: Expected axis has {old_length} elements, " f"new values have {len(value)} elements" @@ -1129,7 +1131,7 @@ def dot(self, other, reflect=False): common = self._data.to_pandas_index().union( other.index.to_pandas() ) - if len(common) > len(self._data.names) or len(common) > len( + if len(common) > self._num_columns or len(common) > len( other.index ): raise ValueError("matrices are not aligned") @@ -2757,7 +2759,7 @@ def sort_index( out = self[labels] if ignore_index: out._data.rangeindex = True - out._data.names = list(range(len(self._data.names))) + out._data.names = list(range(self._num_columns)) return self._mimic_inplace(out, inplace=inplace) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index c149a1028a0..049fac45ba8 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -527,7 +527,7 @@ def get_slice_bound(self, label, side, kind=None): @_cudf_nvtx_annotate def nlevels(self): """Integer number of levels in this MultiIndex.""" - return len(self._data) + return self._num_columns @property # type: ignore @_cudf_nvtx_annotate