Forward-merge branch-24.10 into branch-24.12 #16856

Merged 1 commit on Sep 19, 2024
python/cudf/cudf/_lib/concat.pyx (4 changes: 2 additions & 2 deletions)

@@ -23,9 +23,9 @@ def concat_columns(object columns):
 def concat_tables(object tables, bool ignore_index=False):
     plc_tables = []
     for table in tables:
-        cols = table._data.columns
+        cols = table._columns
         if not ignore_index:
-            cols = table._index._data.columns + cols
+            cols = table._index._columns + cols
         plc_tables.append(pylibcudf.Table([c.to_pylibcudf(mode="read") for c in cols]))

     return data_from_pylibcudf_table(
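Note on the pattern: the diff replaces direct reads of the internal column accessor (table._data.columns) with the frame-level convenience property table._columns. A minimal sketch of what such properties might look like, assuming the frame keeps a label-to-column mapping in _data; the class below is illustrative only, not cuDF's actual implementation:

# Illustrative sketch only: class and property bodies are assumptions, shown
# to explain why `tbl._columns` can stand in for `tbl._data.columns`.
class Frame:
    def __init__(self, data: dict):
        self._data = data  # stands in for a ColumnAccessor (label -> column)

    @property
    def _columns(self) -> tuple:
        # Same contents as self._data.columns: the columns, without labels.
        return tuple(self._data.values())

    @property
    def _column_names(self) -> tuple:
        # Same contents as self._data.names: the labels, in order.
        return tuple(self._data.keys())


f = Frame({"a": [1, 2], "b": [3, 4]})
print(f._columns)       # ([1, 2], [3, 4])
print(f._column_names)  # ('a', 'b')

The same substitution of ._column_names for ._data.names appears in the csv.pyx and parquet.pyx hunks below.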
python/cudf/cudf/_lib/copying.pyx (2 changes: 1 addition & 1 deletion)

@@ -384,7 +384,7 @@ cdef class _CPackedColumns:

         p.column_names = input_table._column_names
         p.column_dtypes = {}
-        for name, col in input_table._data.items():
+        for name, col in input_table._column_labels_and_values:
             if isinstance(col.dtype, cudf.core.dtypes._BaseDtype):
                 p.column_dtypes[name] = col.dtype

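The same refactor applied to iteration: input_table._data.items() becomes input_table._column_labels_and_values, which yields (label, column) pairs. A short sketch under the same assumptions as above (not the actual cuDF definition):

# Sketch only: _column_labels_and_values is assumed to mirror _data.items().
class Frame:
    def __init__(self, data: dict):
        self._data = data  # label -> column

    @property
    def _column_labels_and_values(self):
        # Drop-in replacement for self._data.items() at call sites.
        return iter(self._data.items())


f = Frame({"a": [1, 2], "b": [3, 4]})
for name, col in f._column_labels_and_values:
    print(name, col)

The io/utils.pyx and utils.pyx hunks below apply the same substitution.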
python/cudf/cudf/_lib/csv.pyx (2 changes: 1 addition & 1 deletion)

@@ -273,7 +273,7 @@ def read_csv(
         elif isinstance(dtype, abc.Collection):
             for index, col_dtype in enumerate(dtype):
                 if isinstance(cudf.dtype(col_dtype), cudf.CategoricalDtype):
-                    col_name = df._data.names[index]
+                    col_name = df._column_names[index]
                     df._data[col_name] = df._data[col_name].astype(col_dtype)

     if names is not None and len(names) and isinstance(names[0], int):
python/cudf/cudf/_lib/io/utils.pyx (2 changes: 1 addition & 1 deletion)

@@ -179,7 +179,7 @@ cdef update_struct_field_names(
 ):
     # Deprecated, remove in favor of add_col_struct_names
     # when a reader is ported to pylibcudf
-    for i, (name, col) in enumerate(table._data.items()):
+    for i, (name, col) in enumerate(table._column_labels_and_values):
         table._data[name] = update_column_struct_field_names(
             col, schema_info[i]
         )
python/cudf/cudf/_lib/parquet.pyx (12 changes: 6 additions & 6 deletions)

@@ -235,24 +235,24 @@ cdef object _process_metadata(object df,
             df._index = idx
         elif set(index_col).issubset(names):
             index_data = df[index_col]
-            actual_index_names = list(index_col_names.values())
-            if len(index_data._data) == 1:
+            actual_index_names = iter(index_col_names.values())
+            if index_data._num_columns == 1:
                 idx = cudf.Index._from_column(
-                    index_data._data.columns[0],
-                    name=actual_index_names[0]
+                    index_data._columns[0],
+                    name=next(actual_index_names)
                 )
             else:
                 idx = cudf.MultiIndex.from_frame(
                     index_data,
-                    names=actual_index_names
+                    names=list(actual_index_names)
                 )
             df.drop(columns=index_col, inplace=True)
             df._index = idx
         else:
             if use_pandas_metadata:
                 df.index.names = index_col

-    if len(df._data.names) == 0 and column_index_type is not None:
+    if df._num_columns == 0 and column_index_type is not None:
         df._data.label_dtype = cudf.dtype(column_index_type)

     return df
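Beyond the accessor cleanup, this hunk changes actual_index_names from a materialized list to an iterator: the single-column branch pulls exactly one name with next(), and the MultiIndex branch materializes the remaining names with list(). A small, self-contained illustration of that consumption pattern; the dictionary below is made up for the example and only stands in for index_col_names:

# Hypothetical stand-in for index_col_names (real values come from Parquet metadata).
index_col_names = {"__index_level_0__": "idx"}

actual_index_names = iter(index_col_names.values())

if len(index_col_names) == 1:
    # Single index column: consume exactly one name from the iterator.
    name = next(actual_index_names)   # "idx"
else:
    # Multiple index columns: materialize whatever is left for a MultiIndex.
    names = list(actual_index_names)

Because the iterator is consumed only once per branch, the result matches the old list-based code as long as the names are not reused afterwards.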
python/cudf/cudf/_lib/utils.pyx (6 changes: 3 additions & 3 deletions)

@@ -49,9 +49,9 @@ cdef table_view table_view_from_table(tbl, ignore_index=False) except*:
         If True, don't include the index in the columns.
     """
     return table_view_from_columns(
-        tbl._index._data.columns + tbl._data.columns
+        tbl._index._columns + tbl._columns
         if not ignore_index and tbl._index is not None
-        else tbl._data.columns
+        else tbl._columns
     )


@@ -62,7 +62,7 @@ cpdef generate_pandas_metadata(table, index):
     index_descriptors = []
     columns_to_convert = list(table._columns)
     # Columns
-    for name, col in table._data.items():
+    for name, col in table._column_labels_and_values:
         if cudf.get_option("mode.pandas_compatible"):
             # in pandas-compat mode, non-string column names are stringified.
             col_names.append(str(name))
python/cudf/cudf/core/_base_index.py (2 changes: 1 addition & 1 deletion)

@@ -1951,7 +1951,7 @@ def drop_duplicates(
         return self._from_columns_like_self(
             drop_duplicates(
                 list(self._columns),
-                keys=range(len(self._data)),
+                keys=range(len(self._columns)),
                 keep=keep,
                 nulls_are_equal=nulls_are_equal,
             ),
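This change swaps len(self._data) for len(self._columns) when building the positional keys. Since an accessor holds exactly one column per label, the two lengths agree; a toy check (the dict here is a made-up stand-in for self._data):

# Toy example: one column per label, so both lengths (and both ranges) agree.
data = {"a": [1, 2], "b": [3, 4]}      # stands in for self._data
columns = tuple(data.values())         # stands in for self._columns
assert range(len(data)) == range(len(columns))  # both are range(0, 2)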
python/cudf/cudf/core/column_accessor.py (24 changes: 12 additions & 12 deletions)

@@ -151,9 +151,9 @@ def __setitem__(self, key: abc.Hashable, value: ColumnBase) -> None:
         self.set_by_label(key, value)

     def __delitem__(self, key: abc.Hashable) -> None:
-        old_ncols = len(self._data)
+        old_ncols = len(self)
         del self._data[key]
-        new_ncols = len(self._data)
+        new_ncols = len(self)
         self._clear_cache(old_ncols, new_ncols)

     def __len__(self) -> int:
@@ -213,7 +213,7 @@ def level_names(self) -> tuple[abc.Hashable, ...]:

     @property
     def nlevels(self) -> int:
-        if len(self._data) == 0:
+        if len(self) == 0:
             return 0
         if not self.multiindex:
             return 1
@@ -226,7 +226,7 @@ def name(self) -> abc.Hashable:

     @cached_property
     def nrows(self) -> int:
-        if len(self._data) == 0:
+        if len(self) == 0:
             return 0
         else:
             return len(next(iter(self.values())))
@@ -257,9 +257,9 @@ def _clear_cache(self, old_ncols: int, new_ncols: int) -> None:
         Parameters
         ----------
         old_ncols: int
-            len(self._data) before self._data was modified
+            len(self) before self._data was modified
         new_ncols: int
-            len(self._data) after self._data was modified
+            len(self) after self._data was modified
         """
         cached_properties = ("columns", "names", "_grouped_data")
         for attr in cached_properties:
@@ -335,7 +335,7 @@ def insert(
         if name in self._data:
             raise ValueError(f"Cannot insert '{name}', already exists")

-        old_ncols = len(self._data)
+        old_ncols = len(self)
         if loc == -1:
             loc = old_ncols
         elif not (0 <= loc <= old_ncols):
@@ -414,7 +414,7 @@ def get_labels_by_index(self, index: Any) -> tuple:
         tuple
         """
         if isinstance(index, slice):
-            start, stop, step = index.indices(len(self._data))
+            start, stop, step = index.indices(len(self))
             return self.names[start:stop:step]
         elif pd.api.types.is_integer(index):
             return (self.names[index],)
@@ -526,9 +526,9 @@ def set_by_label(self, key: abc.Hashable, value: ColumnBase) -> None:
         if len(self) > 0 and len(value) != self.nrows:
             raise ValueError("All columns must be of equal length")

-        old_ncols = len(self._data)
+        old_ncols = len(self)
         self._data[key] = value
-        new_ncols = len(self._data)
+        new_ncols = len(self)
         self._clear_cache(old_ncols, new_ncols)

     def _select_by_label_list_like(self, key: tuple) -> Self:
@@ -718,12 +718,12 @@ def droplevel(self, level: int) -> None:
         if level < 0:
             level += self.nlevels

-        old_ncols = len(self._data)
+        old_ncols = len(self)
         self._data = {
             _remove_key_level(key, level): value  # type: ignore[arg-type]
             for key, value in self._data.items()
         }
-        new_ncols = len(self._data)
+        new_ncols = len(self)
         self._level_names = (
             self._level_names[:level] + self._level_names[level + 1 :]
         )
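All of the len(self._data) to len(self) edits in this file rely on ColumnAccessor.__len__ delegating to the underlying mapping, so the two expressions are interchangeable inside the class. A minimal sketch of that delegation, assuming a dict-backed accessor (not the actual cuDF class):

# Sketch only: a dict-backed accessor whose __len__ forwards to the mapping.
class Accessor:
    def __init__(self, data: dict):
        self._data = data  # label -> column

    def __len__(self) -> int:
        return len(self._data)


acc = Accessor({"a": [1, 2], "b": [3, 4]})
assert len(acc) == len(acc._data) == 2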