Skip to content

Commit

Permalink
Fix Series.to_frame(name=None) setting a None name (#16698)
Browse files Browse the repository at this point in the history
Starting with pandas 2.0, passing `name=None` explicitly to `to_frame` makes the resulting column name `None` (see pandas-dev/pandas#45523).

Because `cudf.Series.to_frame` used `name=None` as its default, it could not tell an explicit `None` apart from an omitted argument, so this behavior was not reflected in cudf.

Additionally, this change adds a `SingleColumnFrame._to_frame` helper so that `Series.to_frame` and `Index.to_frame` can share the same logic.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16698
  • Loading branch information
mroeschke authored Aug 30, 2024
1 parent 8f2d687 commit f932bf9
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 68 deletions.
58 changes: 0 additions & 58 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,64 +798,6 @@ def fillna(self, value, downcast=None):

return super().fillna(value=value)

def to_frame(self, index=True, name=no_default):
    """Create a DataFrame with a column containing this Index

    Parameters
    ----------
    index : boolean, default True
        Set the index of the returned DataFrame as the original Index
    name : object, defaults to index.name
        Label for the resulting column. When omitted, the index name is
        used, or ``0`` if the index has no name. Any explicitly passed
        value (including ``None``) is used as given.

    Returns
    -------
    DataFrame
        DataFrame containing the original Index data.

    See Also
    --------
    Index.to_series : Convert an Index to a Series.
    Series.to_frame : Convert Series to DataFrame.

    Examples
    --------
    >>> import cudf
    >>> idx = cudf.Index(['Ant', 'Bear', 'Cow'], name='animal')
    >>> idx.to_frame()
           animal
    animal
    Ant       Ant
    Bear     Bear
    Cow       Cow

    By default, the original Index is reused. To enforce a new Index:

    >>> idx.to_frame(index=False)
      animal
    0    Ant
    1   Bear
    2    Cow

    To override the name of the resulting column, specify `name`:

    >>> idx.to_frame(index=False, name='zoo')
        zoo
    0   Ant
    1  Bear
    2   Cow
    """
    # ``no_default`` (not ``None``) marks "argument omitted", so an explicit
    # ``name=None`` is kept as the column label.
    if name is not no_default:
        column_label = name
    elif self.name is None:
        column_label = 0
    else:
        column_label = self.name

    data = {column_label: self._values}
    return cudf.DataFrame(data, index=self if index else None)

def to_arrow(self):
    """Convert to a suitable Arrow object.

    This implementation performs no conversion.

    Raises
    ------
    NotImplementedError
        Always. Presumably concrete index types supply the real
        conversion -- confirm against the subclasses.
    """
    raise NotImplementedError()
Expand Down
57 changes: 57 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,11 @@ def to_pandas(
name=self.name,
)

def to_frame(
    self, index: bool = True, name: Hashable = no_default
) -> cudf.DataFrame:
    """Create a DataFrame with a column containing this Index.

    The work is delegated: ``self._as_int_index()`` is called and the
    ``to_frame`` method of the object it returns builds the result.

    Parameters
    ----------
    index : bool, default True
        Forwarded unchanged to the delegated ``to_frame`` call.
    name : Hashable, optional
        Forwarded unchanged to the delegated ``to_frame`` call;
        ``no_default`` signals that no name was supplied.

    Returns
    -------
    DataFrame
        Whatever the delegated ``to_frame`` call returns.
    """
    # NOTE(review): presumably this materializes the index as an integer
    # index before conversion -- confirm against ``_as_int_index``.
    int_index = self._as_int_index()
    return int_index.to_frame(index=index, name=name)

@property
def is_unique(self) -> bool:
    """Always ``True`` for this index type.

    NOTE(review): presumably the values of this index can never repeat,
    so no data needs to be inspected -- confirm against the owning class.
    """
    return True
Expand Down Expand Up @@ -1646,6 +1651,58 @@ def to_pandas(
result.name = self.name
return result

def to_frame(
    self, index: bool = True, name: Hashable = no_default
) -> cudf.DataFrame:
    """Create a DataFrame with a column containing this Index

    Parameters
    ----------
    index : boolean, default True
        Set the index of the returned DataFrame as the original Index
    name : object, defaults to index.name
        Label for the resulting column. When omitted, the index name is
        used, or ``0`` if the index has no name. Any explicitly passed
        value (including ``None``) is used as given.

    Returns
    -------
    DataFrame
        DataFrame containing the original Index data.

    See Also
    --------
    Index.to_series : Convert an Index to a Series.
    Series.to_frame : Convert Series to DataFrame.

    Examples
    --------
    >>> import cudf
    >>> idx = cudf.Index(['Ant', 'Bear', 'Cow'], name='animal')
    >>> idx.to_frame()
           animal
    animal
    Ant       Ant
    Bear     Bear
    Cow       Cow

    By default, the original Index is reused. To enforce a new Index:

    >>> idx.to_frame(index=False)
      animal
    0    Ant
    1   Bear
    2    Cow

    To override the name of the resulting column, specify `name`:

    >>> idx.to_frame(index=False, name='zoo')
        zoo
    0   Ant
    1  Bear
    2   Cow
    """
    # Reuse this Index as the frame's index only when requested; the shared
    # ``_to_frame`` helper resolves the column label.
    frame_index = self if index else None
    return self._to_frame(name=name, index=frame_index)

def append(self, other):
if is_list_like(other):
to_concat = [self]
Expand Down
12 changes: 2 additions & 10 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ def reset_index(
)

@_performance_tracking
def to_frame(self, name=None):
def to_frame(self, name: abc.Hashable = no_default) -> cudf.DataFrame:
"""Convert Series into a DataFrame
Parameters
Expand Down Expand Up @@ -1192,15 +1192,7 @@ def to_frame(self, name=None):
13 <NA>
15 d
""" # noqa: E501

if name is not None:
col = name
elif self.name is None:
col = 0
else:
col = self.name

return cudf.DataFrame({col: self._column}, index=self.index)
return self._to_frame(name=name, index=self.index)

@_performance_tracking
def memory_usage(self, index=True, deep=False):
Expand Down
11 changes: 11 additions & 0 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,17 @@ def to_arrow(self) -> pa.Array:
"""
return self._column.to_arrow()

def _to_frame(
    self, name: Hashable, index: cudf.Index | None
) -> cudf.DataFrame:
    """Helper function for Series.to_frame, Index.to_frame

    Parameters
    ----------
    name : Hashable
        Label for the single resulting column. The ``no_default``
        sentinel means "not supplied": the label then falls back to
        ``self.name``, or to ``0`` when ``self.name`` is None. Any other
        value, including an explicit ``None``, is used as given.
    index : cudf.Index or None
        Passed through as the ``index`` argument of
        ``cudf.DataFrame._from_data``.

    Returns
    -------
    DataFrame
        A one-column DataFrame wrapping ``self._column``.
    """
    if name is not no_default:
        label = name
    elif self.name is None:
        label = 0
    else:
        label = self.name
    # NOTE(review): ``verify=False`` presumably skips ColumnAccessor input
    # validation because ``self._column`` is already a column -- confirm.
    accessor = ColumnAccessor({label: self._column}, verify=False)
    return cudf.DataFrame._from_data(accessor, index=index)

@property # type: ignore
@_performance_tracking
def is_unique(self) -> bool:
Expand Down
7 changes: 7 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2557,6 +2557,13 @@ def test_series_arrow_list_types_roundtrip():
cudf.from_pandas(pdf)


@pytest.mark.parametrize("base_name", [None, "a"])
def test_series_to_frame_none_name(base_name):
    """``to_frame(name=None)`` on a cudf Series must match pandas.

    Runs for both an unnamed and a named Series, always passing an
    explicit ``name=None``.
    """
    gpu_series = cudf.Series(range(1), name=base_name)
    result = gpu_series.to_frame(name=None)
    cpu_series = pd.Series(range(1), name=base_name)
    expected = cpu_series.to_frame(name=None)
    assert_eq(result, expected)


@pytest.mark.parametrize("klass", [cudf.Index, cudf.Series])
@pytest.mark.parametrize(
"data", [pa.array([float("nan")]), pa.chunked_array([[float("nan")]])]
Expand Down

0 comments on commit f932bf9

Please sign in to comment.