Skip to content

Commit

Permalink
Don't sort columns for DataFrame init from list of Series (#14136)
Browse files Browse the repository at this point in the history
closes #14132

This PR stops re-sorting the columns of a DataFrame when it is initialized from a list of Series.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

URL: #14136
  • Loading branch information
mroeschke authored Sep 20, 2023
1 parent f7ca051 commit eb6d134
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
4 changes: 1 addition & 3 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7885,9 +7885,7 @@ def _get_union_of_indices(indexes):
return indexes[0]
else:
merged_index = cudf.core.index.GenericIndex._concat(indexes)
merged_index = merged_index.drop_duplicates()
inds = merged_index._values.argsort()
return merged_index.take(inds)
return merged_index.drop_duplicates()


def _get_union_of_series_names(series_list):
Expand Down
12 changes: 12 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,18 @@ def test_init_unaligned_with_index():
assert_eq(pdf, gdf, check_dtype=False)


def test_init_series_list_columns_unsort():
    """Check DataFrame construction from a list of Series with unsorted labels.

    Each input Series carries the index labels ``["b", "a", "c"]``, which are
    deliberately not in sorted order. The cudf result is compared against the
    pandas result built from equivalent Series.
    """
    labels = ["b", "a", "c"]
    expected = pd.DataFrame(
        [pd.Series(i, index=labels, name=str(i)) for i in range(3)]
    )
    actual = cudf.DataFrame(
        [cudf.Series(i, index=labels, name=str(i)) for i in range(3)]
    )
    assert_eq(expected, actual)


def test_series_basic():
# Make series from buffer
a1 = np.arange(10, dtype=np.float64)
Expand Down

0 comments on commit eb6d134

Please sign in to comment.