diff --git a/CHANGELOG.md b/CHANGELOG.md
index 11c2259c22f..c013b0bf463 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -72,6 +72,7 @@
 - PR #6199 Fix index preservation for dask_cudf parquet
 - PR #6207 Remove shared libs from Java sources jar
 - PR #6212 Update codeowners file
+- PR #6157 Fix issue related to `Series.concat` to concat a non-empty and empty series.
 - PR #6226 Add in some JNI checks for null handles
 
 
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 2c842b22b96..acf8b5dd95b 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -241,7 +241,7 @@ def concat(objs, axis=0, ignore_index=False, sort=None):
 
         df.columns = result_columns.unique()
         if ignore_index:
-            df.index = None
+            df.index = cudf.RangeIndex(len(objs[0]))
             return df
         elif not match_index:
             return df.sort_index()
@@ -282,9 +282,18 @@ def concat(objs, axis=0, ignore_index=False, sort=None):
                 objs, axis=axis, ignore_index=ignore_index, sort=sort
             )
     elif typ is cudf.Series:
-        return cudf.Series._concat(
-            objs, axis=axis, index=None if ignore_index else True
-        )
+        # Drop zero-length series before concatenating.
+        objs = [obj for obj in objs if len(obj)]
+        if len(objs) == 0:
+            # Every input series was empty.
+            return cudf.Series()
+        elif len(objs) == 1 and not ignore_index:
+            # Copy so the result does not alias the caller's series.
+            return objs[0].copy()
+        else:
+            return cudf.Series._concat(
+                objs, axis=axis, index=None if ignore_index else True
+            )
     elif typ is cudf.MultiIndex:
         return cudf.MultiIndex._concat(objs)
     elif issubclass(typ, cudf.Index):
diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py
index b3368074e4b..e130bab6b5e 100644
--- a/python/cudf/cudf/tests/test_concat.py
+++ b/python/cudf/cudf/tests/test_concat.py
@@ -527,6 +527,35 @@ def test_concat_empty_dataframes(df, other, ignore_index):
     )
 
 
+@pytest.mark.parametrize("ignore_index", [True, False])
+@pytest.mark.parametrize("axis", [0, "index"])
+@pytest.mark.parametrize("data", [["a", "b", "c"], ["XX", "YY", "ZZ"]])
+def test_concat_empty_and_nonempty_series(ignore_index, data, axis):
+    """Concatenating an empty and a non-empty series matches pandas."""
+    s1 = gd.Series()
+    s2 = gd.Series(data)
+    ps1 = s1.to_pandas()
+    ps2 = s2.to_pandas()
+    got = gd.concat([s1, s2], axis=axis, ignore_index=ignore_index)
+    expect = pd.concat([ps1, ps2], axis=axis, ignore_index=ignore_index)
+
+    assert_eq(got, expect)
+
+
+@pytest.mark.parametrize("ignore_index", [True, False])
+@pytest.mark.parametrize("axis", [0, "index"])
+def test_concat_two_empty_series(ignore_index, axis):
+    """Concatenating two empty series matches pandas."""
+    s1 = gd.Series()
+    s2 = gd.Series()
+    ps1 = s1.to_pandas()
+    ps2 = s2.to_pandas()
+    got = gd.concat([s1, s2], axis=axis, ignore_index=ignore_index)
+    expect = pd.concat([ps1, ps2], axis=axis, ignore_index=ignore_index)
+
+    assert_eq(got, expect)
+
+
 @pytest.mark.parametrize(
     "df1,df2",
     [