diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index bb9f48d17b2e1..ab5746eca1b18 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -733,6 +733,7 @@ Groupby/resample/rolling - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) +- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3fa34007a739b..f4e3f3e8b1001 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -583,6 +583,8 @@ def _wrap_applied_output( if is_transform: # GH#47787 see test_group_on_empty_multiindex res_index = data.index + elif not self.group_keys: + res_index = None else: res_index = self._grouper.result_index @@ -1967,6 +1969,8 @@ def _wrap_applied_output( if is_transform: # GH#47787 see test_group_on_empty_multiindex res_index = data.index + elif not self.group_keys: + res_index = None else: res_index = self._grouper.result_index diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 64220f1d3d5b4..b7e6e55739c17 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -159,6 +159,7 @@ def test_agg_apply_corner(ts, tsframe): tm.assert_frame_equal(grouped.agg("sum"), exp_df) res = grouped.apply(np.sum, axis=0) + exp_df = exp_df.reset_index(drop=True) tm.assert_frame_equal(res, exp_df) diff --git a/pandas/tests/groupby/test_all_methods.py b/pandas/tests/groupby/test_all_methods.py index 945c3e421a132..4625c5c27a803 100644 --- a/pandas/tests/groupby/test_all_methods.py +++ b/pandas/tests/groupby/test_all_methods.py @@ -22,7 +22,7 @@ def test_multiindex_group_all_columns_when_empty(groupby_func): # GH 32464 df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) - gb = df.groupby(["a", "b", "c"], group_keys=False) + gb = df.groupby(["a", "b", "c"], group_keys=True) method = getattr(gb, groupby_func) args = get_groupby_method_args(groupby_func, df) if groupby_func == "corrwith": diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 366eb59ee226a..4e7c0acb127ed 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -777,10 +777,21 @@ def test_evaluate_with_empty_groups(self, func, expected): # (not testing other agg fns, because they return # different index objects. df = DataFrame({1: [], 2: []}) - g = df.groupby(1, group_keys=False) + g = df.groupby(1, group_keys=True) result = getattr(g[2], func)(lambda x: x) tm.assert_series_equal(result, expected) + def test_groupby_apply_empty_with_group_keys_false(self): + # 60471 + # test apply'ing empty groups with group_keys False + # (not testing other agg fns, because they return + # different index objects. + df = DataFrame({"A": [], "B": [], "C": []}) + g = df.groupby("A", group_keys=False) + result = g.apply(lambda x: x / x.sum(), include_groups=False) + expected = DataFrame({"B": [], "C": []}, index=None) + tm.assert_frame_equal(result, expected) + def test_groupby_empty(self): # https://github.com/pandas-dev/pandas/issues/27190 s = Series([], name="name", dtype="float64")