diff --git a/dask_expr/_groupby.py b/dask_expr/_groupby.py
index 49f342f3..8d00e051 100644
--- a/dask_expr/_groupby.py
+++ b/dask_expr/_groupby.py
@@ -1621,6 +1621,7 @@ def __getitem__(self, key):
             return SeriesGroupBy(
                 self.obj,
                 by=self.by,
+                group_keys=self.group_keys,
                 slice=key,
                 sort=self.sort,
                 dropna=self.dropna,
@@ -2194,6 +2195,7 @@ def __init__(
         self,
         obj,
         by,
+        group_keys=True,
         sort=None,
         observed=None,
         dropna=None,
@@ -2218,7 +2220,13 @@ def __init__(
             obj._meta.groupby(by, **_as_dict("observed", observed))
 
         super().__init__(
-            obj, by=by, slice=slice, observed=observed, dropna=dropna, sort=sort
+            obj,
+            by=by,
+            group_keys=group_keys,
+            slice=slice,
+            observed=observed,
+            dropna=dropna,
+            sort=sort,
         )
 
     @derived_from(pd.core.groupby.SeriesGroupBy)
diff --git a/dask_expr/tests/test_groupby.py b/dask_expr/tests/test_groupby.py
index 05ca7f3c..7e56a00e 100644
--- a/dask_expr/tests/test_groupby.py
+++ b/dask_expr/tests/test_groupby.py
@@ -1048,3 +1048,16 @@ def test_groupby_index_modified_divisions():
         df.groupby(df.index.dt.date).count(),
         pdf.groupby(pdf.index.date).count(),
     )
+
+
+def test_groupby_getitem_apply_group_keys():
+    pdf = pd.DataFrame(
+        {
+            "A": [0, 1] * 4,
+            "B": [1] * 8,
+        }
+    )
+    df = from_pandas(pdf, npartitions=4)
+    result = df.groupby("A", group_keys=False).B.apply(lambda x: x, meta=("B", int))
+    expected = pdf.groupby("A", group_keys=False).B.apply(lambda x: x)
+    assert_eq(result, expected)