From 73b5578967bedd1f94b8a54d9047f33364178783 Mon Sep 17 00:00:00 2001 From: Apoorv <113182336+ApoorvApoorv@users.noreply.github.com> Date: Wed, 31 Jul 2024 02:46:47 +0100 Subject: [PATCH] DOC: Added extra sentences to clarify series.GroupBy snippets in examples (#59331) * Added messages for each relevant snippet * some small corrections to clarify further * removed trailing whitespace * more formatting correction * more cleanup * reverting changes * trying to format documentation correctly * removed some part of added text * testing if removing list works * reverting some changes * reverting changes * checking if minor changes also lead to failures * reverting all changes to pass the tests * checking if small changes cause errors as well * pushing the changes back --- pandas/core/series.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index f340821775015..a197886748bce 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1815,14 +1815,30 @@ def _set_name( Parrot 30.0 Parrot 20.0 Name: Max Speed, dtype: float64 + + We can pass a list of values to group the Series data by custom labels: + >>> ser.groupby(["a", "b", "a", "b"]).mean() a 210.0 b 185.0 Name: Max Speed, dtype: float64 + + Grouping by numeric labels yields similar results: + + >>> ser.groupby([0, 1, 0, 1]).mean() + 0 210.0 + 1 185.0 + Name: Max Speed, dtype: float64 + + We can group by a level of the index: + >>> ser.groupby(level=0).mean() Falcon 370.0 Parrot 25.0 Name: Max Speed, dtype: float64 + + We can group by a condition applied to the Series values: + >>> ser.groupby(ser > 100).mean() Max Speed False 25.0 @@ -1845,11 +1861,16 @@ def _set_name( Parrot Captive 30.0 Wild 20.0 Name: Max Speed, dtype: float64 + >>> ser.groupby(level=0).mean() Animal Falcon 370.0 Parrot 25.0 Name: Max Speed, dtype: float64 + + We can also group by the 'Type' level of the hierarchical index + to get the mean speed for each type: 
+ >>> ser.groupby(level="Type").mean() Type Captive 210.0 @@ -1865,12 +1886,17 @@ def _set_name( b 3 dtype: int64 + To include `NA` values in the group keys, set `dropna=False`: + >>> ser.groupby(level=0, dropna=False).sum() a 3 b 3 NaN 3 dtype: int64 + We can also group by a custom list with NaN values to handle + missing group labels: + >>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot'] >>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed") >>> ser.groupby(["a", "b", "a", np.nan]).mean()