From b6d1eeb1d7c743ec37638e13b0b035b092181165 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang Date: Sun, 29 Dec 2024 21:48:57 +0800 Subject: [PATCH] docs: Add more return type descriptions for Series (#1674) --------- Co-authored-by: Marco Edward Gorelli --- narwhals/series.py | 144 +++++++++++++++++++++++++++++++-- narwhals/stable/v1/__init__.py | 6 +- 2 files changed, 142 insertions(+), 8 deletions(-) diff --git a/narwhals/series.py b/narwhals/series.py index 228fb01c6..1868eb396 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -256,6 +256,9 @@ def scatter(self, indices: int | Sequence[int], values: Any) -> Self: indices: Position(s) to set items at. values: Values to set. + Returns: + A new Series with values set at given positions. + Note: This method always returns a new Series, without modifying the original one. Using this function in a for-loop is an anti-pattern, we recommend building @@ -319,6 +322,9 @@ def scatter(self, indices: int | Sequence[int], values: Any) -> Self: def shape(self) -> tuple[int]: """Get the shape of the Series. + Returns: + A tuple containing the length of the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -359,6 +365,9 @@ def _from_compliant_series(self, series: Any) -> Self: def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self: """Pipe function call. + Returns: + A new Series with the results of the piped function applied. + Examples: >>> import polars as pl >>> import pandas as pd @@ -416,6 +425,9 @@ def len(self) -> int: Null values count towards the total. + Returns: + The number of elements in the Series. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeries @@ -444,6 +456,9 @@ def len(self) -> int: def dtype(self: Self) -> DType: """Get the data type of the Series. + Returns: + The data type of the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -472,6 +487,9 @@ def dtype(self: Self) -> DType: def name(self) -> str: """Get the name of the Series. + Returns: + The name of the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -639,7 +657,7 @@ def to_frame(self) -> DataFrame[Any]: """Convert to dataframe. Returns: - A new DataFrame. + A DataFrame containing this Series as a single column. Examples: >>> import pandas as pd @@ -719,6 +737,9 @@ def to_list(self) -> list[Any]: def mean(self) -> Any: """Reduce this Series to the mean value. + Returns: + The average of all elements in the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -749,6 +770,9 @@ def median(self) -> Any: Notes: Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median. + Returns: + The median value of all elements in the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -910,6 +934,9 @@ def all(self) -> Any: def min(self) -> Any: """Get the minimal value in this Series. + Returns: + The minimum value in the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -937,6 +964,9 @@ def min(self) -> Any: def max(self) -> Any: """Get the maximum value in this Series. + Returns: + The maximum value in the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1028,6 +1058,9 @@ def arg_max(self) -> int: def sum(self) -> Any: """Reduce this Series to the sum value. + Returns: + The sum of all elements in the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1059,6 +1092,9 @@ def std(self, *, ddof: int = 1) -> Any: ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. + Returns: + The standard deviation of all elements in the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1224,6 +1260,9 @@ def is_in(self, other: Any) -> Self: Arguments: other: Sequence of primitive type. + Returns: + A new Series with boolean values indicating if the elements are in the other sequence. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1303,6 +1342,9 @@ def drop_nulls(self) -> Self: See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. + Returns: + A new Series with null values removed. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1395,7 +1437,7 @@ def cum_sum(self: Self, *, reverse: bool = False) -> Self: reverse: reverse the operation Returns: - A new Series with the cumulative sum of the values. + A new Series with the cumulative sum of non-null values. Examples: >>> import pandas as pd @@ -1440,6 +1482,9 @@ def unique(self, *, maintain_order: bool = False) -> Self: expensive to compute. Settings this to `True` blocks the possibility to run on the streaming engine for Polars. + Returns: + A new Series with duplicate values removed. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1487,6 +1532,9 @@ def diff(self) -> Self: s.diff().fill_null(0).cast(nw.Int64) + Returns: + A new Series with the difference between each element and its predecessor. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1527,6 +1575,9 @@ def shift(self, n: int) -> Self: n: Number of indices to shift forward. If a negative value is passed, values are shifted in the opposite direction instead. + Returns: + A new Series with values shifted by n positions. + Notes: pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure, that the dtype doesn't change, @@ -1586,6 +1637,9 @@ def sample( seed: Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation. + Returns: + A new Series containing randomly sampled values from the original Series. + Notes: The `sample` method returns a Series with a specified number of randomly selected items chosen from this Series. @@ -1729,6 +1783,9 @@ def rename(self, name: str) -> Self: Arguments: name: The new name. + Returns: + A new Series with the updated name. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1793,6 +1850,9 @@ def replace_strict( (default), the data type is determined automatically based on the other inputs. + Returns: + A new Series with values replaced according to the mapping. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT @@ -1858,6 +1918,9 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: descending: Sort in descending order. nulls_last: Place null values last instead of first. + Returns: + A new sorted Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1922,6 +1985,9 @@ def is_null(self) -> Self: See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. + Returns: + A boolean Series indicating which values are null. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1984,6 +2050,9 @@ def fill_null( See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. + Returns: + A new Series with null values filled according to the specified value or strategy. + Examples: >>> import pandas as pd >>> import polars as pl @@ -2075,15 +2144,16 @@ def is_between( Arguments: lower_bound: Lower bound value. - upper_bound: Upper bound value. - closed: Define which sides of the interval are closed (inclusive). Notes: If the value of the `lower_bound` is greater than that of the `upper_bound`, then the values will be False, as no value can satisfy the condition. + Returns: + A boolean Series indicating which values are between the given bounds. + Examples: >>> import pandas as pd >>> import polars as pl @@ -2125,6 +2195,9 @@ def is_between( def n_unique(self) -> int: """Count the number of unique values. + Returns: + Number of unique values in the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -2152,6 +2225,9 @@ def n_unique(self) -> int: def to_numpy(self) -> np.ndarray: """Convert to numpy. + Returns: + NumPy ndarray representation of the Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -2180,6 +2256,9 @@ def to_numpy(self) -> np.ndarray: def to_pandas(self) -> pd.Series: """Convert to pandas. + Returns: + A pandas Series containing the data from this Series. + Examples: >>> import pandas as pd >>> import polars as pl @@ -2337,6 +2416,9 @@ def __invert__(self) -> Self: def filter(self, other: Any) -> Self: """Filter elements in the Series based on a condition. + Returns: + A new Series with elements that satisfy the condition. + Examples: >>> import pandas as pd >>> import polars as pl @@ -2377,7 +2459,7 @@ def is_duplicated(self: Self) -> Self: r"""Get a mask of all duplicated rows in the Series. Returns: - A new Series. + A new Series with boolean values indicating duplicated rows. Examples: >>> import narwhals as nw @@ -2416,6 +2498,9 @@ def is_duplicated(self: Self) -> Self: def is_empty(self: Self) -> bool: r"""Check if the series is empty. + Returns: + A boolean indicating if the series is empty. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeries @@ -2446,6 +2531,9 @@ def is_empty(self: Self) -> bool: def is_unique(self: Self) -> Self: r"""Get a mask of all unique rows in the Series. + Returns: + A new Series with boolean values indicating unique rows. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT @@ -2489,6 +2577,9 @@ def null_count(self: Self) -> int: See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. + Returns: + The number of null values in the Series. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeries @@ -2521,6 +2612,9 @@ def null_count(self: Self) -> int: def is_first_distinct(self: Self) -> Self: r"""Return a boolean mask indicating the first occurrence of each distinct value. + Returns: + A new Series with boolean values indicating the first occurrence of each distinct value. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT @@ -2561,6 +2655,9 @@ def is_first_distinct(self: Self) -> Self: def is_last_distinct(self: Self) -> Self: r"""Return a boolean mask indicating the last occurrence of each distinct value. + Returns: + A new Series with boolean values indicating the last occurrence of each distinct value. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT @@ -2604,6 +2701,9 @@ def is_sorted(self: Self, *, descending: bool = False) -> bool: Arguments: descending: Check if the Series is sorted in descending order. + Returns: + A boolean indicating if the Series is sorted. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeries @@ -2652,7 +2752,9 @@ def value_counts( normalize: If true gives relative frequencies of the unique values Returns: - A new DataFrame. + A DataFrame with two columns: + - The original values as first column + - Either count or proportion as second column, depending on normalize parameter. Examples: >>> import narwhals as nw @@ -2709,6 +2811,9 @@ def quantile( quantile: Quantile between 0.0 and 1.0. interpolation: Interpolation method. + Returns: + The quantile value. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeries @@ -2749,6 +2854,9 @@ def zip_with(self: Self, mask: Self, other: Self) -> Self: mask: Boolean Series other: Series of same type. + Returns: + A new Series with values selected from self or other based on the mask. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT @@ -2805,6 +2913,9 @@ def item(self: Self, index: int | None = None) -> Any: If no index is provided, this is equivalent to `s[0]`, with a check that the shape is (1,). With an index, this is equivalent to `s[index]`. + Returns: + The scalar value of the Series or the element at the given index. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeries @@ -2879,6 +2990,9 @@ def tail(self: Self, n: int = 10) -> Self: Arguments: n: Number of rows to return. + Returns: + A new Series with the last n rows. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT @@ -2918,6 +3032,9 @@ def round(self: Self, decimals: int = 0) -> Self: Arguments: decimals: Number of decimals to round by. + Returns: + A new Series with rounded values. + Notes: For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow. @@ -2969,6 +3086,9 @@ def to_dummies( separator: Separator/delimiter used when generating column names. drop_first: Remove the first category from the variable being encoded. + Returns: + A new DataFrame containing the dummy/indicator variables. + Notes: pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't. @@ -3039,6 +3159,9 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: n: Gather every *n*-th row. offset: Starting index. + Returns: + A new Series with every nth value starting from the offset. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT @@ -3075,6 +3198,9 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: def to_arrow(self: Self) -> pa.Array: r"""Convert to arrow. + Returns: + A PyArrow Array containing the data from the Series. + Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeries @@ -3116,6 +3242,9 @@ def mode(self: Self) -> Self: Can return multiple values. + Returns: + A new Series containing the mode(s) (values that appear most frequently). + Examples: >>> import pandas as pd >>> import polars as pl @@ -4280,6 +4409,9 @@ def head(self: Self, n: int = 5) -> SeriesT: Arguments: n: Number of elements to take. Negative indexing is supported (see note (1.)) + Returns: + A new Series containing the first n characters of each string. + Notes: 1. When the `n` input is negative, `head` returns characters up to the n-th from the end of the string. For example, if `n = -3`, then all characters except the last three are returned. diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 4bd0cc1bd..b234ad9b2 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -478,7 +478,7 @@ def to_frame(self) -> DataFrame[Any]: """Convert to dataframe. Returns: - A new DataFrame. + A DataFrame containing this Series as a single column. Examples: >>> import pandas as pd @@ -535,7 +535,9 @@ def value_counts( normalize: If true gives relative frequencies of the unique values Returns: - A new DataFrame. + A DataFrame with two columns: + - The original values as first column + - Either count or proportion as second column, depending on normalize parameter. Examples: >>> import narwhals as nw