From dfc91a485c3629af9cb38f966e4fd1639d22ae83 Mon Sep 17 00:00:00 2001 From: Maren Westermann Date: Thu, 19 Dec 2024 14:31:05 +0100 Subject: [PATCH] docs: more args and returns documentation in DataFrame class (#1600) * more args and returns documentation in DataFrame class * address comment * more documentation --- narwhals/dataframe.py | 92 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 29ed1cf85..634f676f3 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -1100,6 +1100,9 @@ def row(self, index: int) -> tuple[Any, ...]: Arguments: index: Row number. + Returns: + A tuple of the values in the selected row. + Notes: cuDF doesn't support this method. @@ -1133,6 +1136,14 @@ def row(self, index: int) -> tuple[Any, ...]: def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self: """Pipe function call. + Arguments: + function: Function to apply. + args: Positional arguments to pass to function. + kwargs: Keyword arguments to pass to function. + + Returns: + The original object with the function applied. + Examples: >>> import polars as pl >>> import pandas as pd @@ -1175,12 +1186,15 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se return super().pipe(function, *args, **kwargs) def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: - """Drop null values. + """Drop rows that contain null values. Arguments: subset: Column name(s) for which null values are considered. If set to None (default), use all columns. + Returns: + The original object with the rows that contained null values removed. + Notes: pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't. 
@@ -1221,6 +1235,12 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: def with_row_index(self, name: str = "index") -> Self: """Insert column which enumerates rows. + Arguments: + name: The name of the column as a string. The default is "index". + + Returns: + The original object with the column added. + Examples: Construct pandas as polars DataFrames: @@ -1264,6 +1284,9 @@ def with_row_index(self, name: str = "index") -> Self: def schema(self) -> Schema: r"""Get an ordered mapping of column names to their data type. + Returns: + A Narwhals Schema object that displays the mapping of column names to their data types. + Examples: >>> import polars as pl >>> import pandas as pd @@ -1300,6 +1323,9 @@ def schema(self) -> Schema: def collect_schema(self: Self) -> Schema: r"""Get an ordered mapping of column names to their data type. + Returns: + A Narwhals Schema object that displays the mapping of column names to their data types. + Examples: >>> import polars as pl >>> import pandas as pd @@ -1337,6 +1363,9 @@ def collect_schema(self: Self) -> Schema: def columns(self) -> list[str]: """Get column names. + Returns: + The column names stored in a list. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1397,6 +1426,9 @@ def rows( in the same order as the frame columns. Setting named=True will return rows of dictionaries instead. + Returns: + The data as a list of rows. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1452,6 +1484,9 @@ def iter_rows( internally while iterating over the data. See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html + Returns: + An iterator over the rows of the DataFrame. + Notes: cuDF doesn't support this method. @@ -1561,6 +1596,9 @@ def select( **named_exprs: Additional columns to select, specified as keyword arguments. The columns will be renamed to the keyword used. + Returns: + The dataframe containing only the selected columns. 
+ Examples: >>> import pandas as pd >>> import polars as pl @@ -1674,6 +1712,9 @@ def rename(self, mapping: dict[str, str]) -> Self: Arguments: mapping: Key value pairs that map from old name to new name. + Returns: + The dataframe with the specified columns renamed. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1716,6 +1757,9 @@ def head(self, n: int = 5) -> Self: n: Number of rows to return. If a negative value is passed, return all rows except the last `abs(n)`. + Returns: + A subset of the dataframe of shape (n, n_columns). + Examples: >>> import pandas as pd >>> import polars as pl @@ -1762,6 +1806,9 @@ def tail(self, n: int = 5) -> Self: n: Number of rows to return. If a negative value is passed, return all rows except the first `abs(n)`. + Returns: + A subset of the dataframe of shape (n, n_columns). + Examples: >>> import pandas as pd >>> import polars as pl @@ -1804,6 +1851,9 @@ def tail(self, n: int = 5) -> Self: def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: """Remove columns from the dataframe. + Returns: + The dataframe with the specified columns removed. + Arguments: *columns: Names of the columns that should be removed from the dataframe. strict: Validate that all column names exist in the schema and throw an @@ -1890,6 +1940,9 @@ def unique( expensive to compute. Settings this to `True` blocks the possibility to run on the streaming engine for Polars. + Returns: + The dataframe with the duplicate rows removed. + Examples: >>> import pandas as pd >>> import polars as pl @@ -1939,6 +1992,9 @@ def filter( Each constraint will behave the same as `nw.col(name).eq(value)`, and will be implicitly joined with the other filter conditions using &. + Returns: + The filtered dataframe. + Examples: >>> import pandas as pd >>> import polars as pl @@ -2153,6 +2209,9 @@ def sort( specified per column by passing a sequence of booleans. nulls_last: Place null values last. + Returns: + The sorted dataframe. 
+ Warning: Unlike Polars, it is not possible to specify a sequence of booleans for `nulls_last` in order to control per-column behaviour. Instead a single @@ -2518,6 +2577,9 @@ def is_duplicated(self: Self) -> Series[Any]: def is_empty(self: Self) -> bool: r"""Check if the dataframe is empty. + Returns: + A boolean indicating whether the dataframe is empty (True) or not (False). + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -2600,6 +2662,9 @@ def is_unique(self: Self) -> Series[Any]: def null_count(self: Self) -> Self: r"""Create a new DataFrame that shows the null counts per column. + Returns: + A dataframe of shape (1, n_columns). + Notes: pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't. @@ -2651,6 +2716,13 @@ def null_count(self: Self) -> Self: def item(self: Self, row: int | None = None, column: int | str | None = None) -> Any: r"""Return the DataFrame as a scalar, or return the element at the given row/column. + Arguments: + row: The *n*-th row. + column: The column selected via an integer or a string (column name). + + Returns: + A scalar or the specified element in the dataframe. + Notes: If row/col not provided, this is equivalent to df[0,0], with a check that the shape is (1,1). With row/col, this is equivalent to df[row,col]. @@ -2682,6 +2754,9 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) -> def clone(self) -> Self: r"""Create a copy of this DataFrame. + Returns: + An identical copy of the original dataframe. + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -2721,6 +2796,9 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: n: Gather every *n*-th row. offset: Starting index. + Returns: + The dataframe containing only the selected rows. 
+ Examples: >>> import narwhals as nw >>> import pandas as pd @@ -2790,6 +2868,9 @@ def pivot( separator: Used as separator/delimiter in generated column names in case of multiple `values` columns. + Returns: + A new dataframe. + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -2841,6 +2922,9 @@ def pivot( def to_arrow(self: Self) -> pa.Table: r"""Convert to arrow table. + Returns: + A new PyArrow table. + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -2890,6 +2974,9 @@ def sample( seed: Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation. + Returns: + A new dataframe. + Notes: The results may not be consistent across libraries. @@ -2956,6 +3043,9 @@ def unpivot( variable_name: Name to give to the `variable` column. Defaults to "variable". value_name: Name to give to the `value` column. Defaults to "value". + Returns: + The unpivoted dataframe. + Notes: If you're coming from pandas, this is similar to `pandas.DataFrame.melt`, but with `index` replacing `id_vars` and `on` replacing `value_vars`.