diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 13c13ff14..1964c279f 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -3281,6 +3281,14 @@ def to_native(self) -> FrameT: def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self: """Pipe function call. + Arguments: + function: Function to apply. + args: Positional arguments to pass to function. + kwargs: Keyword arguments to pass to function. + + Returns: + The original object with the function applied. + Examples: >>> import polars as pl >>> import pandas as pd @@ -3319,12 +3327,15 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se return super().pipe(function, *args, **kwargs) def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: - """Drop null values. + """Drop rows that contain null values. Arguments: subset: Column name(s) for which null values are considered. If set to None (default), use all columns. + Returns: + The original object with the rows that contained null values removed. + Notes: pandas handles null values differently from Polars and PyArrow. See [null_handling](../../pandas_like_concepts/null_handling) @@ -3366,6 +3377,12 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: def with_row_index(self, name: str = "index") -> Self: """Insert column which enumerates rows. + Arguments: + name: The name of the column as a string. The default is "index". + + Returns: + The original object with the column added. + Examples: >>> import polars as pl >>> import pandas as pd @@ -3407,6 +3424,9 @@ def with_row_index(self, name: str = "index") -> Self: def schema(self) -> Schema: r"""Get an ordered mapping of column names to their data type. + Returns: + A Narwhals Schema object that maps column names to their data types. 
+ Examples: >>> import polars as pl >>> import narwhals as nw @@ -3426,6 +3446,9 @@ def schema(self) -> Schema: def collect_schema(self: Self) -> Schema: r"""Get an ordered mapping of column names to their data type. + Returns: + A Narwhals Schema object that maps column names to their data types. + Examples: >>> import polars as pl >>> import narwhals as nw @@ -3446,6 +3469,9 @@ def collect_schema(self: Self) -> Schema: def columns(self) -> list[str]: r"""Get column names. + Returns: + The column names stored in a list. + Examples: >>> import pandas as pd >>> import polars as pl @@ -3563,6 +3589,9 @@ def select( **named_exprs: Additional columns to select, specified as keyword arguments. The columns will be renamed to the keyword used. + Returns: + The LazyFrame containing only the selected columns. + Notes: If you'd like to select a column whose name isn't a string (for example, if you're working with pandas) then you should explicitly use `nw.col` instead @@ -3734,6 +3763,9 @@ def rename(self, mapping: dict[str, str]) -> Self: function that takes the old name as input and returns the new name. + Returns: + The LazyFrame with the specified columns renamed. + Examples: >>> import pandas as pd >>> import polars as pl @@ -3777,6 +3809,9 @@ def head(self, n: int = 5) -> Self: Arguments: n: Number of rows to return. + Returns: + A subset of the LazyFrame of shape (n, n_columns). + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -3835,6 +3870,9 @@ def tail(self, n: int = 5) -> Self: Arguments: n: Number of rows to return. + Returns: + A subset of the LazyFrame of shape (n, n_columns). + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -3895,6 +3933,9 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: strict: Validate that all column names exist in the schema and throw an exception if a column name does not exist in the schema. + Returns: + The LazyFrame with the specified columns removed. 
+ Warning: `strict` argument is ignored for `polars<1.0.0`. @@ -4038,6 +4079,9 @@ def filter( Each constraint will behave the same as `nw.col(name).eq(value)`, and will be implicitly joined with the other filter conditions using &. + Returns: + The filtered LazyFrame. + Examples: >>> import pandas as pd >>> import polars as pl @@ -4221,6 +4265,9 @@ def group_by( drop_null_keys: if True, then groups where any key is null won't be included in the result. + Returns: + LazyGroupBy: Object which can be used to perform aggregations. + Examples: Group by one column and call `agg` to compute the grouped sum of another column. @@ -4336,6 +4383,9 @@ def sort( nulls_last: Place null values last; can specify a single boolean applying to all columns or a sequence of booleans for per-column control. + Returns: + The sorted LazyFrame. + Warning: Unlike Polars, it is not possible to specify a sequence of booleans for `nulls_last` in order to control per-column behaviour. Instead a single @@ -4411,7 +4461,7 @@ def join( suffix: Suffix to append to columns with a duplicate name. Returns: - A new joined LazyFrame + A new joined LazyFrame. Examples: >>> import narwhals as nw @@ -4507,7 +4557,7 @@ def join_asof( * *nearest*: search selects the last row in the right DataFrame whose value is nearest to the left's key. Returns: - A new joined DataFrame + A new joined LazyFrame. Examples: >>> from datetime import datetime @@ -4670,6 +4720,9 @@ def join_asof( def clone(self) -> Self: r"""Create a copy of this DataFrame. + Returns: + An identical copy of the original LazyFrame. + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -4709,6 +4762,9 @@ def lazy(self) -> Self: If a library does not support lazy execution, then this is a no-op. + Returns: + A LazyFrame. + Examples: Construct pandas and Polars objects: @@ -4746,6 +4802,9 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: n: Gather every *n*-th row. offset: Starting index. 
+ Returns: + The LazyFrame containing only the selected rows. + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -4805,6 +4864,9 @@ def unpivot( variable_name: Name to give to the `variable` column. Defaults to "variable". value_name: Name to give to the `value` column. Defaults to "value". + Returns: + The unpivoted LazyFrame. + Notes: If you're coming from pandas, this is similar to `pandas.DataFrame.melt`, but with `index` replacing `id_vars` and `on` replacing `value_vars`.