Skip to content

Commit

Permalink
docs: more args and returns documentation in DataFrame class (#1600)
Browse files Browse the repository at this point in the history
* more args and returns documentation in DataFrame class

* address comment

* more documentation
  • Loading branch information
marenwestermann authored Dec 19, 2024
1 parent 0c933ee commit dfc91a4
Showing 1 changed file with 91 additions and 1 deletion.
92 changes: 91 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,9 @@ def row(self, index: int) -> tuple[Any, ...]:
Arguments:
index: Row number.
Returns:
A tuple of the values in the selected row.
Notes:
cuDF doesn't support this method.
Expand Down Expand Up @@ -1133,6 +1136,14 @@ def row(self, index: int) -> tuple[Any, ...]:
def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self:
"""Pipe function call.
Arguments:
function: Function to apply.
args: Positional arguments to pass to function.
kwargs: Keyword arguments to pass to function.
Returns:
The original object with the function applied.
Examples:
>>> import polars as pl
>>> import pandas as pd
Expand Down Expand Up @@ -1175,12 +1186,15 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se
return super().pipe(function, *args, **kwargs)

def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
"""Drop null values.
"""Drop rows that contain null values.
Arguments:
subset: Column name(s) for which null values are considered. If set to None
(default), use all columns.
Returns:
The original object with the rows that contained null values removed.
Notes:
pandas and Polars handle null values differently. Polars distinguishes
between NaN and Null, whereas pandas doesn't.
Expand Down Expand Up @@ -1221,6 +1235,12 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
def with_row_index(self, name: str = "index") -> Self:
"""Insert column which enumerates rows.
Arguments:
name: The name of the column as a string. The default is "index".
Returns:
The original object with the column added.
Examples:
Construct pandas and polars DataFrames:
Expand Down Expand Up @@ -1264,6 +1284,9 @@ def with_row_index(self, name: str = "index") -> Self:
def schema(self) -> Schema:
r"""Get an ordered mapping of column names to their data type.
Returns:
A Narwhals Schema object that displays the mapping of column names to their data types.
Examples:
>>> import polars as pl
>>> import pandas as pd
Expand Down Expand Up @@ -1300,6 +1323,9 @@ def schema(self) -> Schema:
def collect_schema(self: Self) -> Schema:
r"""Get an ordered mapping of column names to their data type.
Returns:
A Narwhals Schema object that displays the mapping of column names to their data types.
Examples:
>>> import polars as pl
>>> import pandas as pd
Expand Down Expand Up @@ -1337,6 +1363,9 @@ def collect_schema(self: Self) -> Schema:
def columns(self) -> list[str]:
"""Get column names.
Returns:
The column names stored in a list.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -1397,6 +1426,9 @@ def rows(
in the same order as the frame columns. Setting named=True will
return rows of dictionaries instead.
Returns:
The data as a list of rows.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -1452,6 +1484,9 @@ def iter_rows(
internally while iterating over the data.
See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html
Returns:
An iterator over the rows of the DataFrame.
Notes:
cuDF doesn't support this method.
Expand Down Expand Up @@ -1561,6 +1596,9 @@ def select(
**named_exprs: Additional columns to select, specified as keyword arguments.
The columns will be renamed to the keyword used.
Returns:
The dataframe containing only the selected columns.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -1674,6 +1712,9 @@ def rename(self, mapping: dict[str, str]) -> Self:
Arguments:
mapping: Key value pairs that map from old name to new name.
Returns:
The dataframe with the specified columns renamed.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -1716,6 +1757,9 @@ def head(self, n: int = 5) -> Self:
n: Number of rows to return. If a negative value is passed, return all rows
except the last `abs(n)`.
Returns:
The first `n` rows of the dataframe, or all rows except the last `abs(n)` if `n` is negative.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -1762,6 +1806,9 @@ def tail(self, n: int = 5) -> Self:
n: Number of rows to return. If a negative value is passed, return all rows
except the first `abs(n)`.
Returns:
The last `n` rows of the dataframe, or all rows except the first `abs(n)` if `n` is negative.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -1804,6 +1851,9 @@ def tail(self, n: int = 5) -> Self:
def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self:
"""Remove columns from the dataframe.
Returns:
The dataframe with the specified columns removed.
Arguments:
*columns: Names of the columns that should be removed from the dataframe.
strict: Validate that all column names exist in the schema and throw an
Expand Down Expand Up @@ -1890,6 +1940,9 @@ def unique(
expensive to compute. Setting this to `True` blocks the possibility
to run on the streaming engine for Polars.
Returns:
The dataframe with the duplicate rows removed.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -1939,6 +1992,9 @@ def filter(
Each constraint will behave the same as `nw.col(name).eq(value)`, and will be implicitly
joined with the other filter conditions using &.
Returns:
The filtered dataframe.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -2153,6 +2209,9 @@ def sort(
specified per column by passing a sequence of booleans.
nulls_last: Place null values last.
Returns:
The sorted dataframe.
Warning:
Unlike Polars, it is not possible to specify a sequence of booleans for
`nulls_last` in order to control per-column behaviour. Instead a single
Expand Down Expand Up @@ -2518,6 +2577,9 @@ def is_duplicated(self: Self) -> Series[Any]:
def is_empty(self: Self) -> bool:
r"""Check if the dataframe is empty.
Returns:
A boolean indicating whether the dataframe is empty (True) or not (False).
Examples:
>>> import narwhals as nw
>>> import pandas as pd
Expand Down Expand Up @@ -2600,6 +2662,9 @@ def is_unique(self: Self) -> Series[Any]:
def null_count(self: Self) -> Self:
r"""Create a new DataFrame that shows the null counts per column.
Returns:
A dataframe of shape (1, n_columns).
Notes:
pandas and Polars handle null values differently. Polars distinguishes
between NaN and Null, whereas pandas doesn't.
Expand Down Expand Up @@ -2651,6 +2716,13 @@ def null_count(self: Self) -> Self:
def item(self: Self, row: int | None = None, column: int | str | None = None) -> Any:
r"""Return the DataFrame as a scalar, or return the element at the given row/column.
Arguments:
row: The *n*-th row.
column: The column selected via an integer or a string (column name).
Returns:
A scalar or the specified element in the dataframe.
Notes:
If row/col not provided, this is equivalent to df[0,0], with a check that the shape is (1,1).
With row/col, this is equivalent to df[row,col].
Expand Down Expand Up @@ -2682,6 +2754,9 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) ->
def clone(self) -> Self:
r"""Create a copy of this DataFrame.
Returns:
An identical copy of the original dataframe.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
Expand Down Expand Up @@ -2721,6 +2796,9 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
n: Gather every *n*-th row.
offset: Starting index.
Returns:
The dataframe containing only the selected rows.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
Expand Down Expand Up @@ -2790,6 +2868,9 @@ def pivot(
separator: Used as separator/delimiter in generated column names in case of
multiple `values` columns.
Returns:
A new dataframe.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
Expand Down Expand Up @@ -2841,6 +2922,9 @@ def pivot(
def to_arrow(self: Self) -> pa.Table:
r"""Convert to arrow table.
Returns:
A new PyArrow table.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
Expand Down Expand Up @@ -2890,6 +2974,9 @@ def sample(
seed: Seed for the random number generator. If set to None (default), a random
seed is generated for each sample operation.
Returns:
A new dataframe.
Notes:
The results may not be consistent across libraries.
Expand Down Expand Up @@ -2956,6 +3043,9 @@ def unpivot(
variable_name: Name to give to the `variable` column. Defaults to "variable".
value_name: Name to give to the `value` column. Defaults to "value".
Returns:
The unpivoted dataframe.
Notes:
If you're coming from pandas, this is similar to `pandas.DataFrame.melt`,
but with `index` replacing `id_vars` and `on` replacing `value_vars`.
Expand Down

0 comments on commit dfc91a4

Please sign in to comment.