diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index a20e5e6c1..c52f349c4 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -32,3 +32,5 @@ jobs: run: python -m pip install --upgrade modin[dask] - name: Run pytest run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 + - name: Run doctests + run: pytest narwhals --doctest-modules diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 1b3f9f0d0..300c68ddb 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -69,7 +69,7 @@ def __repr__(self) -> str: # pragma: no cover + "| Use `narwhals.to_native()` to see native output |\n" + "└" + "─" * length - + "┘\n" + + "┘" ) @property @@ -212,15 +212,105 @@ def to_dict(self, *, as_series: bool = True) -> dict[str, Any]: # inherited @property def schema(self) -> dict[str, DType]: + r""" + Get a dict[column name, DataType]. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + ... ) + >>> df = nw.DataFrame(df_pl) + >>> df.schema # doctest: +SKIP + OrderedDict({'foo': Int64, 'bar': Float64, 'ham': String}) + + """ return super().schema @property def columns(self) -> list[str]: + r""" + Get column names. + + Examples: + Get column names. + + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "foo": [1, 2, 3], + ... "bar": [6, 7, 8], + ... "ham": ["a", "b", "c"], + ... } + ... ) + >>> df = nw.DataFrame(df_pl) + >>> df.columns + ['foo', 'bar', 'ham'] + """ return super().columns def with_columns( self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr ) -> Self: + r""" + Add columns to this DataFrame. + + Added columns will replace existing columns with the same name. + + Arguments: + *exprs: Column(s) to add, specified as positional arguments. + Accepts expression input. 
Strings are parsed as column names, other + non-expression inputs are parsed as literals. + + **named_exprs: Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + + Returns: + DataFrame: A new DataFrame with the columns added. + + Note: + Creating a new DataFrame using this method does not create a new copy of + existing data. + + Examples: + Pass an expression to add it as a new column. + + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "a": [1, 2, 3, 4], + ... "b": [0.5, 4, 10, 13], + ... "c": [True, True, False, True], + ... } + ... ) + >>> df = nw.DataFrame(df_pl) + >>> dframe = df.with_columns((nw.col("a") * 2).alias("a*2")) + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + + >>> nw.to_native(dframe) + shape: (4, 4) + ┌─────┬──────┬───────┬─────┐ + │ a ┆ b ┆ c ┆ a*2 │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ bool ┆ i64 │ + ╞═════╪══════╪═══════╪═════╡ + │ 1 ┆ 0.5 ┆ true ┆ 2 │ + │ 2 ┆ 4.0 ┆ true ┆ 4 │ + │ 3 ┆ 10.0 ┆ false ┆ 6 │ + │ 4 ┆ 13.0 ┆ true ┆ 8 │ + └─────┴──────┴───────┴─────┘ + """ return super().with_columns(*exprs, **named_exprs) def select( @@ -228,9 +318,162 @@ def select( *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr, ) -> Self: + r""" + Select columns from this DataFrame. + + Arguments: + *exprs: Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + + **named_exprs: Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + + Examples: + Pass the name of a column to select that column. + + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... 
"foo": [1, 2, 3], + ... "bar": [6, 7, 8], + ... "ham": ["a", "b", "c"], + ... } + ... ) + >>> df = nw.DataFrame(df_pl) + >>> dframe = df.select("foo") + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (3, 1) + ┌─────┐ + │ foo │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 1 │ + │ 2 │ + │ 3 │ + └─────┘ + + Multiple columns can be selected by passing a list of column names. + + >>> dframe = df.select(["foo", "bar"]) + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (3, 2) + ┌─────┬─────┐ + │ foo ┆ bar │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 6 │ + │ 2 ┆ 7 │ + │ 3 ┆ 8 │ + └─────┴─────┘ + + Multiple columns can also be selected using positional arguments instead of a + list. Expressions are also accepted. + + >>> dframe = df.select(nw.col("foo"), nw.col("bar") + 1) + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (3, 2) + ┌─────┬─────┐ + │ foo ┆ bar │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 7 │ + │ 2 ┆ 8 │ + │ 3 ┆ 9 │ + └─────┴─────┘ + + Use keyword arguments to easily name your expression inputs. 
+ + >>> dframe = df.select(threshold=nw.col('foo')*2) + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (3, 1) + ┌───────────┐ + │ threshold │ + │ --- │ + │ i64 │ + ╞═══════════╡ + │ 2 │ + │ 4 │ + │ 6 │ + └───────────┘ + """ return super().select(*exprs, **named_exprs) def rename(self, mapping: dict[str, str]) -> Self: + r""" + Rename column names. + + Arguments: + mapping: Key value pairs that map from old name to new name, or a function + that takes the old name as input and returns the new name. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} + ... ) + >>> df = nw.DataFrame(df_pl) + >>> dframe = df.rename({"foo": "apple"}) + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + + >>> nw.to_native(dframe) + shape: (3, 3) + ┌───────┬─────┬─────┐ + │ apple ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═══════╪═════╪═════╡ + │ 1 ┆ 6 ┆ a │ + │ 2 ┆ 7 ┆ b │ + │ 3 ┆ 8 ┆ c │ + └───────┴─────┴─────┘ + >>> dframe = df.rename(lambda column_name: "f" + column_name[1:]) + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ far ┆ fam │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 6 ┆ a │ + │ 2 ┆ 7 ┆ b │ + │ 3 ┆ 8 ┆ c │ + └─────┴─────┴─────┘ + """ return super().rename(mapping) def head(self, n: int) -> Self: diff --git a/narwhals/pandas_like/group_by.py 
b/narwhals/pandas_like/group_by.py index 0170f5cde..b579ef0e8 100644 --- a/narwhals/pandas_like/group_by.py +++ b/narwhals/pandas_like/group_by.py @@ -47,8 +47,8 @@ def agg( if expr._output_names is None: msg = ( "Anonymous expressions are not supported in group_by.agg.\n" - "Instead of `pl.all()`, try using a named expression, such as " - "`pl.col('a', 'b')`\n" + "Instead of `nw.all()`, try using a named expression, such as " + "`nw.col('a', 'b')`\n" ) raise ValueError(msg) output_names.extend(expr._output_names) diff --git a/narwhals/pandas_like/typing.py b/narwhals/pandas_like/typing.py index 89b0051b4..1d2c1b9f4 100644 --- a/narwhals/pandas_like/typing.py +++ b/narwhals/pandas_like/typing.py @@ -1,9 +1,10 @@ from __future__ import annotations from typing import TYPE_CHECKING -from typing import TypeAlias if TYPE_CHECKING: + from typing import TypeAlias + from narwhals.pandas_like.expr import PandasExpr from narwhals.pandas_like.series import PandasSeries diff --git a/narwhals/series.py b/narwhals/series.py index fc4c442ed..8621d6158 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -61,7 +61,7 @@ def __repr__(self) -> str: # pragma: no cover + "| Use `narwhals.to_native()` to see native output |\n" + "└" + "─" * length - + "┘\n" + + "┘" ) def alias(self, name: str) -> Self: diff --git a/narwhals/typing.py b/narwhals/typing.py index 2504e85a3..1f06366b5 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -1,8 +1,9 @@ from typing import TYPE_CHECKING -from typing import TypeAlias from typing import TypeVar if TYPE_CHECKING: + from typing import TypeAlias + from narwhals.expression import Expr from narwhals.series import Series