From f73c31d99cb053e3c631ed2f86f41d0e51994850 Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Sat, 30 Mar 2024 16:52:10 +0100 Subject: [PATCH 1/5] Add docstring - Add `narwhals.dataframe.DataFrame.unique` docstring --- narwhals/dataframe.py | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index cf9efea34..7a37821f8 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -631,6 +631,54 @@ def drop(self, *columns: str | Iterable[str]) -> Self: return super().drop(*columns) def unique(self, subset: str | list[str]) -> Self: + r""" + Drop duplicate rows from this dataframe. + + Arguments: + subset: Column name(s) to consider when identifying duplicate rows. + + Returns: + DataFrame: DataFrame with unique rows. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "foo": [1, 2, 3, 1], + ... "bar": ["a", "a", "a", "a"], + ... "ham": ["b", "b", "b", "b"], + ... } + ... 
) + >>> df = nw.DataFrame(df_pl) + >>> df + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> dframe = df.unique(["bar", "ham"]) + >>> nw.to_native(dframe) + shape: (1, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ str ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ a ┆ b │ + └─────┴─────┴─────┘ + >>> dframe = df.unique("foo").sort("foo") + >>> nw.to_native(dframe) + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ str ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ a ┆ b │ + │ 2 ┆ a ┆ b │ + │ 3 ┆ a ┆ b │ + └─────┴─────┴─────┘ + """ return super().unique(subset) def filter(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> Self: From be39e1091cbaccdbe52f8fe80dd017c0eed59583 Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Sat, 30 Mar 2024 17:23:20 +0100 Subject: [PATCH 2/5] Add docstring - Add `narwhals.dataframe.DataFrame.sort` docstring --- narwhals/dataframe.py | 84 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 7a37821f8..818f9fde5 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -863,6 +863,90 @@ def sort( *more_by: str, descending: bool | Sequence[bool] = False, ) -> Self: + r""" + Sort the dataframe by the given columns. + + Arguments: + by: Column name(s) to sort by. + + *more_by: Additional columns to sort by, specified as positional + arguments. + + descending: Sort in descending order. When sorting by multiple + columns, can be specified per column by passing a + sequence of booleans. + + Examples: + Pass a single column name to sort by that column. + + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "a": [1, 2, None], + ... "b": [6.0, 5.0, 4.0], + ... "c": ["a", "c", "b"], + ... } + ...
) + >>> df = nw.DataFrame(df_pl) + >>> dframe = df.sort("a") + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (3, 3) + ┌──────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞══════╪═════╪═════╡ + │ null ┆ 4.0 ┆ b │ + │ 1 ┆ 6.0 ┆ a │ + │ 2 ┆ 5.0 ┆ c │ + └──────┴─────┴─────┘ + + Sort by multiple columns by passing a list of columns. + + >>> dframe = df.sort(["c", "a"], descending=True) + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (3, 3) + ┌──────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞══════╪═════╪═════╡ + │ 2 ┆ 5.0 ┆ c │ + │ null ┆ 4.0 ┆ b │ + │ 1 ┆ 6.0 ┆ a │ + └──────┴─────┴─────┘ + + Or use positional arguments to sort by multiple columns in the same way. 
+ + >>> dframe = df.sort("c", "a", descending=[False, True]) + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (3, 3) + ┌──────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞══════╪═════╪═════╡ + │ 1 ┆ 6.0 ┆ a │ + │ null ┆ 4.0 ┆ b │ + │ 2 ┆ 5.0 ┆ c │ + └──────┴─────┴─────┘ + """ return super().sort(by, *more_by, descending=descending) def join( From 90930a0918f27a60520f138fb583e07d223f991f Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Sat, 30 Mar 2024 18:16:10 +0100 Subject: [PATCH 3/5] Add docstring - Add `narwhals.dataframe.DataFrame.join` docstring --- narwhals/dataframe.py | 137 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 818f9fde5..d004ee69c 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -957,6 +957,143 @@ def join( left_on: str | list[str], right_on: str | list[str], ) -> Self: + r""" + Join in SQL-like fashion. + + Arguments: + other: DataFrame to join with. + + how: {'inner', 'left', 'outer', 'semi', 'anti', 'cross', 'outer_coalesce'} + Join strategy. + + * *inner*: Returns rows that have matching values in both + tables + * *left*: Returns all rows from the left table, and the + matched rows from the right table + * *outer*: Returns all rows when there is a match in either + left or right table + * *outer_coalesce*: Same as 'outer', but coalesces the key + columns + * *cross*: Returns the Cartesian product of rows from both + tables + * *semi*: Filter rows that have a match in the right table. + * *anti*: Filter rows that not have a match in the right + table. + + Note: + A left join preserves the row order of the left DataFrame. + + left_on: Name(s) of the left join column(s). 
+ + right_on: Name(s) of the right join column(s). + + Returns: DataFrame + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame( + ... { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + ... ) + >>> other_df_pl = pl.DataFrame( + ... { + ... "apple": ["x", "y", "z"], + ... "ham": ["a", "b", "d"], + ... } + ... ) + >>> df = nw.DataFrame(df_pl) + >>> other_df = nw.DataFrame(other_df_pl) + >>> dframe = df.join(other_df, left_on="ham", right_on="ham") + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (2, 4) + ┌─────┬─────┬─────┬───────┐ + │ foo ┆ bar ┆ ham ┆ apple │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str ┆ str │ + ╞═════╪═════╪═════╪═══════╡ + │ 1 ┆ 6.0 ┆ a ┆ x │ + │ 2 ┆ 7.0 ┆ b ┆ y │ + └─────┴─────┴─────┴───────┘ + + >>> dframe = df.join(other_df, left_on="ham", right_on="ham", how="outer") + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (4, 5) + ┌──────┬──────┬──────┬───────┬───────────┐ + │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str ┆ str ┆ str │ + ╞══════╪══════╪══════╪═══════╪═══════════╡ + │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │ + │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │ + │ null ┆ null ┆ null ┆ z ┆ d │ + │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │ + └──────┴──────┴──────┴───────┴───────────┘ + + >>> dframe = df.join(other_df, left_on="ham", right_on="ham", how="left") + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: 
(3, 4) + ┌─────┬─────┬─────┬───────┐ + │ foo ┆ bar ┆ ham ┆ apple │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str ┆ str │ + ╞═════╪═════╪═════╪═══════╡ + │ 1 ┆ 6.0 ┆ a ┆ x │ + │ 2 ┆ 7.0 ┆ b ┆ y │ + │ 3 ┆ 8.0 ┆ c ┆ null │ + └─────┴─────┴─────┴───────┘ + + >>> dframe = df.join(other_df, left_on="ham", right_on="ham", how="semi") + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (2, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 6.0 ┆ a │ + │ 2 ┆ 7.0 ┆ b │ + └─────┴─────┴─────┘ + + >>> dframe = df.join(other_df, left_on="ham", right_on="ham", how="anti") + >>> dframe + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + | Use `narwhals.to_native()` to see native output | + └─────────────────────────────────────────────────┘ + >>> nw.to_native(dframe) + shape: (1, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 3 ┆ 8.0 ┆ c │ + └─────┴─────┴─────┘ + """ return self._from_dataframe( self._dataframe.join( self._extract_native(other), From 36fdf658b79986d4d974c22292e9ef6172bd18be Mon Sep 17 00:00:00 2001 From: Franco Masotti Date: Sat, 30 Mar 2024 18:19:47 +0100 Subject: [PATCH 4/5] Fix - Fix return string --- narwhals/dataframe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index d004ee69c..758a2561c 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -987,7 +987,8 @@ def join( right_on: Name(s) of the right join column(s). 
- Returns: DataFrame + Returns: + A new joined DataFrame Examples: >>> import polars as pl From 34767f4172e2b8d9a5cd259b9142836d45819228 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 31 Mar 2024 19:27:30 +0100 Subject: [PATCH 5/5] only keep inner join for now --- narwhals/dataframe.py | 86 +------------------------------------------ 1 file changed, 1 insertion(+), 85 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 758a2561c..f7074c897 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -963,25 +963,11 @@ def join( Arguments: other: DataFrame to join with. - how: {'inner', 'left', 'outer', 'semi', 'anti', 'cross', 'outer_coalesce'} + how: {'inner'} Join strategy. * *inner*: Returns rows that have matching values in both tables - * *left*: Returns all rows from the left table, and the - matched rows from the right table - * *outer*: Returns all rows when there is a match in either - left or right table - * *outer_coalesce*: Same as 'outer', but coalesces the key - columns - * *cross*: Returns the Cartesian product of rows from both - tables - * *semi*: Filter rows that have a match in the right table. - * *anti*: Filter rows that not have a match in the right - table. - - Note: - A left join preserves the row order of the left DataFrame. left_on: Name(s) of the left join column(s). 
@@ -1024,76 +1010,6 @@ def join( │ 1 ┆ 6.0 ┆ a ┆ x │ │ 2 ┆ 7.0 ┆ b ┆ y │ └─────┴─────┴─────┴───────┘ - - >>> dframe = df.join(other_df, left_on="ham", right_on="ham", how="outer") - >>> dframe - ┌─────────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(dframe) - shape: (4, 5) - ┌──────┬──────┬──────┬───────┬───────────┐ - │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str ┆ str ┆ str │ - ╞══════╪══════╪══════╪═══════╪═══════════╡ - │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │ - │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │ - │ null ┆ null ┆ null ┆ z ┆ d │ - │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │ - └──────┴──────┴──────┴───────┴───────────┘ - - >>> dframe = df.join(other_df, left_on="ham", right_on="ham", how="left") - >>> dframe - ┌─────────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(dframe) - shape: (3, 4) - ┌─────┬─────┬─────┬───────┐ - │ foo ┆ bar ┆ ham ┆ apple │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str ┆ str │ - ╞═════╪═════╪═════╪═══════╡ - │ 1 ┆ 6.0 ┆ a ┆ x │ - │ 2 ┆ 7.0 ┆ b ┆ y │ - │ 3 ┆ 8.0 ┆ c ┆ null │ - └─────┴─────┴─────┴───────┘ - - >>> dframe = df.join(other_df, left_on="ham", right_on="ham", how="semi") - >>> dframe - ┌─────────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(dframe) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6.0 ┆ a │ - │ 2 ┆ 7.0 ┆ b │ - └─────┴─────┴─────┘ - - >>> dframe = df.join(other_df, left_on="ham", right_on="ham", how="anti") - >>> dframe - ┌─────────────────────────────────────────────────┐ - | Narwhals 
DataFrame | - | Use `narwhals.to_native()` to see native output | - └─────────────────────────────────────────────────┘ - >>> nw.to_native(dframe) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 3 ┆ 8.0 ┆ c │ - └─────┴─────┴─────┘ """ return self._from_dataframe( self._dataframe.join(