diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index ce0cae8ac..4ec42ef59 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -404,6 +404,10 @@ def join( left_on: str | list[str] | None, right_on: str | list[str] | None, ) -> Self: + if isinstance(left_on, str): + left_on = [left_on] + if isinstance(right_on, str): + right_on = [right_on] if how == "cross": if ( self._implementation is Implementation.MODIN diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 6615d5031..6a1985f41 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -3,6 +3,7 @@ import re from datetime import datetime from typing import Any +from typing import Literal import pandas as pd import pytest @@ -14,18 +15,28 @@ def test_inner_join_two_keys(constructor: Any) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "index": [0, 1, 2]} + data = { + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "zorro": [7.0, 8, 9], + "index": [0, 1, 2], + } df = nw.from_native(constructor(data)) df_right = df - result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner") # type: ignore[arg-type] - result_on = df.join(df_right, on=["a", "b"], how="inner") # type: ignore[arg-type] + result = df.join( + df_right, # type: ignore[arg-type] + left_on=["antananarivo", "bob"], + right_on=["antananarivo", "bob"], + how="inner", + ) + result_on = df.join(df_right, on=["antananarivo", "bob"], how="inner") # type: ignore[arg-type] result = result.sort("index").drop("index_right") result_on = result_on.sort("index").drop("index_right") expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8, 9], - "z_right": [7.0, 8, 9], + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "zorro": [7.0, 8, 9], + "zorro_right": [7.0, 8, 9], "index": [0, 1, 2], } compare_dicts(result, expected) @@ -33,19 +44,29 @@ def test_inner_join_two_keys(constructor: Any) -> None: def test_inner_join_single_key(constructor: Any) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "index": [0, 1, 2]} + data = { + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "zorro": [7.0, 8, 9], + "index": [0, 1, 2], + } df = nw.from_native(constructor(data)) df_right = df - result = df.join(df_right, left_on="a", right_on="a", how="inner").sort("index") # type: ignore[arg-type] - result_on = df.join(df_right, on="a", how="inner").sort("index") # type: ignore[arg-type] + result = df.join( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + how="inner", + ).sort("index") + result_on = df.join(df_right, on="antananarivo", how="inner").sort("index") # type: ignore[arg-type] result = result.drop("index_right") result_on = result_on.drop("index_right") expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "b_right": [4, 4, 6], - "z": [7.0, 8, 9], - "z_right": [7.0, 8, 9], + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "bob_right": [4, 4, 6], + "zorro": [7.0, 8, 9], + "zorro_right": [7.0, 8, 9], "index": [0, 1, 2], } compare_dicts(result, expected) @@ -53,30 +74,30 @@ def test_inner_join_single_key(constructor: Any) -> None: def test_cross_join(constructor: Any) -> None: - data = {"a": [1, 3, 2]} + data = {"antananarivo": [1, 3, 2]} df = nw.from_native(constructor(data)) - result = df.join(df, how="cross").sort("a", "a_right") # type: ignore[arg-type] + result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") # type: ignore[arg-type] expected = { - "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], - "a_right": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "antananarivo_right": [1, 2, 3, 1, 2, 3, 1, 2, 3], } compare_dicts(result, expected) with pytest.raises( ValueError, match="Can not pass `left_on`, `right_on` or `on` keys for cross join" ): - df.join(df, how="cross", left_on="a") # type: ignore[arg-type] + df.join(df, how="cross", left_on="antananarivo") # type: ignore[arg-type] def test_cross_join_non_pandas() -> None: - data = {"a": [1, 3, 2]} + data = {"antananarivo": [1, 3, 2]} df = nw.from_native(pd.DataFrame(data)) # HACK to force testing for a non-pandas codepath df._compliant_frame._implementation = Implementation.MODIN result = df.join(df, how="cross") # type: ignore[arg-type] expected = { - "a": [1, 1, 1, 3, 3, 3, 2, 2, 2], - "a_right": [1, 3, 2, 1, 3, 2, 1, 3, 2], + "antananarivo": [1, 1, 1, 3, 3, 3, 2, 2, 2], + "antananarivo_right": [1, 3, 2, 1, 3, 2, 1, 3, 2], } compare_dicts(result, expected) @@ -84,9 +105,17 @@ def test_cross_join_non_pandas() -> None: @pytest.mark.parametrize( ("join_key", "filter_expr", "expected"), [ - (["a", "b"], (nw.col("b") < 5), {"a": [2], "b": [6], "z": [9]}), - (["b"], (nw.col("b") < 5), {"a": [2], "b": [6], "z": [9]}), - (["b"], (nw.col("b") > 5), {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]}), + ( + ["antananarivo", "bob"], + (nw.col("bob") < 5), + {"antananarivo": [2], "bob": [6], "zorro": [9]}, + ), + (["bob"], (nw.col("bob") < 5), {"antananarivo": [2], "bob": [6], "zorro": [9]}), + ( + ["bob"], + (nw.col("bob") > 5), + {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7.0, 8.0]}, + ), ], ) def test_anti_join( @@ -95,7 +124,7 @@ def test_anti_join( filter_expr: nw.Expr, expected: dict[str, list[Any]], ) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) other = df.filter(filter_expr) result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type] @@ -105,9 +134,21 @@ def test_anti_join( @pytest.mark.parametrize( ("join_key", "filter_expr", "expected"), [ - (["a"], (nw.col("b") > 5), {"a": [2], "b": [6], "z": [9]}), - (["b"], (nw.col("b") < 5), {"a": [1, 3], "b": [4, 4], "z": [7, 8]}), - (["a", "b"], (nw.col("b") < 5), {"a": [1, 3], "b": [4, 4], "z": [7, 8]}), + ( + ["antananarivo"], + (nw.col("bob") > 5), + {"antananarivo": [2], "bob": [6], "zorro": [9]}, + ), + ( + ["bob"], + (nw.col("bob") < 5), + {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]}, + ), + ( + ["antananarivo", "bob"], + (nw.col("bob") < 5), + {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]}, + ), ], ) def test_semi_join( @@ -116,16 +157,18 @@ def test_semi_join( filter_expr: nw.Expr, expected: dict[str, list[Any]], ) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) other = df.filter(filter_expr) - result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort("a") # type: ignore[arg-type] + result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( # type: ignore[arg-type] + "antananarivo" + ) compare_dicts(result, expected) @pytest.mark.parametrize("how", ["right", "full"]) def test_join_not_implemented(constructor: Any, how: str) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( @@ -134,24 +177,28 @@ def test_join_not_implemented(constructor: Any, how: str) -> None: f"Only the following join strategies are supported: ('inner', 'left', 'cross', 'anti', 'semi'); found '{how}'." ), ): - df.join(df, left_on="a", right_on="a", how=how) # type: ignore[arg-type] + df.join(df, left_on="antananarivo", right_on="antananarivo", how=how) # type: ignore[arg-type] @pytest.mark.filterwarnings("ignore:the default coalesce behavior") def test_left_join(constructor: Any) -> None: - data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6], "index": [0.0, 1.0, 2.0]} - data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 7], "index": [0.0, 1.0, 2.0]} + data_left = { + "antananarivo": [1.0, 2, 3], + "bob": [4.0, 5, 6], + "index": [0.0, 1.0, 2.0], + } + data_right = {"antananarivo": [1.0, 2, 3], "c": [4.0, 5, 7], "index": [0.0, 1.0, 2.0]} df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="b", right_on="c", how="left").select( # type: ignore[arg-type] + result = df_left.join(df_right, left_on="bob", right_on="c", how="left").select( # type: ignore[arg-type] nw.all().fill_null(float("nan")) ) result = result.sort("index") result = result.drop("index_right") expected = { - "a": [1, 2, 3], - "b": [4, 5, 6], - "a_right": [1, 2, float("nan")], + "antananarivo": [1, 2, 3], + "bob": [4, 5, 6], + "antananarivo_right": [1, 2, float("nan")], "index": [0, 1, 2], } compare_dicts(result, expected) @@ -159,54 +206,62 @@ def test_left_join(constructor: Any) -> None: @pytest.mark.filterwarnings("ignore: the default coalesce behavior") def test_left_join_multiple_column(constructor: Any) -> None: - data_left = {"a": [1, 2, 3], "b": [4, 5, 6], "index": [0, 1, 2]} - data_right = {"a": [1, 2, 3], "c": [4, 5, 6], "index": [0, 1, 2]} + data_left = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "index": [0, 1, 2]} + data_right = {"antananarivo": [1, 2, 3], "c": [4, 5, 6], "index": [0, 1, 2]} df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on=["a", "b"], right_on=["a", "c"], how="left") # type: ignore[arg-type] + result = df_left.join( + df_right, # type: ignore[arg-type] + left_on=["antananarivo", "bob"], + right_on=["antananarivo", "c"], + how="left", + ) result = result.sort("index") result = result.drop("index_right") - expected = {"a": [1, 2, 3], "b": [4, 5, 6], "index": [0, 1, 2]} + expected = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "index": [0, 1, 2]} compare_dicts(result, expected) @pytest.mark.filterwarnings("ignore: the default coalesce behavior") def test_left_join_overlapping_column(constructor: Any) -> None: data_left = { - "a": [1.0, 2, 3], - "b": [4.0, 5, 6], + "antananarivo": [1.0, 2, 3], + "bob": [4.0, 5, 6], "d": [1.0, 4, 2], "index": [0.0, 1.0, 2.0], } data_right = { - "a": [1.0, 2, 3], + "antananarivo": [1.0, 2, 3], "c": [4.0, 5, 6], "d": [1.0, 4, 2], "index": [0.0, 1.0, 2.0], } df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="b", right_on="c", how="left").sort("index") # type: ignore[arg-type] + result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("index") # type: ignore[arg-type] result = result.drop("index_right") expected: dict[str, list[Any]] = { - "a": [1, 2, 3], - "b": [4, 5, 6], + "antananarivo": [1, 2, 3], + "bob": [4, 5, 6], "d": [1, 4, 2], - "a_right": [1, 2, 3], + "antananarivo_right": [1, 2, 3], "d_right": [1, 4, 2], "index": [0, 1, 2], } compare_dicts(result, expected) - result = df_left.join(df_right, left_on="a", right_on="d", how="left").select( # type: ignore[arg-type] - nw.all().fill_null(float("nan")) - ) + result = df_left.join( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="d", + how="left", + ).select(nw.all().fill_null(float("nan"))) result = result.sort("index") result = result.drop("index_right") expected = { - "a": [1, 2, 3], - "b": [4, 5, 6], + "antananarivo": [1, 2, 3], + "bob": [4, 5, 6], "d": [1, 4, 2], - "a_right": [1.0, 3.0, float("nan")], + "antananarivo_right": [1.0, 3.0, float("nan")], "c": [4.0, 6.0, float("nan")], "index": [0, 1, 2], } @@ -215,7 +270,7 @@ def test_left_join_overlapping_column(constructor: Any) -> None: @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) def test_join_keys_exceptions(constructor: Any, how: str) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( @@ -227,17 +282,17 @@ def test_join_keys_exceptions(constructor: Any, how: str) -> None: ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how, left_on="a") # type: ignore[arg-type] + df.join(df, how=how, left_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how, right_on="a") # type: ignore[arg-type] + df.join(df, how=how, right_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=f"If `on` is specified, `left_on` and `right_on` should be None for {how}.", ): - df.join(df, how=how, on="a", right_on="a") # type: ignore[arg-type] + df.join(df, how=how, on="antananarivo", right_on="antananarivo") # type: ignore[arg-type] def test_joinasof_numeric(constructor: Any, request: Any) -> None: @@ -247,28 +302,44 @@ def test_joinasof_numeric(constructor: Any, request: Any) -> None: ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor({"a": [1, 5, 10], "val": ["a", "b", "c"]})).sort("a") + df = nw.from_native( + constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) + ).sort("antananarivo") df_right = nw.from_native( - constructor({"a": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("a") - result_backward = df.join_asof(df_right, left_on="a", right_on="a") # type: ignore[arg-type] - result_forward = df.join_asof(df_right, left_on="a", right_on="a", strategy="forward") # type: ignore[arg-type] - result_nearest = df.join_asof(df_right, left_on="a", right_on="a", strategy="nearest") # type: ignore[arg-type] - result_backward_on = df.join_asof(df_right, on="a") # type: ignore[arg-type] - result_forward_on = df.join_asof(df_right, on="a", strategy="forward") # type: ignore[arg-type] - result_nearest_on = df.join_asof(df_right, on="a", strategy="nearest") # type: ignore[arg-type] + constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) + ).sort("antananarivo") + result_backward = df.join_asof( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + ) + result_forward = df.join_asof( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + strategy="forward", + ) + result_nearest = df.join_asof( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + strategy="nearest", + ) + result_backward_on = df.join_asof(df_right, on="antananarivo") # type: ignore[arg-type] + result_forward_on = df.join_asof(df_right, on="antananarivo", strategy="forward") # type: ignore[arg-type] + result_nearest_on = df.join_asof(df_right, on="antananarivo", strategy="nearest") # type: ignore[arg-type] expected_backward = { - "a": [1, 5, 10], + "antananarivo": [1, 5, 10], "val": ["a", "b", "c"], "val_right": [1, 3, 7], } expected_forward = { - "a": [1, 5, 10], + "antananarivo": [1, 5, 10], "val": ["a", "b", "c"], "val_right": [1, 6, float("nan")], } expected_nearest = { - "a": [1, 5, 10], + "antananarivo": [1, 5, 10], "val": ["a", "b", "c"], "val_right": [1, 6, 7], } @@ -366,16 +437,24 @@ def test_joinasof_by(constructor: Any, request: Any) -> None: ): request.applymarker(pytest.mark.xfail) df = nw.from_native( - constructor({"a": [1, 5, 7, 10], "b": ["D", "D", "C", "A"], "c": [9, 2, 1, 1]}) - ).sort("a") + constructor( + { + "antananarivo": [1, 5, 7, 10], + "bob": ["D", "D", "C", "A"], + "c": [9, 2, 1, 1], + } + ) + ).sort("antananarivo") df_right = nw.from_native( - constructor({"a": [1, 4, 5, 8], "b": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]}) - ).sort("a") - result = df.join_asof(df_right, on="a", by_left="b", by_right="b") # type: ignore[arg-type] - result_by = df.join_asof(df_right, on="a", by="b") # type: ignore[arg-type] + constructor( + {"antananarivo": [1, 4, 5, 8], "bob": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]} + ) + ).sort("antananarivo") + result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") # type: ignore[arg-type] + result_by = df.join_asof(df_right, on="antananarivo", by="bob") # type: ignore[arg-type] expected = { - "a": [1, 5, 7, 10], - "b": ["D", "D", "C", "A"], + "antananarivo": [1, 5, 7, 10], + "bob": ["D", "D", "C", "A"], "c": [9, 2, 1, 1], "d": [1, 3, float("nan"), 4], } @@ -384,31 +463,38 @@ def test_joinasof_by(constructor: Any, request: Any) -> None: @pytest.mark.parametrize("strategy", ["back", "furthest"]) -def test_joinasof_not_implemented(constructor: Any, strategy: str) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} +def test_joinasof_not_implemented( + constructor: Any, strategy: Literal["backward", "forward"] +) -> None: + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( NotImplementedError, match=rf"Only the following strategies are supported: \('backward', 'forward', 'nearest'\); found '{strategy}'.", ): - df.join_asof(df, left_on="a", right_on="a", strategy=strategy) # type: ignore[arg-type] + df.join_asof( + df, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + strategy=strategy, + ) def test_joinasof_keys_exceptions(constructor: Any) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df, left_on="a") # type: ignore[arg-type] + df.join_asof(df, left_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df, right_on="a") # type: ignore[arg-type] + df.join_asof(df, right_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", @@ -418,48 +504,53 @@ def test_joinasof_keys_exceptions(constructor: Any) -> None: ValueError, match="If `on` is specified, `left_on` and `right_on` should be None.", ): - df.join_asof(df, left_on="a", right_on="a", on="a") # type: ignore[arg-type] + df.join_asof( + df, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + on="antananarivo", + ) with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None.", ): - df.join_asof(df, left_on="a", on="a") # type: ignore[arg-type] + df.join_asof(df, left_on="antananarivo", on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None.", ): - df.join_asof(df, right_on="a", on="a") # type: ignore[arg-type] + df.join_asof(df, right_on="antananarivo", on="antananarivo") # type: ignore[arg-type] def test_joinasof_by_exceptions(constructor: Any) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None.", ): - df.join_asof(df, on="a", by_left="b", by_right="b", by="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob", by_right="bob", by="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): - df.join_asof(df, on="a", by_left="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): - df.join_asof(df, on="a", by_right="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_right="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None.", ): - df.join_asof(df, on="a", by_left="b", by="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob", by="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None.", ): - df.join_asof(df, on="a", by_right="b", by="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_right="bob", by="bob") # type: ignore[arg-type]