Skip to content

Commit

Permalink
add vertical dataframe concat
Browse files Browse the repository at this point in the history
  • Loading branch information
raisa committed Mar 28, 2024
1 parent fb75c28 commit 7dc0368
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 11 deletions.
8 changes: 5 additions & 3 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@


def concat(
items: Iterable[DataFrame | LazyFrame], *, how: Literal["horizontal"]
items: Iterable[DataFrame | LazyFrame], *, how: Literal["horizontal", "vertical"]
) -> DataFrame | LazyFrame:
if how != "horizontal":
raise NotImplementedError("Only horizontal concatenation is supported")
if how not in ("horizontal", "vertical"):
raise NotImplementedError(
"Only horizontal and vertical concatenations are supported"
)
if not items:
raise ValueError("No items to concatenate")
items = list(items)
Expand Down
19 changes: 12 additions & 7 deletions narwhals/pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from narwhals.pandas_like.utils import horizontal_concat
from narwhals.pandas_like.utils import parse_into_exprs
from narwhals.pandas_like.utils import series_from_iterable
from narwhals.pandas_like.utils import vertical_concat
from narwhals.utils import flatten_str

if TYPE_CHECKING:
Expand Down Expand Up @@ -186,10 +187,14 @@ def concat(
if len(kind) > 1:
msg = "Can only concat DataFrames or LazyFrames, not mixtures of the two"
raise TypeError(msg)
if how != "horizontal":
msg = "Only horizontal concatenation is supported for now"
raise TypeError(msg)
return PandasDataFrame(
horizontal_concat(dfs, implementation=self._implementation),
implementation=self._implementation,
)
if how == "horizontal":
return PandasDataFrame(
horizontal_concat(dfs, implementation=self._implementation),
implementation=self._implementation,
)
if how == "vertical":
return PandasDataFrame(
vertical_concat(dfs, implementation=self._implementation),
implementation=self._implementation,
)
raise NotImplementedError
33 changes: 32 additions & 1 deletion narwhals/pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str])

def horizontal_concat(dfs: list[Any], implementation: str) -> Any:
"""
Concatenate (native) DataFrames.
Concatenate (native) DataFrames horizontally.
Should be in namespace.
"""
Expand All @@ -275,6 +275,37 @@ def horizontal_concat(dfs: list[Any], implementation: str) -> Any:
raise TypeError(msg)


def vertical_concat(dfs: list[Any], implementation: str) -> Any:
    """
    Stack (native) DataFrames on top of each other (row-wise, ``axis=0``).

    Should be in namespace.

    Every frame must expose the same *set* of column names as the first
    frame; column order is not compared. The backend library ("pandas",
    "cudf" or "modin") is imported only when it is the one selected.

    Raises:
        TypeError: if ``dfs`` is empty, if any frame's column names differ
            from the first frame's, or if ``implementation`` is not one of
            the supported backends.
    """
    if not dfs:
        raise TypeError("No dataframes to concatenate")

    # The first frame is the reference every frame is compared against.
    reference_names = set(dfs[0].columns)
    if any(set(frame.columns) != reference_names for frame in dfs):
        raise TypeError("Unable to vstack, column names don't match")

    if implementation == "pandas":
        import pandas as pd

        stacked = pd.concat(dfs, axis=0, copy=False)
    elif implementation == "cudf":
        import cudf

        stacked = cudf.concat(dfs, axis=0)
    elif implementation == "modin":
        import modin.pandas as mpd

        stacked = mpd.concat(dfs, axis=0)
    else:
        raise TypeError(f"Unknown implementation: {implementation}")
    return stacked


def dataframe_from_dict(data: dict[str, Any], implementation: str) -> Any:
"""Return native dataframe."""
if implementation == "pandas":
Expand Down
32 changes: 32 additions & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
df_pandas_na = pd.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]})
df_lazy_na = pl.LazyFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]})
df_right_pandas = pd.DataFrame({"c": [6, 12, -1], "d": [0, -4, 2]})
df_right_lazy = pl.LazyFrame({"c": [6, 12, -1], "d": [0, -4, 2]})

if os.environ.get("CI", None):
import modin.pandas as mpd
Expand Down Expand Up @@ -365,3 +367,33 @@ def test_drop_nulls(df_raw: Any) -> None:
result = nw.to_native(df.select(nw.col("a").drop_nulls()))
expected = {"a": [3, 2]}
compare_dicts(result, expected)


@pytest.mark.parametrize(
    ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)]
)
def test_concat_horizontal(df_raw: Any, df_raw_right: Any) -> None:
    """Horizontal concat of two frames yields the columns of both inputs."""
    # Wrap both native frames before handing them to nw.concat.
    frames = [nw.LazyFrame(df_raw), nw.LazyFrame(df_raw_right)]
    combined = nw.to_native(nw.concat(frames, how="horizontal"))
    compare_dicts(
        combined,
        {
            "a": [1, 3, 2],
            "b": [4, 4, 6],
            "z": [7.0, 8, 9],
            "c": [6, 12, -1],
            "d": [0, -4, 2],
        },
    )


@pytest.mark.parametrize(
    ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)]
)
def test_concat_vertical(df_raw: Any, df_raw_right: Any) -> None:
    """Vertical concat appends the right frame's rows below the left frame's."""
    # Reshape the left frame so its column names ("c", "d") match the right
    # frame's: rename a -> c and b -> d, then discard the extra "z" column.
    renamed = nw.LazyFrame(df_raw).rename({"a": "c", "b": "d"})
    top = renamed.drop("z")
    bottom = nw.LazyFrame(df_raw_right)
    stacked = nw.to_native(nw.concat([top, bottom], how="vertical"))
    compare_dicts(
        stacked,
        {"c": [1, 3, 2, 6, 12, -1], "d": [4, 4, 6, 0, -4, 2]},
    )

0 comments on commit 7dc0368

Please sign in to comment.