Skip to content

Commit

Permalink
chore: split eager-only and eager+lazy tests a bit (#652)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Jul 28, 2024
1 parent 946d9d7 commit d70e0b3
Show file tree
Hide file tree
Showing 47 changed files with 412 additions and 220 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ jobs:
key: ${{ runner.os }}-build-${{ matrix.python-version }}
- name: install-reqs
run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt
- name: install-modin
run: python -m pip install --upgrade modin[dask]
- name: show-deps
run: pip freeze
- name: Run pytest
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,15 @@ def __lt__(self, other: ArrowExpr | Any) -> Self:
def __and__(self, other: ArrowExpr | bool | Any) -> Self:
    """Handle ``self & other``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"__and__"`` and ``other`` passed as a keyword argument.
    """
    method_name = "__and__"
    return reuse_series_implementation(self, method_name, other=other)

def __rand__(self, other: ArrowExpr | bool | Any) -> Self:
    """Handle the reflected ``other & self``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"__rand__"`` and ``other`` passed as a keyword argument.
    """
    method_name = "__rand__"
    return reuse_series_implementation(self, method_name, other=other)

def __or__(self, other: ArrowExpr | bool | Any) -> Self:
    """Handle ``self | other``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"__or__"`` and ``other`` passed as a keyword argument.
    """
    method_name = "__or__"
    return reuse_series_implementation(self, method_name, other=other)

def __ror__(self, other: ArrowExpr | bool | Any) -> Self:
    """Handle the reflected ``other | self``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"__ror__"`` and ``other`` passed as a keyword argument.
    """
    method_name = "__ror__"
    return reuse_series_implementation(self, method_name, other=other)

def __add__(self, other: ArrowExpr | Any) -> Self:
    """Handle ``self + other``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"__add__"``; ``other`` is passed positionally here.
    """
    method_name = "__add__"
    return reuse_series_implementation(self, method_name, other)

Expand Down
12 changes: 12 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,24 @@ def __and__(self, other: Any) -> Self:
other = validate_column_comparand(other)
return self._from_native_series(pc.and_kleene(ser, other))

def __rand__(self, other: Any) -> Self:
    """Handle the reflected ``other & self``.

    ``other`` is first passed through ``validate_column_comparand``; the
    operands are then handed to ``and_kleene`` with ``other`` on the left
    and this series' native data on the right.
    """
    compute = get_pyarrow_compute()
    native = self._native_series
    left_operand = validate_column_comparand(other)
    combined = compute.and_kleene(left_operand, native)
    return self._from_native_series(combined)

def __or__(self, other: Any) -> Self:
    """Handle ``self | other``.

    ``other`` is first passed through ``validate_column_comparand``; the
    operands are then handed to ``or_kleene`` with this series' native
    data on the left and ``other`` on the right.
    """
    compute = get_pyarrow_compute()
    native = self._native_series
    right_operand = validate_column_comparand(other)
    combined = compute.or_kleene(native, right_operand)
    return self._from_native_series(combined)

def __ror__(self, other: Any) -> Self:
    """Handle the reflected ``other | self``.

    ``other`` is first passed through ``validate_column_comparand``; the
    operands are then handed to ``or_kleene`` with ``other`` on the left
    and this series' native data on the right.
    """
    compute = get_pyarrow_compute()
    native = self._native_series
    left_operand = validate_column_comparand(other)
    combined = compute.or_kleene(left_operand, native)
    return self._from_native_series(combined)

def __add__(self, other: Any) -> Self:
pc = get_pyarrow_compute()
other = validate_column_comparand(other)
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ filterwarnings = [
'ignore:np.find_common_type is deprecated',
'ignore:is_sparse is deprecated and will be removed',
'ignore:Passing a BlockManager to DataFrame is deprecated',
'ignore:.*defaulting to pandas implementation',
'ignore:.*implementation has mismatches with pandas',
]
xfail_strict = true
markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]
Expand Down
23 changes: 20 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
from typing import Any
from typing import Callable

Expand All @@ -6,10 +7,17 @@
import pyarrow as pa
import pytest

from narwhals.dependencies import get_dask_dataframe
from narwhals.dependencies import get_modin
from narwhals.typing import IntoDataFrame
from narwhals.typing import IntoFrame
from narwhals.utils import parse_version

with contextlib.suppress(ImportError):
import modin.pandas # noqa: F401
with contextlib.suppress(ImportError):
import dask.dataframe # noqa: F401


def pytest_addoption(parser: Any) -> None:
parser.addoption(
Expand Down Expand Up @@ -45,7 +53,7 @@ def pandas_pyarrow_constructor(obj: Any) -> IntoDataFrame:

def modin_constructor(obj: Any) -> IntoDataFrame: # pragma: no cover
mpd = get_modin()
return mpd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") # type: ignore[no-any-return]
return mpd.DataFrame(pd.DataFrame(obj)).convert_dtypes(dtype_backend="pyarrow") # type: ignore[no-any-return]


def polars_eager_constructor(obj: Any) -> IntoDataFrame:
Expand All @@ -56,6 +64,11 @@ def polars_lazy_constructor(obj: Any) -> pl.LazyFrame:
return pl.LazyFrame(obj)


def dask_lazy_constructor(obj: Any) -> IntoFrame:  # pragma: no cover
    """Build a lazy test frame from ``obj`` by way of pandas.

    ``obj`` is turned into a ``pd.DataFrame`` and then passed to
    ``from_pandas`` on whatever ``get_dask_dataframe()`` returns.
    """
    dask_dataframe = get_dask_dataframe()
    pandas_frame = pd.DataFrame(obj)
    return dask_dataframe.from_pandas(pandas_frame)  # type: ignore[no-any-return]


def pyarrow_table_constructor(obj: Any) -> IntoDataFrame:
    """Build a test frame from ``obj`` using ``pa.table``."""
    table = pa.table(obj)
    return table  # type: ignore[no-any-return]

Expand All @@ -70,16 +83,20 @@ def pyarrow_table_constructor(obj: Any) -> IntoDataFrame:
eager_constructors = [pandas_constructor]

eager_constructors.extend([polars_eager_constructor, pyarrow_table_constructor])
lazy_constructors = [polars_lazy_constructor]

if get_modin() is not None: # pragma: no cover
eager_constructors.append(modin_constructor)
# TODO(unassigned): when Dask gets better support, remove the "False and" part
if False and get_dask_dataframe() is not None: # pragma: no cover # noqa: SIM223
lazy_constructors.append(dask_lazy_constructor)


@pytest.fixture(params=eager_constructors)
def constructor(request: Any) -> Callable[[Any], IntoDataFrame]:
    """Parametrized fixture handing out each entry of ``eager_constructors``.

    The returned value is the current fixture parameter itself, i.e. one of
    the constructor callables, not a constructed frame.
    """
    eager_constructor = request.param
    return eager_constructor  # type: ignore[no-any-return]


@pytest.fixture(params=[*eager_constructors, polars_lazy_constructor])
def constructor_with_lazy(request: Any) -> Callable[[Any], Any]:
@pytest.fixture(params=[*eager_constructors, *lazy_constructors])
def constructor_lazy(request: Any) -> Callable[[Any], Any]:
return request.param # type: ignore[no-any-return]
11 changes: 8 additions & 3 deletions tests/expr_and_series/abs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@
from tests.utils import compare_dicts


def test_abs(constructor: Any) -> None:
df = nw.from_native(constructor({"a": [1, 2, 3, -4, 5]}), eager_only=True)
def test_abs(constructor_lazy: Any) -> None:
df = nw.from_native(constructor_lazy({"a": [1, 2, 3, -4, 5]}))
result = df.select(b=nw.col("a").abs())
expected = {"b": [1, 2, 3, 4, 5]}
compare_dicts(result, expected)
result = df.select(b=df["a"].abs())


def test_abs_series(constructor: Any) -> None:
    """Check ``abs`` called on a column pulled out of an eager-only frame."""
    data = {"a": [1, 2, 3, -4, 5]}
    frame = nw.from_native(constructor(data), eager_only=True)
    # The series is compared directly (wrapped in a dict) rather than via
    # ``select``, so only the series-level ``abs`` is exercised here.
    compare_dicts({"b": frame["a"].abs()}, {"b": [1, 2, 3, 4, 5]})
20 changes: 14 additions & 6 deletions tests/expr_and_series/all_horizontal_test.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
from typing import Any

import numpy as np
import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts


@pytest.mark.parametrize("col_expr", [np.array([False, False, True]), nw.col("a"), "a"])
def test_allh(constructor: Any, col_expr: Any) -> None:
def test_allh(constructor_lazy: Any) -> None:
data = {
"a": [False, False, True],
"b": [False, True, True],
}
df = nw.from_native(constructor_lazy(data))
result = df.select(all=nw.all_horizontal(nw.col("a"), nw.col("b")))

expected = {"all": [False, False, True]}
compare_dicts(result, expected)


def test_allh_series(constructor: Any) -> None:
data = {
"a": [False, False, True],
"b": [False, True, True],
}
df = nw.from_native(constructor(data), eager_only=True)
result = df.select(all=nw.all_horizontal(col_expr, nw.col("b")))
result = df.select(all=nw.all_horizontal(df["a"], df["b"]))

expected = {"all": [False, False, True]}
compare_dicts(result, expected)
4 changes: 2 additions & 2 deletions tests/expr_and_series/any_all_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from tests.utils import compare_dicts


def test_any_all(constructor: Any) -> None:
def test_any_all(constructor_lazy: Any) -> None:
df = nw.from_native(
constructor(
constructor_lazy(
{
"a": [True, False, True],
"b": [True, True, True],
Expand Down
10 changes: 3 additions & 7 deletions tests/expr_and_series/any_horizontal_test.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
from typing import Any

import numpy as np
import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts


@pytest.mark.parametrize("col_expr", [np.array([False, False, True]), nw.col("a"), "a"])
def test_anyh(constructor: Any, col_expr: Any) -> None:
def test_anyh(constructor_lazy: Any) -> None:
data = {
"a": [False, False, True],
"b": [False, True, True],
}
df = nw.from_native(constructor(data), eager_only=True)
result = df.select(any=nw.any_horizontal(col_expr, nw.col("b")))
df = nw.from_native(constructor_lazy(data))
result = df.select(any=nw.any_horizontal(nw.col("a"), nw.col("b")))

expected = {"any": [False, True, True]}
compare_dicts(result, expected)
9 changes: 7 additions & 2 deletions tests/expr_and_series/arg_true_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
from tests.utils import compare_dicts


def test_arg_true(constructor: Any) -> None:
df = nw.from_native(constructor({"a": [1, None, None, 3]}), eager_only=True)
def test_arg_true(constructor_lazy: Any) -> None:
df = nw.from_native(constructor_lazy({"a": [1, None, None, 3]}))
result = df.select(nw.col("a").is_null().arg_true())
expected = {"a": [1, 2]}
compare_dicts(result, expected)


def test_arg_true_series(constructor: Any) -> None:
    """Check ``is_null().arg_true()`` on a column of an eager-only frame."""
    frame = nw.from_native(constructor({"a": [1, None, None, 3]}), eager_only=True)
    null_positions = frame["a"].is_null().arg_true()
    # The nulls sit at positions 1 and 2 of the input column.
    compare_dicts(frame.select(null_positions), {"a": [1, 2]})
    assert "a" in frame  # cheeky test to hit `__contains__` method
76 changes: 62 additions & 14 deletions tests/expr_and_series/arithmetic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,22 @@
("__sub__", 1, [0, 1, 2]),
("__mul__", 2, [2, 4, 6]),
("__truediv__", 2.0, [0.5, 1.0, 1.5]),
("__truediv__", 1, [1, 2, 3]),
("__floordiv__", 2, [0, 1, 1]),
("__mod__", 2, [1, 0, 1]),
("__pow__", 2, [1, 4, 9]),
],
)
def test_arithmetic(
attr: str, rhs: Any, expected: list[Any], constructor: Any, request: Any
attr: str, rhs: Any, expected: list[Any], constructor_lazy: Any, request: Any
) -> None:
if "pandas_pyarrow" in str(constructor) and attr == "__mod__":
request.applymarker(pytest.mark.xfail)

# pyarrow case
if "pyarrow_table" in str(constructor) and attr == "__mod__":
if attr == "__mod__" and any(
x in str(constructor_lazy) for x in ["pandas_pyarrow", "pyarrow_table", "modin"]
):
request.applymarker(pytest.mark.xfail)

data = {"a": [1, 2, 3]}
df = nw.from_native(constructor(data))
data = {"a": [1.0, 2, 3]}
df = nw.from_native(constructor_lazy(data))
result = df.select(getattr(nw.col("a"), attr)(rhs))
compare_dicts(result, {"a": expected})

Expand All @@ -49,18 +48,67 @@ def test_arithmetic(
],
)
def test_right_arithmetic(
attr: str, rhs: Any, expected: list[Any], constructor: Any, request: Any
attr: str, rhs: Any, expected: list[Any], constructor_lazy: Any, request: Any
) -> None:
if "pandas_pyarrow" in str(constructor) and attr in {"__rmod__"}:
if attr == "__rmod__" and any(
x in str(constructor_lazy) for x in ["pandas_pyarrow", "pyarrow_table", "modin"]
):
request.applymarker(pytest.mark.xfail)

# pyarrow case
if "table" in str(constructor) and attr in {"__rmod__"}:
data = {"a": [1, 2, 3]}
df = nw.from_native(constructor_lazy(data))
result = df.select(a=getattr(nw.col("a"), attr)(rhs))
compare_dicts(result, {"a": expected})


@pytest.mark.parametrize(
("attr", "rhs", "expected"),
[
("__add__", 1, [2, 3, 4]),
("__sub__", 1, [0, 1, 2]),
("__mul__", 2, [2, 4, 6]),
("__truediv__", 2.0, [0.5, 1.0, 1.5]),
("__truediv__", 1, [1, 2, 3]),
("__floordiv__", 2, [0, 1, 1]),
("__mod__", 2, [1, 0, 1]),
("__pow__", 2, [1, 4, 9]),
],
)
def test_arithmetic_series(
attr: str, rhs: Any, expected: list[Any], constructor: Any, request: Any
) -> None:
if attr == "__mod__" and any(
x in str(constructor) for x in ["pandas_pyarrow", "pyarrow_table", "modin"]
):
request.applymarker(pytest.mark.xfail)

data = {"a": [1, 2, 3]}
df = nw.from_native(constructor(data))
result = df.select(a=getattr(nw.col("a"), attr)(rhs))
df = nw.from_native(constructor(data), eager_only=True)
result = df.select(getattr(df["a"], attr)(rhs))
compare_dicts(result, {"a": expected})


@pytest.mark.parametrize(
    ("attr", "rhs", "expected"),
    [
        ("__radd__", 1, [2, 3, 4]),
        ("__rsub__", 1, [0, -1, -2]),
        ("__rmul__", 2, [2, 4, 6]),
        ("__rtruediv__", 2.0, [2, 1, 2 / 3]),
        ("__rfloordiv__", 2, [2, 1, 0]),
        ("__rmod__", 2, [0, 0, 2]),
        ("__rpow__", 2, [2, 4, 8]),
    ],
)
def test_right_arithmetic_series(
    attr: str, rhs: Any, expected: list[Any], constructor: Any, request: Any
) -> None:
    """Check reflected arithmetic dunders called on a series.

    Each case looks up ``attr`` on column ``"a"`` of an eager-only frame,
    calls it with ``rhs`` and compares the selected result to ``expected``.
    ``__rmod__`` is marked as an expected failure when the constructor's
    string form contains ``pandas_pyarrow``, ``pyarrow_table`` or ``modin``.
    """
    if attr == "__rmod__":
        constructor_repr = str(constructor)
        known_failures = ("pandas_pyarrow", "pyarrow_table", "modin")
        if any(backend in constructor_repr for backend in known_failures):
            request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor({"a": [1, 2, 3]}), eager_only=True)
    reflected_op = getattr(frame["a"], attr)
    compare_dicts(frame.select(a=reflected_op(rhs)), {"a": expected})
Loading

0 comments on commit d70e0b3

Please sign in to comment.