From 33f85c804ef15d73147af03fca018ba730d60e17 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:59:00 +0300 Subject: [PATCH 01/11] coverage --- narwhals/_pandas_like/utils.py | 14 ++------------ narwhals/series.py | 3 --- tests/test_common.py | 8 ++++++++ 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index c5b1b5d42..be620c19c 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -80,13 +80,7 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any: ) raise ValueError(msg) return other._series - if isinstance(other, list) and len(other) > 1: - # e.g. `plx.all() + plx.all()` - msg = "Multi-output expressions are not supported in this context" - raise ValueError(msg) - if isinstance(other, list): - other = other[0] - return other + raise AssertionError("Please report a bug") def maybe_evaluate_expr(df: PandasDataFrame, arg: Any) -> Any: @@ -101,12 +95,8 @@ def maybe_evaluate_expr(df: PandasDataFrame, arg: Any) -> Any: def parse_into_exprs( implementation: str, *exprs: IntoPandasExpr | Iterable[IntoPandasExpr], - **named_exprs: IntoPandasExpr, ) -> list[PandasExpr]: - out = [parse_into_expr(implementation, into_expr) for into_expr in flatten(exprs)] - for name, expr in named_exprs.items(): - out.append(parse_into_expr(implementation, expr).alias(name)) - return out + return [parse_into_expr(implementation, into_expr) for into_expr in flatten(exprs)] def parse_into_expr(implementation: str, into_expr: IntoPandasExpr) -> PandasExpr: diff --git a/narwhals/series.py b/narwhals/series.py index bb5898817..c69211899 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -78,9 +78,6 @@ def dtype(self) -> Any: def shape(self) -> tuple[int]: return self._series.shape # type: ignore[no-any-return] - def rename(self, name: str) -> Self: - return self._from_series(self._series.rename(name)) - def cast( self, dtype: Any, diff --git a/tests/test_common.py b/tests/test_common.py index a570e5303..a35dbd414 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -509,3 +509,11 @@ def test_reindex(df_raw: Any) -> None: df = nw.DataFrame(df_raw) with pytest.raises(RuntimeError, match="implicit index alignment"): df.select("a", df["b"].sort()) + + s = df["a"] + with pytest.raises(ValueError, match="index alignment"): + nw.to_native(s > s.sort()) + with pytest.raises(ValueError, match="index alignment"): + nw.to_native(df.with_columns(s.sort())) + with pytest.raises(ValueError, match="Multi-output expressions are not supported"): + nw.to_native(df.with_columns(nw.all() + nw.all())) From 7fc48679b8e8f8c6835b6dd7c5d68a39a1eca247 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 15:17:15 +0300 Subject: [PATCH 02/11] coverage --- narwhals/_pandas_like/utils.py | 12 ++++++------ tests/test_common.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index be620c19c..310cb1225 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -112,8 +112,8 @@ def parse_into_expr(implementation: str, into_expr: IntoPandasExpr) -> PandasExp return plx._create_expr_from_series(into_expr) if isinstance(into_expr, str): return plx.col(into_expr) - msg = f"Expected IntoExpr, got {type(into_expr)}" - raise TypeError(msg) + msg = f"Expected IntoExpr, got {type(into_expr)}" # pragma: no cover + raise AssertionError(msg) def evaluate_into_expr( @@ -140,8 +140,8 @@ def evaluate_into_exprs( for name, expr in named_exprs.items(): evaluated_expr = evaluate_into_expr(df, expr) if len(evaluated_expr) > 1: - msg = "Named expressions must return a single column" - raise ValueError(msg) + msg = "Named expressions must return a single column" # pragma: no cover + raise AssertionError(msg) series.append(evaluated_expr[0].alias(name)) return series @@ -194,8 +194,8 @@ def func(df: PandasDataFrame) -> list[PandasSeries]: def item(s: Any) -> Any: # cuDF doesn't have Series.item(). if len(s) != 1: - msg = "Can only convert a Series of length 1 to a scalar" - raise ValueError(msg) + msg = "Can only convert a Series of length 1 to a scalar" # pragma: no cover + raise AssertionError(msg) return s.iloc[0] diff --git a/tests/test_common.py b/tests/test_common.py index a35dbd414..fc5ce1606 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -113,6 +113,10 @@ def test_double(df_raw: Any) -> None: result_native = nw.to_native(result) expected = {"a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} compare_dicts(result_native, expected) + result = df.with_columns(nw.col("a").alias("o"), nw.all() * 2) + result_native = nw.to_native(result) + expected = {"o": [1, 3, 2], "a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} + compare_dicts(result_native, expected) @pytest.mark.parametrize( @@ -171,6 +175,14 @@ def test_double_selected(df_raw: Any) -> None: result_native = nw.to_native(result) expected = {"a": [2, 6, 4], "b": [8, 8, 12]} compare_dicts(result_native, expected) + result = df.select("z", nw.col("a", "b") * 2) + result_native = nw.to_native(result) + expected = {"z": [7, 8, 9], "a": [2, 6, 4], "b": [8, 8, 12]} + compare_dicts(result_native, expected) + result = df.select("a").select(nw.col("a") + nw.all()) + result_native = nw.to_native(result) + expected = {"a": [2, 6, 4]} + compare_dicts(result_native, expected) @pytest.mark.parametrize("df_raw", [df_pandas, df_lazy]) From d6e535036b1e65186c35fa7c332f3502d3db7ffd Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 15:25:50 +0300 Subject: [PATCH 03/11] coverage --- narwhals/_pandas_like/utils.py | 37 ---------------------------------- 1 file changed, 37 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 310cb1225..96ed0bf3e 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -9,7 +9,6 @@ from narwhals.utils import flatten from narwhals.utils import isinstance_or_issubclass from narwhals.utils import parse_version -from narwhals.utils import remove_prefix T = TypeVar("T") @@ -209,42 +208,6 @@ def is_simple_aggregation(expr: PandasExpr) -> bool: ) -def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) -> Any: - """ - Use fastpath for simple aggregations if possible. - - If an aggregation is simple (e.g. `pl.col('a').mean()`), then pandas-like - implementations have a fastpath we can use. - - For example, `df.group_by('a').agg(pl.col('b').mean())` can be evaluated - as `df.groupby('a')['b'].mean()`, whereas - `df.group_by('a').agg(mean=(pl.col('b') - pl.col('c').mean()).mean())` - requires a lambda function, which is slower. - - Returns naive DataFrame. - """ - if expr._depth == 0: - # e.g. agg(pl.len()) - df = getattr(grouped, expr._function_name.replace("len", "size"))() - df = ( - df.drop(columns=keys) - if len(df.shape) > 1 - else df.reset_index(drop=True).to_frame("size") - ) - return df.rename(columns={"size": expr._output_names[0]}) # type: ignore[index] - if expr._root_names is None or expr._output_names is None: - msg = "Expected expr to have root_names and output_names set, but they are None. Please report a bug." - raise AssertionError(msg) - if len(expr._root_names) != len(expr._output_names): - msg = "Expected expr to have same number of root_names and output_names, but they are different. Please report a bug." - raise AssertionError(msg) - new_names = dict(zip(expr._root_names, expr._output_names)) - function_name = remove_prefix(expr._function_name, "col->") - return getattr(grouped[expr._root_names], function_name)()[expr._root_names].rename( - columns=new_names - ) - - def horizontal_concat(dfs: list[Any], implementation: str) -> Any: """ Concatenate (native) DataFrames horizontally. From a907fa8b10e27caab2f670d3ab72842413c93a32 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:00:13 +0300 Subject: [PATCH 04/11] coverage --- narwhals/_pandas_like/namespace.py | 1 + narwhals/_pandas_like/utils.py | 14 ++++---- narwhals/series.py | 12 ++++++- tests/test_series.py | 54 ++++++++++++++++++++++++++++-- 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index eb2de0705..547d40eb1 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -33,6 +33,7 @@ class PandasNamespace: Float32 = dtypes.Float32 Boolean = dtypes.Boolean String = dtypes.String + Datetime = dtypes.Datetime def make_native_series(self, name: str, data: list[Any], index: Any) -> Any: if self._implementation == "pandas": diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 96ed0bf3e..4fd9017ef 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -316,8 +316,8 @@ def translate_dtype(dtype: Any) -> DType: return dtypes.Object() if str(dtype).startswith("datetime64"): return dtypes.Datetime() - msg = f"Unknown dtype: {dtype}" - raise TypeError(msg) + msg = f"Unknown dtype: {dtype}" # pragma: no cover + raise AssertionError(msg) def reverse_translate_dtype(dtype: DType | type[DType]) -> Any: @@ -333,8 +333,8 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any: return "int32" if isinstance_or_issubclass(dtype, dtypes.Int16): return "int16" - if isinstance_or_issubclass(dtype, dtypes.UInt8): - return "uint8" + if isinstance_or_issubclass(dtype, dtypes.Int8): + return "int8" if isinstance_or_issubclass(dtype, dtypes.UInt64): return "uint64" if isinstance_or_issubclass(dtype, dtypes.UInt32): @@ -347,8 +347,10 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any: return "object" if isinstance_or_issubclass(dtype, dtypes.Boolean): return "bool" - msg = f"Unknown dtype: {dtype}" - raise TypeError(msg) + if isinstance_or_issubclass(dtype, dtypes.Datetime): + return "datetime64[us]" + msg = f"Unknown dtype: {dtype}" # pragma: no cover + raise AssertionError(msg) def validate_indices(series: list[PandasSeries]) -> list[PandasSeries]: diff --git a/narwhals/series.py b/narwhals/series.py index c69211899..b878f6acc 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -4,6 +4,7 @@ from typing import Any from narwhals.dtypes import to_narwhals_dtype +from narwhals.dtypes import translate_dtype from narwhals.translate import get_pandas from narwhals.translate import get_polars @@ -36,6 +37,13 @@ def __init__( msg = f"Expected pandas or Polars Series, got: {type(series)}" # pragma: no cover raise TypeError(msg) # pragma: no cover + def __narwhals_namespace__(self) -> Any: + if self._is_polars: + import polars as pl + + return pl + return self._series.__narwhals_namespace__() + def _extract_native(self, arg: Any) -> Any: from narwhals.series import Series @@ -82,7 +90,9 @@ def cast( self, dtype: Any, ) -> Self: - return self._from_series(self._series.cast(dtype)) + return self._from_series( + self._series.cast(translate_dtype(self.__narwhals_namespace__(), dtype)) + ) def item(self) -> Any: return self._series.item() diff --git a/tests/test_series.py b/tests/test_series.py index 7ced67eed..bb6d17e02 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -104,7 +104,7 @@ def test_dtypes() -> None: def test_cast() -> None: - df = pl.DataFrame( + df_raw = pl.DataFrame( { "a": [1], "b": [1], @@ -136,7 +136,7 @@ def test_cast() -> None: "m": pl.Boolean, }, ) - df = nw.DataFrame(df).select( # type: ignore[assignment] + df = nw.DataFrame(df_raw).select( nw.col("a").cast(nw.Int32), nw.col("b").cast(nw.Int16), nw.col("c").cast(nw.Int8), @@ -172,3 +172,53 @@ def test_cast() -> None: assert result == expected result_pd = nw.from_native(df.to_pandas()).schema assert result_pd == expected + result = df.select( + df["a"].cast(nw.Int32), + df["b"].cast(nw.Int16), + df["c"].cast(nw.Int8), + df["d"].cast(nw.Int64), + df["e"].cast(nw.UInt32), + df["f"].cast(nw.UInt16), + df["g"].cast(nw.UInt8), + df["h"].cast(nw.UInt64), + df["i"].cast(nw.Float32), + df["j"].cast(nw.Float64), + df["k"].cast(nw.String), + df["l"].cast(nw.Datetime), + df["m"].cast(nw.Int8), + n=df["m"].cast(nw.Boolean), + ).schema + expected = { + "a": nw.Int32, + "b": nw.Int16, + "c": nw.Int8, + "d": nw.Int64, + "e": nw.UInt32, + "f": nw.UInt16, + "g": nw.UInt8, + "h": nw.UInt64, + "i": nw.Float32, + "j": nw.Float64, + "k": nw.String, + "l": nw.Datetime, + "m": nw.Int8, + "n": nw.Boolean, + } + df = nw.from_native(df.to_pandas()) # type: ignore[assignment] + result_pd = df.select( + df["a"].cast(nw.Int32), + df["b"].cast(nw.Int16), + df["c"].cast(nw.Int8), + df["d"].cast(nw.Int64), + df["e"].cast(nw.UInt32), + df["f"].cast(nw.UInt16), + df["g"].cast(nw.UInt8), + df["h"].cast(nw.UInt64), + df["i"].cast(nw.Float32), + df["j"].cast(nw.Float64), + df["k"].cast(nw.String), + df["l"].cast(nw.Datetime), + df["m"].cast(nw.Int8), + n=df["m"].cast(nw.Boolean), + ).schema + assert result == expected From facf0b1a99682d4450f1cd27225ad23adf2767d9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:07:07 +0300 Subject: [PATCH 05/11] coverage --- narwhals/_pandas_like/utils.py | 6 +++--- tests/test_common.py | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 4fd9017ef..7e4b99ff8 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -239,13 +239,13 @@ def vertical_concat(dfs: list[Any], implementation: str) -> Any: Should be in namespace. """ if not dfs: - msg = "No dataframes to concatenate" - raise TypeError(msg) + msg = "No dataframes to concatenate" # pragma: no cover + raise AssertionError(msg) cols = set(dfs[0].columns) for df in dfs: cols_current = set(df.columns) if cols_current != cols: - msg = "Unable to vstack, column names don't match" + msg = "unable to vstack, column names don't match" raise TypeError(msg) if implementation == "pandas": import pandas as pd diff --git a/tests/test_common.py b/tests/test_common.py index fc5ce1606..00b2d98f3 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -468,6 +468,10 @@ def test_concat_vertical(df_raw: Any, df_raw_right: Any) -> None: result_native = nw.to_native(result) expected = {"c": [1, 3, 2, 6, 12, -1], "d": [4, 4, 6, 0, -4, 2]} compare_dicts(result_native, expected) + with pytest.raises(ValueError, match="No items"): + nw.concat([], how="vertical") + with pytest.raises(Exception, match="unable to vstack"): + nw.concat([df_left, df_right.rename({"d": "i"})], how="vertical").collect() # type: ignore[union-attr] @pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) From 3bea8ceab769ae7ced52f9ec470ba42cda719e25 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:08:29 +0300 Subject: [PATCH 06/11] coverage --- narwhals/_pandas_like/utils.py | 2 -- narwhals/dtypes.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 7e4b99ff8..196951e9a 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -312,8 +312,6 @@ def translate_dtype(dtype: Any) -> DType: return dtypes.String() if dtype in ("bool", "boolean"): return dtypes.Boolean() - if dtype == "object": - return dtypes.Object() if str(dtype).startswith("datetime64"): return dtypes.Datetime() msg = f"Unknown dtype: {dtype}" # pragma: no cover diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 14fa73bd2..b674d305d 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -63,10 +63,6 @@ class String(DType): ... class Boolean(DType): ... -class Object(DType): # todo: do we really want this one? - ... - - class Datetime(TemporalType): ... From 9d48c2658a345adbbe9b195bc0cd583bce80e467 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:09:57 +0300 Subject: [PATCH 07/11] coverage --- narwhals/dtypes.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index b674d305d..db373e261 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -96,10 +96,8 @@ def translate_dtype(plx: Any, dtype: DType) -> Any: return plx.Boolean if dtype == Datetime: return plx.Datetime - if dtype == Date: - return plx.Date - msg = f"Unknown dtype: {dtype}" - raise TypeError(msg) + msg = f"Unknown dtype: {dtype}" # pragma: no cover + raise AssertionError(msg) def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType: @@ -133,7 +131,5 @@ def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType: return Boolean() if dtype == pl.Datetime: return Datetime() - if dtype == pl.Date: - return Date() - msg = f"Unexpected dtype, got: {type(dtype)}" - raise TypeError(msg) + msg = f"Unexpected dtype, got: {type(dtype)}" # pragma: no cover + raise AssertionError(msg) From ed680c22af3fb65b247f86c6ace6146385107c0c Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:10:29 +0300 Subject: [PATCH 08/11] coverage --- narwhals/series.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/narwhals/series.py b/narwhals/series.py index b878f6acc..99b8c2be4 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -68,9 +68,6 @@ def __repr__(self) -> str: # pragma: no cover + "┘" ) - def alias(self, name: str) -> Self: - return self._from_series(self._series.alias(name)) - def __len__(self) -> int: return len(self._series) From f7686613c2f662aab7382de487a480dc172c0504 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:18:47 +0300 Subject: [PATCH 09/11] enforce 100 --- .github/workflows/pytest.yml | 2 +- README.md | 2 +- narwhals/_pandas_like/series.py | 46 +++------------------------ narwhals/series.py | 55 --------------------------------- tests/test_series.py | 11 +++++++ 5 files changed, 17 insertions(+), 99 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index c52f349c4..9f9ad09e4 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -31,6 +31,6 @@ jobs: - name: install-modin run: python -m pip install --upgrade modin[dask] - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 + run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 - name: Run doctests run: pytest narwhals --doctest-modules diff --git a/README.md b/README.md index 9dae03651..f37c5bb22 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Seamlessly support all, without depending on any! - ✅ **No dependencies** (not even Polars), keep your library lightweight - ✅ Separate **lazy** and eager APIs - ✅ Use Polars **Expressions** -- ✅ Tested against pandas and Polars nightly builds! +- ✅ 100% branch coverage, tested against pandas and Polars nightly builds! ## Installation diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 39ffd4c2d..c41802385 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -4,8 +4,6 @@ from typing import Any from typing import Sequence -from pandas.api.types import is_extension_array_dtype - from narwhals._pandas_like.utils import item from narwhals._pandas_like.utils import reverse_translate_dtype from narwhals._pandas_like.utils import translate_dtype @@ -255,34 +253,10 @@ def sum(self) -> Any: ser = self._series return ser.sum() - def prod(self) -> Any: - ser = self._series - return ser.prod() - - def median(self) -> Any: - ser = self._series - return ser.median() - def mean(self) -> Any: ser = self._series return ser.mean() - def std( - self, - *, - correction: float = 1.0, - ) -> Any: - ser = self._series - return ser.std(ddof=correction) - - def var( - self, - *, - correction: float = 1.0, - ) -> Any: - ser = self._series - return ser.var(ddof=correction) - def len(self) -> Any: return len(self._series) @@ -300,12 +274,6 @@ def n_unique(self) -> int: ser = self._series return ser.nunique() # type: ignore[no-any-return] - def zip_with(self, mask: PandasSeries, other: PandasSeries) -> PandasSeries: - mask = validate_column_comparand(self._series.index, mask) - other = validate_column_comparand(self._series.index, other) - ser = self._series - return self._from_series(ser.where(mask, other)) - def sample( self, n: int | None = None, @@ -327,12 +295,6 @@ def unique(self) -> PandasSeries: ) ) - def is_nan(self) -> PandasSeries: - ser = self._series - if is_extension_array_dtype(ser.dtype): - return self._from_series((ser != ser).fillna(False)) # noqa: PLR0124 - return self._from_series(ser.isna()) - def sort( self, *, @@ -353,9 +315,9 @@ def to_numpy(self) -> Any: def to_pandas(self) -> Any: if self._implementation == "pandas": return self._series - elif self._implementation == "cudf": + elif self._implementation == "cudf": # pragma: no cover return self._series.to_pandas() - elif self._implementation == "modin": + elif self._implementation == "modin": # pragma: no cover return self._series._to_pandas() - msg = f"Unknown implementation: {self._implementation}" - raise TypeError(msg) + msg = f"Unknown implementation: {self._implementation}" # pragma: no cover + raise AssertionError(msg) diff --git a/narwhals/series.py b/narwhals/series.py index 99b8c2be4..66c3fcf51 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -71,18 +71,10 @@ def __repr__(self) -> str: # pragma: no cover def __len__(self) -> int: return len(self._series) - @property - def name(self) -> str: - return self._series.name # type: ignore[no-any-return] - @property def dtype(self) -> Any: return to_narwhals_dtype(self._series.dtype, is_polars=self._is_polars) - @property - def shape(self) -> tuple[int]: - return self._series.shape # type: ignore[no-any-return] - def cast( self, dtype: Any, @@ -91,64 +83,17 @@ def cast( self._series.cast(translate_dtype(self.__narwhals_namespace__(), dtype)) ) - def item(self) -> Any: - return self._series.item() - - def is_between( - self, lower_bound: Any, upper_bound: Any, closed: str = "both" - ) -> Self: - return self._from_series( - self._series.is_between(lower_bound, upper_bound, closed) - ) - def is_in(self, other: Any) -> Self: return self._from_series(self._series.is_in(self._extract_native(other))) - def is_null(self) -> Self: - return self._from_series(self._series.is_null()) - - def drop_nulls(self) -> Self: - return self._from_series(self._series.drop_nulls()) - - def n_unique(self) -> int: - return self._series.n_unique() # type: ignore[no-any-return] - - def unique(self) -> Self: - return self._from_series(self._series.unique()) - def sort(self) -> Self: return self._from_series(self._series.sort()) - def zip_with(self, mask: Self, other: Self) -> Self: - return self._from_series( - self._series.zip_with(self._extract_native(mask), self._extract_native(other)) - ) - - def sample( - self, - n: int | None = None, - fraction: float | None = None, - *, - with_replacement: bool = False, - ) -> Self: - return self._from_series( - self._series.sample(n=n, fraction=fraction, with_replacement=with_replacement) - ) - def to_numpy(self) -> Any: return self._series.to_numpy() def to_pandas(self) -> Any: return self._series.to_pandas() - def mean(self) -> Any: - return self._series.mean() - - def std(self) -> Any: - return self._series.std() - def __gt__(self, other: Any) -> Series: return self._from_series(self._series.__gt__(self._extract_native(other))) - - def __lt__(self, other: Any) -> Series: - return self._from_series(self._series.__lt__(self._extract_native(other))) diff --git a/tests/test_series.py b/tests/test_series.py index bb6d17e02..6417f0de9 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -2,9 +2,12 @@ from typing import Any +import numpy as np import pandas as pd import polars as pl import pytest +from numpy.testing import assert_array_equal +from pandas.testing import assert_series_equal import narwhals as nw @@ -49,6 +52,14 @@ def test_dtype(df_raw: Any) -> None: assert result.is_numeric() +@pytest.mark.parametrize("df_raw", [df_pandas, df_lazy]) +def test_convert(df_raw: Any) -> None: + result = nw.LazyFrame(df_raw).collect()["a"].to_numpy() + assert_array_equal(result, np.array([1, 3, 2])) + result = nw.LazyFrame(df_raw).collect()["a"].to_pandas() + assert_series_equal(result, pd.Series([1, 3, 2], name="a")) + + def test_dtypes() -> None: df = pl.DataFrame( { From 7d821f48d041559e642f966507f3a46fbfff06df Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:27:49 +0300 Subject: [PATCH 10/11] enforce 100 --- .github/workflows/pytest.yml | 33 +++++++++++++++++++++++++++++-- narwhals/_pandas_like/group_by.py | 2 +- tests/test_common.py | 2 +- tests/test_str.py | 2 +- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9f9ad09e4..e9f5f9ba9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -6,10 +6,39 @@ on: branches: [main] jobs: - tox: + pytest-38: strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8"] + os: [windows-latest, ubuntu-latest] + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Cache multiple paths + uses: actions/cache@v3 + with: + path: | + ~/.cache/pip + $RUNNER_TOOL_CACHE/Python/* + ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-build-${{ matrix.python-version }} + - name: install-reqs + run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt + - name: install-modin + run: python -m pip install --upgrade modin[dask] + - name: Run pytest + run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=90 + - name: Run doctests + run: pytest narwhals --doctest-modules + + pytest-coverage: + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] os: [windows-latest, ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index d8491ccd6..8f9917f62 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -154,7 +154,7 @@ def func(df: Any) -> Any: if parse_version(pd.__version__) < parse_version("2.2.0"): # pragma: no cover result_complex = grouped.apply(func) - else: + else: # pragma: no cover result_complex = grouped.apply(func, include_groups=False) else: # pragma: no cover result_complex = grouped.apply(func) diff --git a/tests/test_common.py b/tests/test_common.py index 00b2d98f3..6d5fd0b4e 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -30,7 +30,7 @@ df_mpd = mpd.DataFrame( pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) ) -else: +else: # pragma: no cover df_mpd = df_pandas.copy() diff --git a/tests/test_str.py b/tests/test_str.py index 55c3819b0..46bd76ab4 100644 --- a/tests/test_str.py +++ b/tests/test_str.py @@ -20,7 +20,7 @@ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) df_mpd = mpd.DataFrame({"a": ["fdas", "edfas"]}) -else: +else: # pragma: no cover df_mpd = df_pandas.copy() From 712b76c29e25f0a82a098a508172094797050912 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:32:28 +0300 Subject: [PATCH 11/11] enforce 100 --- tests/tpch_q1_test.py | 4 ++-- tests/utils.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index 1200637cd..5e18fc700 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -21,7 +21,7 @@ def test_q1(library: str) -> None: if library == "pandas": df_raw = pd.read_parquet("tests/data/lineitem.parquet") df_raw["l_shipdate"] = pd.to_datetime(df_raw["l_shipdate"]) - elif library == "polars": + else: df_raw = pl.scan_parquet("tests/data/lineitem.parquet") var_1 = datetime(1998, 9, 2) df = nw.LazyFrame(df_raw) @@ -88,7 +88,7 @@ def test_q1_w_generic_funcs(library: str) -> None: if library == "pandas": df_raw = pd.read_parquet("tests/data/lineitem.parquet") df_raw["l_shipdate"] = pd.to_datetime(df_raw["l_shipdate"]) - elif library == "polars": + else: df_raw = pl.read_parquet("tests/data/lineitem.parquet") var_1 = datetime(1998, 9, 2) df = nw.DataFrame(df_raw) diff --git a/tests/utils.py b/tests/utils.py index c64384bce..31b2072cc 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -7,7 +7,9 @@ def zip_longest(left: Sequence[Any], right: Sequence[Any]) -> Iterator[Any]: if len(left) != len(right): - raise ValueError("left len != right len", len(left), len(right)) + raise ValueError( + "left len != right len", len(left), len(right) + ) # pragma: no cover return zip(left, right)