Merge pull request #40 from MarcoGorelli/coverage
enforce 100% Coverage
MarcoGorelli authored Apr 2, 2024
2 parents 05ab4f4 + 712b76c commit 6a6725f
Showing 13 changed files with 165 additions and 192 deletions.
35 changes: 32 additions & 3 deletions .github/workflows/pytest.yml
@@ -6,10 +6,10 @@ on:
    branches: [main]

jobs:
  tox:
  pytest-38:
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
        python-version: ["3.8"]
        os: [windows-latest, ubuntu-latest]

    runs-on: ${{ matrix.os }}
@@ -31,6 +31,35 @@ jobs:
      - name: install-modin
        run: python -m pip install --upgrade modin[dask]
      - name: Run pytest
        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50
        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=90
      - name: Run doctests
        run: pytest narwhals --doctest-modules

  pytest-coverage:
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12"]
        os: [windows-latest, ubuntu-latest]

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache multiple paths
        uses: actions/cache@v3
        with:
          path: |
            ~/.cache/pip
            $RUNNER_TOOL_CACHE/Python/*
            ~\AppData\Local\pip\Cache
          key: ${{ runner.os }}-build-${{ matrix.python-version }}
      - name: install-reqs
        run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt
      - name: install-modin
        run: python -m pip install --upgrade modin[dask]
      - name: Run pytest
        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100
      - name: Run doctests
        run: pytest narwhals --doctest-modules
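The workflow change above splits CI into a Python 3.8 job that must reach at least 90% coverage and a job for 3.9–3.12 that must reach 100% (presumably because some paths cannot be exercised on 3.8), both enforced through pytest-cov's `--cov-fail-under` flag. The sketch below is only a conceptual illustration of what that flag enforces, not pytest-cov's implementation; the helper name is made up.

```python
def enforce_fail_under(covered: int, measurable: int, threshold: float) -> None:
    """Conceptual stand-in for the --cov-fail-under check (hypothetical helper)."""
    total = 100.0 * covered / measurable
    if total < threshold:
        # pytest-cov exits non-zero in this situation, which fails the CI job.
        raise SystemExit(f"coverage {total:.2f}% is below the required {threshold:g}%")


enforce_fail_under(covered=999, measurable=1000, threshold=100)  # 99.90% < 100 -> job fails
```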
2 changes: 1 addition & 1 deletion README.md
@@ -18,7 +18,7 @@ Seamlessly support all, without depending on any!
- ✅ **No dependencies** (not even Polars), keep your library lightweight
- ✅ Separate **lazy** and eager APIs
- ✅ Use Polars **Expressions**
- ✅ Tested against pandas and Polars nightly builds!
- ✅ 100% branch coverage, tested against pandas and Polars nightly builds!

## Installation

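The new README bullet advertises branch coverage specifically, which is stricter than line coverage: every conditional must be observed going both ways, not merely executed once. A toy illustration (my own function; it assumes branch measurement is switched on, e.g. via `--cov-branch` or `branch = true` in the coverage settings, which are not part of the diff shown here):

```python
def clamp_non_negative(x: int) -> int:
    if x < 0:
        x = 0
    return x


clamp_non_negative(-5)  # every line executes: line coverage is already 100%
clamp_non_negative(3)   # still needed: branch coverage also wants the `if`
                        # observed skipping its body
```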
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/group_by.py
@@ -154,7 +154,7 @@ def func(df: Any) -> Any:

if parse_version(pd.__version__) < parse_version("2.2.0"): # pragma: no cover
result_complex = grouped.apply(func)
else:
else: # pragma: no cover
result_complex = grouped.apply(func, include_groups=False)
else: # pragma: no cover
result_complex = grouped.apply(func)
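The `# pragma: no cover` markers added in `group_by.py` exclude version-gated branches from coverage measurement: for any single installed pandas, only one side of the version check can ever run, so without the pragmas the 100% gate could not pass within a single CI job. A minimal sketch of the pattern, with a hypothetical standalone function (the `parse_version` helper is the one the diff itself imports from `narwhals.utils`):

```python
import pandas as pd

from narwhals.utils import parse_version  # same helper the diff relies on


def apply_func_per_group(grouped, func):
    # Only one of these branches is reachable for a given pandas install,
    # so each is excluded from coverage rather than counted as a miss.
    if parse_version(pd.__version__) < parse_version("2.2.0"):  # pragma: no cover
        return grouped.apply(func)
    return grouped.apply(func, include_groups=False)  # pragma: no cover
```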
1 change: 1 addition & 0 deletions narwhals/_pandas_like/namespace.py
@@ -33,6 +33,7 @@ class PandasNamespace:
Float32 = dtypes.Float32
Boolean = dtypes.Boolean
String = dtypes.String
Datetime = dtypes.Datetime

def make_native_series(self, name: str, data: list[Any], index: Any) -> Any:
if self._implementation == "pandas":
46 changes: 4 additions & 42 deletions narwhals/_pandas_like/series.py
@@ -4,8 +4,6 @@
from typing import Any
from typing import Sequence

from pandas.api.types import is_extension_array_dtype

from narwhals._pandas_like.utils import item
from narwhals._pandas_like.utils import reverse_translate_dtype
from narwhals._pandas_like.utils import translate_dtype
@@ -255,34 +253,10 @@ def sum(self) -> Any:
ser = self._series
return ser.sum()

def prod(self) -> Any:
ser = self._series
return ser.prod()

def median(self) -> Any:
ser = self._series
return ser.median()

def mean(self) -> Any:
ser = self._series
return ser.mean()

def std(
self,
*,
correction: float = 1.0,
) -> Any:
ser = self._series
return ser.std(ddof=correction)

def var(
self,
*,
correction: float = 1.0,
) -> Any:
ser = self._series
return ser.var(ddof=correction)

def len(self) -> Any:
return len(self._series)

@@ -300,12 +274,6 @@ def n_unique(self) -> int:
ser = self._series
return ser.nunique() # type: ignore[no-any-return]

def zip_with(self, mask: PandasSeries, other: PandasSeries) -> PandasSeries:
mask = validate_column_comparand(self._series.index, mask)
other = validate_column_comparand(self._series.index, other)
ser = self._series
return self._from_series(ser.where(mask, other))

def sample(
self,
n: int | None = None,
@@ -327,12 +295,6 @@ def unique(self) -> PandasSeries:
)
)

def is_nan(self) -> PandasSeries:
ser = self._series
if is_extension_array_dtype(ser.dtype):
return self._from_series((ser != ser).fillna(False)) # noqa: PLR0124
return self._from_series(ser.isna())

def sort(
self,
*,
@@ -353,9 +315,9 @@ def to_numpy(self) -> Any:
def to_pandas(self) -> Any:
if self._implementation == "pandas":
return self._series
elif self._implementation == "cudf":
elif self._implementation == "cudf": # pragma: no cover
return self._series.to_pandas()
elif self._implementation == "modin":
elif self._implementation == "modin": # pragma: no cover
return self._series._to_pandas()
msg = f"Unknown implementation: {self._implementation}"
raise TypeError(msg)
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)
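A second pattern repeated in `series.py` (and again below in `utils.py` and `dtypes.py`): fallbacks that should be unreachable switch from `TypeError` to `AssertionError` and carry a `# pragma: no cover`, so defensive code does not count against the 100% target. A minimal sketch of the shape, with a hypothetical standalone function in place of the method above and the unreachable tail compressed into one excluded line:

```python
def to_pandas_native(series, implementation: str):
    """Return the underlying pandas object for a known backend (sketch only)."""
    if implementation == "pandas":
        return series
    if implementation == "modin":  # pragma: no cover - not exercised in every CI job
        return series._to_pandas()
    # Unreachable when the wrapper was constructed correctly, hence assert + exclude.
    raise AssertionError(f"Unknown implementation: {implementation}")  # pragma: no cover
```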
85 changes: 19 additions & 66 deletions narwhals/_pandas_like/utils.py
@@ -9,7 +9,6 @@
from narwhals.utils import flatten
from narwhals.utils import isinstance_or_issubclass
from narwhals.utils import parse_version
from narwhals.utils import remove_prefix

T = TypeVar("T")

@@ -80,13 +79,7 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any:
)
raise ValueError(msg)
return other._series
if isinstance(other, list) and len(other) > 1:
# e.g. `plx.all() + plx.all()`
msg = "Multi-output expressions are not supported in this context"
raise ValueError(msg)
if isinstance(other, list):
other = other[0]
return other
raise AssertionError("Please report a bug")


def maybe_evaluate_expr(df: PandasDataFrame, arg: Any) -> Any:
@@ -101,12 +94,8 @@ def maybe_evaluate_expr(df: PandasDataFrame, arg: Any) -> Any:
def parse_into_exprs(
implementation: str,
*exprs: IntoPandasExpr | Iterable[IntoPandasExpr],
**named_exprs: IntoPandasExpr,
) -> list[PandasExpr]:
out = [parse_into_expr(implementation, into_expr) for into_expr in flatten(exprs)]
for name, expr in named_exprs.items():
out.append(parse_into_expr(implementation, expr).alias(name))
return out
return [parse_into_expr(implementation, into_expr) for into_expr in flatten(exprs)]


def parse_into_expr(implementation: str, into_expr: IntoPandasExpr) -> PandasExpr:
@@ -122,8 +111,8 @@ def parse_into_expr(implementation: str, into_expr: IntoPandasExpr) -> PandasExp
return plx._create_expr_from_series(into_expr)
if isinstance(into_expr, str):
return plx.col(into_expr)
msg = f"Expected IntoExpr, got {type(into_expr)}"
raise TypeError(msg)
msg = f"Expected IntoExpr, got {type(into_expr)}" # pragma: no cover
raise AssertionError(msg)


def evaluate_into_expr(
@@ -150,8 +139,8 @@ def evaluate_into_exprs(
for name, expr in named_exprs.items():
evaluated_expr = evaluate_into_expr(df, expr)
if len(evaluated_expr) > 1:
msg = "Named expressions must return a single column"
raise ValueError(msg)
msg = "Named expressions must return a single column" # pragma: no cover
raise AssertionError(msg)
series.append(evaluated_expr[0].alias(name))
return series

@@ -204,8 +193,8 @@ def func(df: PandasDataFrame) -> list[PandasSeries]:
def item(s: Any) -> Any:
# cuDF doesn't have Series.item().
if len(s) != 1:
msg = "Can only convert a Series of length 1 to a scalar"
raise ValueError(msg)
msg = "Can only convert a Series of length 1 to a scalar" # pragma: no cover
raise AssertionError(msg)
return s.iloc[0]


@@ -219,42 +208,6 @@ def is_simple_aggregation(expr: PandasExpr) -> bool:
)


def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) -> Any:
"""
Use fastpath for simple aggregations if possible.
If an aggregation is simple (e.g. `pl.col('a').mean()`), then pandas-like
implementations have a fastpath we can use.
For example, `df.group_by('a').agg(pl.col('b').mean())` can be evaluated
as `df.groupby('a')['b'].mean()`, whereas
`df.group_by('a').agg(mean=(pl.col('b') - pl.col('c').mean()).mean())`
requires a lambda function, which is slower.
Returns naive DataFrame.
"""
if expr._depth == 0:
# e.g. agg(pl.len())
df = getattr(grouped, expr._function_name.replace("len", "size"))()
df = (
df.drop(columns=keys)
if len(df.shape) > 1
else df.reset_index(drop=True).to_frame("size")
)
return df.rename(columns={"size": expr._output_names[0]}) # type: ignore[index]
if expr._root_names is None or expr._output_names is None:
msg = "Expected expr to have root_names and output_names set, but they are None. Please report a bug."
raise AssertionError(msg)
if len(expr._root_names) != len(expr._output_names):
msg = "Expected expr to have same number of root_names and output_names, but they are different. Please report a bug."
raise AssertionError(msg)
new_names = dict(zip(expr._root_names, expr._output_names))
function_name = remove_prefix(expr._function_name, "col->")
return getattr(grouped[expr._root_names], function_name)()[expr._root_names].rename(
columns=new_names
)


def horizontal_concat(dfs: list[Any], implementation: str) -> Any:
"""
Concatenate (native) DataFrames horizontally.
@@ -286,13 +239,13 @@ def vertical_concat(dfs: list[Any], implementation: str) -> Any:
Should be in namespace.
"""
if not dfs:
msg = "No dataframes to concatenate"
raise TypeError(msg)
msg = "No dataframes to concatenate" # pragma: no cover
raise AssertionError(msg)
cols = set(dfs[0].columns)
for df in dfs:
cols_current = set(df.columns)
if cols_current != cols:
msg = "Unable to vstack, column names don't match"
msg = "unable to vstack, column names don't match"
raise TypeError(msg)
if implementation == "pandas":
import pandas as pd
@@ -359,12 +312,10 @@ def translate_dtype(dtype: Any) -> DType:
return dtypes.String()
if dtype in ("bool", "boolean"):
return dtypes.Boolean()
if dtype == "object":
return dtypes.Object()
if str(dtype).startswith("datetime64"):
return dtypes.Datetime()
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
@@ -380,8 +331,8 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
return "int32"
if isinstance_or_issubclass(dtype, dtypes.Int16):
return "int16"
if isinstance_or_issubclass(dtype, dtypes.UInt8):
return "uint8"
if isinstance_or_issubclass(dtype, dtypes.Int8):
return "int8"
if isinstance_or_issubclass(dtype, dtypes.UInt64):
return "uint64"
if isinstance_or_issubclass(dtype, dtypes.UInt32):
@@ -394,8 +345,10 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
return "object"
if isinstance_or_issubclass(dtype, dtypes.Boolean):
return "bool"
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
if isinstance_or_issubclass(dtype, dtypes.Datetime):
return "datetime64[us]"
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def validate_indices(series: list[PandasSeries]) -> list[PandasSeries]:
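The docstring of the removed `evaluate_simple_aggregation` above is worth keeping in mind: a "simple" aggregation such as `df.group_by('a').agg(pl.col('b').mean())` can be dispatched straight to the pandas group-by method, while compound expressions need `apply` with a lambda and are much slower. A toy pandas illustration of the two paths (the data and variable names are mine, not narwhals code):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2], "b": [4.0, 6.0, 10.0]})

# Fastpath: a plain `mean` maps directly onto the pandas group-by method.
fast = df.groupby("a")["b"].mean().reset_index()

# Slow path: a compound expression like (b - b.mean()).mean() needs a lambda.
slow = (
    df.groupby("a")["b"]
    .apply(lambda s: (s - s.mean()).mean())
    .reset_index(name="b")
)

print(fast)  # groups 1 and 2 with means 5.0 and 10.0
print(slow)  # same groups, value 0.0 for both (deviations average out)
```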
16 changes: 4 additions & 12 deletions narwhals/dtypes.py
@@ -63,10 +63,6 @@ class String(DType): ...
class Boolean(DType): ...


class Object(DType): # todo: do we really want this one?
...


class Datetime(TemporalType): ...


@@ -100,10 +96,8 @@ def translate_dtype(plx: Any, dtype: DType) -> Any:
return plx.Boolean
if dtype == Datetime:
return plx.Datetime
if dtype == Date:
return plx.Date
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType:
@@ -137,7 +131,5 @@ def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType:
return Boolean()
if dtype == pl.Datetime:
return Datetime()
if dtype == pl.Date:
return Date()
msg = f"Unexpected dtype, got: {type(dtype)}"
raise TypeError(msg)
msg = f"Unexpected dtype, got: {type(dtype)}" # pragma: no cover
raise AssertionError(msg)
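For orientation, `translate_dtype` here maps narwhals dtypes onto the backend namespace's attributes and `to_narwhals_dtype` converts Polars dtypes back, with unknown dtypes now tripping an `AssertionError` behind a pragma instead of a `TypeError`. A table-driven sketch of the same idea (simplified; the real code uses explicit comparisons as shown above, and the dicts and function name here are my own construction):

```python
import polars as pl

# Simplified forward mapping; narwhals spells this out with explicit checks.
_NW_TO_POLARS = {"Int64": pl.Int64, "Float64": pl.Float64, "Boolean": pl.Boolean, "Datetime": pl.Datetime}
# Reverse direction, analogous to what to_narwhals_dtype does for Polars inputs.
_POLARS_TO_NW = {v: k for k, v in _NW_TO_POLARS.items()}


def translate_dtype_sketch(name: str):
    try:
        return _NW_TO_POLARS[name]
    except KeyError:  # pragma: no cover - defensive, unreachable for supported dtypes
        raise AssertionError(f"Unknown dtype: {name}") from None


assert translate_dtype_sketch("Datetime") is pl.Datetime
assert _POLARS_TO_NW[pl.Int64] == "Int64"
```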
(Diffs for the remaining changed files are not rendered in this view.)