Merge pull request #40 from MarcoGorelli/coverage
enforce 100% Coverage
MarcoGorelli authored Apr 2, 2024
2 parents 05ab4f4 + 712b76c commit 6a6725f
Showing 13 changed files with 165 additions and 192 deletions.
35 changes: 32 additions & 3 deletions .github/workflows/pytest.yml
@@ -6,10 +6,10 @@ on:
    branches: [main]

jobs:
  tox:
  pytest-38:
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
        python-version: ["3.8"]
        os: [windows-latest, ubuntu-latest]

    runs-on: ${{ matrix.os }}
@@ -31,6 +31,35 @@ jobs:
      - name: install-modin
        run: python -m pip install --upgrade modin[dask]
      - name: Run pytest
        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50
        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=90
      - name: Run doctests
        run: pytest narwhals --doctest-modules

  pytest-coverage:
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12"]
        os: [windows-latest, ubuntu-latest]

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache multiple paths
        uses: actions/cache@v3
        with:
          path: |
            ~/.cache/pip
            $RUNNER_TOOL_CACHE/Python/*
            ~\AppData\Local\pip\Cache
          key: ${{ runner.os }}-build-${{ matrix.python-version }}
      - name: install-reqs
        run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt
      - name: install-modin
        run: python -m pip install --upgrade modin[dask]
      - name: Run pytest
        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100
      - name: Run doctests
        run: pytest narwhals --doctest-modules
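The workflow change above splits CI into a Python 3.8 job that must reach at least 90% coverage and a job for 3.9–3.12 that must reach 100% (presumably because some paths cannot be exercised on 3.8), both enforced through pytest-cov's `--cov-fail-under` flag. The sketch below is only a conceptual illustration of what that flag enforces, not pytest-cov's implementation; the helper name is made up.

```python
def enforce_fail_under(covered: int, measurable: int, threshold: float) -> None:
    """Conceptual stand-in for the --cov-fail-under check (hypothetical helper)."""
    total = 100.0 * covered / measurable
    if total < threshold:
        # pytest-cov exits non-zero in this situation, which fails the CI job.
        raise SystemExit(f"coverage {total:.2f}% is below the required {threshold:g}%")


enforce_fail_under(covered=999, measurable=1000, threshold=100)  # 99.90% < 100 -> job fails
```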
2 changes: 1 addition & 1 deletion README.md
@@ -18,7 +18,7 @@ Seamlessly support all, without depending on any!
- ✅ **No dependencies** (not even Polars), keep your library lightweight
- ✅ Separate **lazy** and eager APIs
- ✅ Use Polars **Expressions**
- ✅ Tested against pandas and Polars nightly builds!
- ✅ 100% branch coverage, tested against pandas and Polars nightly builds!

## Installation

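The new README bullet advertises branch coverage specifically, which is stricter than line coverage: every conditional must be observed going both ways, not merely executed once. A toy illustration (my own function; it assumes branch measurement is switched on, e.g. via `--cov-branch` or `branch = true` in the coverage settings, which are not part of the diff shown here):

```python
def clamp_non_negative(x: int) -> int:
    if x < 0:
        x = 0
    return x


clamp_non_negative(-5)  # every line executes: line coverage is already 100%
clamp_non_negative(3)   # still needed: branch coverage also wants the `if`
                        # observed skipping its body
```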
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/group_by.py
@@ -154,7 +154,7 @@ def func(df: Any) -> Any:

if parse_version(pd.__version__) < parse_version("2.2.0"): # pragma: no cover
result_complex = grouped.apply(func)
else:
else: # pragma: no cover
result_complex = grouped.apply(func, include_groups=False)
else: # pragma: no cover
result_complex = grouped.apply(func)
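The `# pragma: no cover` markers added in `group_by.py` exclude version-gated branches from coverage measurement: for any single installed pandas, only one side of the version check can ever run, so without the pragmas the 100% gate could not pass within a single CI job. A minimal sketch of the pattern, with a hypothetical standalone function (the `parse_version` helper is the one the diff itself imports from `narwhals.utils`):

```python
import pandas as pd

from narwhals.utils import parse_version  # same helper the diff relies on


def apply_func_per_group(grouped, func):
    # Only one of these branches is reachable for a given pandas install,
    # so each is excluded from coverage rather than counted as a miss.
    if parse_version(pd.__version__) < parse_version("2.2.0"):  # pragma: no cover
        return grouped.apply(func)
    return grouped.apply(func, include_groups=False)  # pragma: no cover
```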
1 change: 1 addition & 0 deletions narwhals/_pandas_like/namespace.py
@@ -33,6 +33,7 @@ class PandasNamespace:
Float32 = dtypes.Float32
Boolean = dtypes.Boolean
String = dtypes.String
Datetime = dtypes.Datetime

def make_native_series(self, name: str, data: list[Any], index: Any) -> Any:
if self._implementation == "pandas":
46 changes: 4 additions & 42 deletions narwhals/_pandas_like/series.py
@@ -4,8 +4,6 @@
from typing import Any
from typing import Sequence

from pandas.api.types import is_extension_array_dtype

from narwhals._pandas_like.utils import item
from narwhals._pandas_like.utils import reverse_translate_dtype
from narwhals._pandas_like.utils import translate_dtype
@@ -255,34 +253,10 @@ def sum(self) -> Any:
ser = self._series
return ser.sum()

def prod(self) -> Any:
ser = self._series
return ser.prod()

def median(self) -> Any:
ser = self._series
return ser.median()

def mean(self) -> Any:
ser = self._series
return ser.mean()

def std(
self,
*,
correction: float = 1.0,
) -> Any:
ser = self._series
return ser.std(ddof=correction)

def var(
self,
*,
correction: float = 1.0,
) -> Any:
ser = self._series
return ser.var(ddof=correction)

def len(self) -> Any:
return len(self._series)

@@ -300,12 +274,6 @@ def n_unique(self) -> int:
ser = self._series
return ser.nunique() # type: ignore[no-any-return]

def zip_with(self, mask: PandasSeries, other: PandasSeries) -> PandasSeries:
mask = validate_column_comparand(self._series.index, mask)
other = validate_column_comparand(self._series.index, other)
ser = self._series
return self._from_series(ser.where(mask, other))

def sample(
self,
n: int | None = None,
@@ -327,12 +295,6 @@ def unique(self) -> PandasSeries:
)
)

def is_nan(self) -> PandasSeries:
ser = self._series
if is_extension_array_dtype(ser.dtype):
return self._from_series((ser != ser).fillna(False)) # noqa: PLR0124
return self._from_series(ser.isna())

def sort(
self,
*,
@@ -353,9 +315,9 @@ def to_numpy(self) -> Any:
def to_pandas(self) -> Any:
if self._implementation == "pandas":
return self._series
elif self._implementation == "cudf":
elif self._implementation == "cudf": # pragma: no cover
return self._series.to_pandas()
elif self._implementation == "modin":
elif self._implementation == "modin": # pragma: no cover
return self._series._to_pandas()
msg = f"Unknown implementation: {self._implementation}"
raise TypeError(msg)
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)
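A second pattern repeated in `series.py` (and again below in `utils.py` and `dtypes.py`): fallbacks that should be unreachable switch from `TypeError` to `AssertionError` and carry a `# pragma: no cover`, so defensive code does not count against the 100% target. A minimal sketch of the shape, with a hypothetical standalone function in place of the method above and the unreachable tail compressed into one excluded line:

```python
def to_pandas_native(series, implementation: str):
    """Return the underlying pandas object for a known backend (sketch only)."""
    if implementation == "pandas":
        return series
    if implementation == "modin":  # pragma: no cover - not exercised in every CI job
        return series._to_pandas()
    # Unreachable when the wrapper was constructed correctly, hence assert + exclude.
    raise AssertionError(f"Unknown implementation: {implementation}")  # pragma: no cover
```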
85 changes: 19 additions & 66 deletions narwhals/_pandas_like/utils.py
@@ -9,7 +9,6 @@
from narwhals.utils import flatten
from narwhals.utils import isinstance_or_issubclass
from narwhals.utils import parse_version
from narwhals.utils import remove_prefix

T = TypeVar("T")

@@ -80,13 +79,7 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any:
)
raise ValueError(msg)
return other._series
if isinstance(other, list) and len(other) > 1:
# e.g. `plx.all() + plx.all()`
msg = "Multi-output expressions are not supported in this context"
raise ValueError(msg)
if isinstance(other, list):
other = other[0]
return other
raise AssertionError("Please report a bug")


def maybe_evaluate_expr(df: PandasDataFrame, arg: Any) -> Any:
@@ -101,12 +94,8 @@ def maybe_evaluate_expr(df: PandasDataFrame, arg: Any) -> Any:
def parse_into_exprs(
implementation: str,
*exprs: IntoPandasExpr | Iterable[IntoPandasExpr],
**named_exprs: IntoPandasExpr,
) -> list[PandasExpr]:
out = [parse_into_expr(implementation, into_expr) for into_expr in flatten(exprs)]
for name, expr in named_exprs.items():
out.append(parse_into_expr(implementation, expr).alias(name))
return out
return [parse_into_expr(implementation, into_expr) for into_expr in flatten(exprs)]


def parse_into_expr(implementation: str, into_expr: IntoPandasExpr) -> PandasExpr:
@@ -122,8 +111,8 @@ def parse_into_expr(implementation: str, into_expr: IntoPandasExpr) -> PandasExp
return plx._create_expr_from_series(into_expr)
if isinstance(into_expr, str):
return plx.col(into_expr)
msg = f"Expected IntoExpr, got {type(into_expr)}"
raise TypeError(msg)
msg = f"Expected IntoExpr, got {type(into_expr)}" # pragma: no cover
raise AssertionError(msg)


def evaluate_into_expr(
@@ -150,8 +139,8 @@ def evaluate_into_exprs(
for name, expr in named_exprs.items():
evaluated_expr = evaluate_into_expr(df, expr)
if len(evaluated_expr) > 1:
msg = "Named expressions must return a single column"
raise ValueError(msg)
msg = "Named expressions must return a single column" # pragma: no cover
raise AssertionError(msg)
series.append(evaluated_expr[0].alias(name))
return series

@@ -204,8 +193,8 @@ def func(df: PandasDataFrame) -> list[PandasSeries]:
def item(s: Any) -> Any:
# cuDF doesn't have Series.item().
if len(s) != 1:
msg = "Can only convert a Series of length 1 to a scalar"
raise ValueError(msg)
msg = "Can only convert a Series of length 1 to a scalar" # pragma: no cover
raise AssertionError(msg)
return s.iloc[0]


@@ -219,42 +208,6 @@ def is_simple_aggregation(expr: PandasExpr) -> bool:
)


def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) -> Any:
"""
Use fastpath for simple aggregations if possible.
If an aggregation is simple (e.g. `pl.col('a').mean()`), then pandas-like
implementations have a fastpath we can use.
For example, `df.group_by('a').agg(pl.col('b').mean())` can be evaluated
as `df.groupby('a')['b'].mean()`, whereas
`df.group_by('a').agg(mean=(pl.col('b') - pl.col('c').mean()).mean())`
requires a lambda function, which is slower.
Returns naive DataFrame.
"""
if expr._depth == 0:
# e.g. agg(pl.len())
df = getattr(grouped, expr._function_name.replace("len", "size"))()
df = (
df.drop(columns=keys)
if len(df.shape) > 1
else df.reset_index(drop=True).to_frame("size")
)
return df.rename(columns={"size": expr._output_names[0]}) # type: ignore[index]
if expr._root_names is None or expr._output_names is None:
msg = "Expected expr to have root_names and output_names set, but they are None. Please report a bug."
raise AssertionError(msg)
if len(expr._root_names) != len(expr._output_names):
msg = "Expected expr to have same number of root_names and output_names, but they are different. Please report a bug."
raise AssertionError(msg)
new_names = dict(zip(expr._root_names, expr._output_names))
function_name = remove_prefix(expr._function_name, "col->")
return getattr(grouped[expr._root_names], function_name)()[expr._root_names].rename(
columns=new_names
)


def horizontal_concat(dfs: list[Any], implementation: str) -> Any:
"""
Concatenate (native) DataFrames horizontally.
@@ -286,13 +239,13 @@ def vertical_concat(dfs: list[Any], implementation: str) -> Any:
Should be in namespace.
"""
if not dfs:
msg = "No dataframes to concatenate"
raise TypeError(msg)
msg = "No dataframes to concatenate" # pragma: no cover
raise AssertionError(msg)
cols = set(dfs[0].columns)
for df in dfs:
cols_current = set(df.columns)
if cols_current != cols:
msg = "Unable to vstack, column names don't match"
msg = "unable to vstack, column names don't match"
raise TypeError(msg)
if implementation == "pandas":
import pandas as pd
@@ -359,12 +312,10 @@ def translate_dtype(dtype: Any) -> DType:
return dtypes.String()
if dtype in ("bool", "boolean"):
return dtypes.Boolean()
if dtype == "object":
return dtypes.Object()
if str(dtype).startswith("datetime64"):
return dtypes.Datetime()
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
@@ -380,8 +331,8 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
return "int32"
if isinstance_or_issubclass(dtype, dtypes.Int16):
return "int16"
if isinstance_or_issubclass(dtype, dtypes.UInt8):
return "uint8"
if isinstance_or_issubclass(dtype, dtypes.Int8):
return "int8"
if isinstance_or_issubclass(dtype, dtypes.UInt64):
return "uint64"
if isinstance_or_issubclass(dtype, dtypes.UInt32):
@@ -394,8 +345,10 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
return "object"
if isinstance_or_issubclass(dtype, dtypes.Boolean):
return "bool"
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
if isinstance_or_issubclass(dtype, dtypes.Datetime):
return "datetime64[us]"
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def validate_indices(series: list[PandasSeries]) -> list[PandasSeries]:
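The docstring of the removed `evaluate_simple_aggregation` above is worth keeping in mind: a "simple" aggregation such as `df.group_by('a').agg(pl.col('b').mean())` can be dispatched straight to the pandas group-by method, while compound expressions need `apply` with a lambda and are much slower. A toy pandas illustration of the two paths (the data and variable names are mine, not narwhals code):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2], "b": [4.0, 6.0, 10.0]})

# Fastpath: a plain `mean` maps directly onto the pandas group-by method.
fast = df.groupby("a")["b"].mean().reset_index()

# Slow path: a compound expression like (b - b.mean()).mean() needs a lambda.
slow = (
    df.groupby("a")["b"]
    .apply(lambda s: (s - s.mean()).mean())
    .reset_index(name="b")
)

print(fast)  # groups 1 and 2 with means 5.0 and 10.0
print(slow)  # same groups, value 0.0 for both (deviations average out)
```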
16 changes: 4 additions & 12 deletions narwhals/dtypes.py
@@ -63,10 +63,6 @@ class String(DType): ...
class Boolean(DType): ...


class Object(DType): # todo: do we really want this one?
...


class Datetime(TemporalType): ...


@@ -100,10 +96,8 @@ def translate_dtype(plx: Any, dtype: DType) -> Any:
return plx.Boolean
if dtype == Datetime:
return plx.Datetime
if dtype == Date:
return plx.Date
msg = f"Unknown dtype: {dtype}"
raise TypeError(msg)
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)


def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType:
@@ -137,7 +131,5 @@ def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType:
return Boolean()
if dtype == pl.Datetime:
return Datetime()
if dtype == pl.Date:
return Date()
msg = f"Unexpected dtype, got: {type(dtype)}"
raise TypeError(msg)
msg = f"Unexpected dtype, got: {type(dtype)}" # pragma: no cover
raise AssertionError(msg)
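For orientation, `translate_dtype` here maps narwhals dtypes onto the backend namespace's attributes and `to_narwhals_dtype` converts Polars dtypes back, with unknown dtypes now tripping an `AssertionError` behind a pragma instead of a `TypeError`. A table-driven sketch of the same idea (simplified; the real code uses explicit comparisons as shown above, and the dicts and function name here are my own construction):

```python
import polars as pl

# Simplified forward mapping; narwhals spells this out with explicit checks.
_NW_TO_POLARS = {"Int64": pl.Int64, "Float64": pl.Float64, "Boolean": pl.Boolean, "Datetime": pl.Datetime}
# Reverse direction, analogous to what to_narwhals_dtype does for Polars inputs.
_POLARS_TO_NW = {v: k for k, v in _NW_TO_POLARS.items()}


def translate_dtype_sketch(name: str):
    try:
        return _NW_TO_POLARS[name]
    except KeyError:  # pragma: no cover - defensive, unreachable for supported dtypes
        raise AssertionError(f"Unknown dtype: {name}") from None


assert translate_dtype_sketch("Datetime") is pl.Datetime
assert _POLARS_TO_NW[pl.Int64] == "Int64"
```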
(Diffs for the remaining changed files are not rendered in this view.)