Commit 165507a

support `shift` in `over` context

FBruzzesi committed Dec 26, 2024
2 parents bb458f8 + ecde246

Showing 18 changed files with 87 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check_tpch_queries.yml
@@ -25,7 +25,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: local-install
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[dev, dask]" --system
- name: generate-data
run: cd tpch && python generate_data.py
- name: tpch-tests
3 changes: 0 additions & 3 deletions .github/workflows/extremes.yml
@@ -48,7 +48,6 @@ jobs:
matrix:
python-version: ["3.8"]
os: [ubuntu-latest]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
@@ -87,7 +86,6 @@ jobs:
matrix:
python-version: ["3.9"]
os: [ubuntu-latest]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
@@ -117,7 +115,6 @@ jobs:
echo "$DEPS" | grep 'scipy==1.8.0'
echo "$DEPS" | grep 'scikit-learn==1.3.0'
echo "$DEPS" | grep 'dask==2024.7'
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow

21 changes: 7 additions & 14 deletions .github/workflows/pytest.yml
@@ -11,7 +11,6 @@ jobs:
matrix:
python-version: ["3.8"]
os: [windows-latest, ubuntu-latest]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
@@ -25,6 +24,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
# Python3.8 is technically at end-of-life, so we don't test everything
run: uv pip install -e ".[dev]" --system
- name: show-deps
run: uv pip freeze
@@ -36,7 +36,6 @@ jobs:
matrix:
python-version: ["3.10", "3.12"]
os: [windows-latest]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
@@ -50,22 +49,17 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev]" --system
- name: install-dask
run: uv pip install --upgrade dask[dataframe] --system
- name: install-modin
run: uv pip install --upgrade modin --system
run: uv pip install -e ".[dev, extra]" --system
- name: show-deps
run: uv pip freeze
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95

pytest-coverage:
pytest-full-coverage:
strategy:
matrix:
python-version: ["3.9", "3.11", "3.13"]
os: [ubuntu-latest]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
@@ -79,18 +73,17 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev]" --system
- name: install-modin
run: uv pip install --upgrade modin[dask] --system
- name: show-deps
run: uv pip freeze
run: uv pip install -e ".[dev, extra]" --system
- name: install ibis
run: uv pip install ibis-framework>=6.0.0 rich packaging pyarrow_hotfix --system
# Ibis puts upper bounds on dependencies, and requires Python3.10+,
# which messes with other dependencies on lower Python versions
if: matrix.python-version == '3.11'
- name: show-deps
run: uv pip freeze
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow
- name: Run doctests
# reprs differ between versions, so we only run doctests on the latest Python
if: matrix.python-version == '3.13'
run: pytest narwhals --doctest-modules
3 changes: 2 additions & 1 deletion CONTRIBUTING.md
@@ -96,7 +96,8 @@ Here's how you can set up your local development environment to contribute.
uv venv -p 3.12 --seed
```
4. Activate it. On Linux, this is `. .venv/bin/activate`, on Windows `.\.venv\Scripts\activate`.
2. Install Narwhals: `uv pip install -e ".[dev, docs]"`
2. Install Narwhals: `uv pip install -e ".[dev, docs]"`. If you also want to test Dask, PySpark, and
Modin, you can install them too with `uv pip install -e ".[dev, docs, extra]"`.
3. Install a fork of griffe:
```
uv pip install git+https://github.com/MarcoGorelli/griffe.git@no-overloads
2 changes: 1 addition & 1 deletion narwhals/_arrow/dataframe.py
@@ -504,7 +504,7 @@ def filter(self: Self, *predicates: IntoArrowExpr, **constraints: Any) -> Self:
predicates, (plx.col(name) == v for name, v in constraints.items())
)
)
# Safety: all_horizontal's expression only returns a single column.
# `[0]` is safe as all_horizontal's expression only returns a single column
mask = expr._call(self)[0]._native_series
return self._from_native_frame(self._native_frame.filter(mask))

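For context, the keyword `constraints` shown in the signature above are sugar for equality predicates, combined with the other predicates via `all_horizontal` (a logical AND). A minimal sketch of the user-facing behaviour, assuming `narwhals` and `pyarrow` are installed:

```python
import narwhals as nw
import pyarrow as pa

tbl = pa.table({"a": [1, 2, 3], "b": ["x", "y", "x"]})
df = nw.from_native(tbl, eager_only=True)

# Equivalent to df.filter((nw.col("a") > 1) & (nw.col("b") == "x")):
result = df.filter(nw.col("a") > 1, b="x")
print(result.to_native())  # expect a single row: a=3, b="x"
```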
4 changes: 1 addition & 3 deletions narwhals/_arrow/namespace.py
@@ -461,11 +461,9 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
import pyarrow as pa
import pyarrow.compute as pc

from narwhals._arrow.namespace import ArrowNamespace
from narwhals._expression_parsing import parse_into_expr

plx = ArrowNamespace(backend_version=self._backend_version, version=self._version)

plx = df.__narwhals_namespace__()
condition = parse_into_expr(self._condition, namespace=plx)(df)[0]
try:
value_series = parse_into_expr(self._then_value, namespace=plx)(df)[0]
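Several files in this commit swap an explicitly constructed backend namespace for `df.__narwhals_namespace__()`. A minimal sketch of the protocol behind that pattern, using hypothetical stand-in classes rather than the real implementations:

```python
class ToyNamespace:
    # Stand-in for a backend expression namespace (e.g. ArrowNamespace).
    def __init__(self, backend_version: tuple[int, ...], version: str) -> None:
        self._backend_version = backend_version
        self._version = version


class ToyFrame:
    # Stand-in for a backend frame (e.g. ArrowDataFrame).
    def __init__(self, backend_version: tuple[int, ...], version: str) -> None:
        self._backend_version = backend_version
        self._version = version

    def __narwhals_namespace__(self) -> ToyNamespace:
        # The frame already carries everything a namespace needs, so callers
        # can write `plx = df.__narwhals_namespace__()` instead of importing
        # and constructing a backend-specific class by hand.
        return ToyNamespace(self._backend_version, self._version)
```

This keeps shared code free of per-backend imports and guarantees the namespace is built with the same `backend_version` and `version` as the frame it came from.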
6 changes: 2 additions & 4 deletions narwhals/_dask/dataframe.py
@@ -105,13 +105,11 @@ def filter(self, *predicates: DaskExpr, **constraints: Any) -> Self:
)
raise NotImplementedError(msg)

from narwhals._dask.namespace import DaskNamespace

plx = DaskNamespace(backend_version=self._backend_version, version=self._version)
plx = self.__narwhals_namespace__()
expr = plx.all_horizontal(
*chain(predicates, (plx.col(name) == v for name, v in constraints.items()))
)
# Safety: all_horizontal's expression only returns a single column.
# `[0]` is safe as all_horizontal's expression only returns a single column
mask = expr._call(self)[0]
return self._from_native_frame(self._native_frame.loc[mask])

4 changes: 1 addition & 3 deletions narwhals/_dask/namespace.py
@@ -416,11 +416,9 @@ def __init__(
self._version = version

def __call__(self, df: DaskLazyFrame) -> Sequence[dask_expr.Series]:
from narwhals._dask.namespace import DaskNamespace
from narwhals._expression_parsing import parse_into_expr

plx = DaskNamespace(backend_version=self._backend_version, version=self._version)

plx = df.__narwhals_namespace__()
condition = parse_into_expr(self._condition, namespace=plx)(df)[0]
condition = cast("dask_expr.Series", condition)
try:
3 changes: 1 addition & 2 deletions narwhals/_pandas_like/dataframe.py
@@ -412,7 +412,7 @@ def filter(self, *predicates: IntoPandasLikeExpr, **constraints: Any) -> Self:
predicates, (plx.col(name) == v for name, v in constraints.items())
)
)
# Safety: all_horizontal's expression only returns a single column.
# `[0]` is safe as all_horizontal's expression only returns a single column
mask = expr._call(self)[0]
_mask = validate_dataframe_comparand(self._native_frame.index, mask)
return self._from_native_frame(self._native_frame.loc[_mask])
@@ -1006,7 +1006,6 @@ def explode(self: Self, columns: str | Sequence[str], *more_columns: str) -> Self:
]

plx = self.__native_namespace__()

return self._from_native_frame(
plx.concat([exploded_frame, *exploded_series], axis=1)[original_columns]
)
19 changes: 15 additions & 4 deletions narwhals/_pandas_like/expr.py
@@ -24,7 +24,7 @@
from narwhals.utils import Implementation
from narwhals.utils import Version

CUMULATIVE_FUNCTIONS_TO_PANDAS_EQUIVALENT = {
MANY_TO_MANY_AGG_FUNCTIONS_TO_PANDAS_EQUIVALENT = {
"col->cum_sum": "cumsum",
"col->cum_min": "cummin",
"col->cum_max": "cummax",
@@ -33,6 +33,7 @@
# Pandas cumcount counts nulls while Polars does not
# So, instead of using "cumcount" we use "cumsum" on notna() to get the same result
"col->cum_count": "cumsum",
"col->shift": "shift",
"col->rank": "rank",
}
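A quick illustration of the `cum_count` workaround described in the comment above, in plain pandas:

```python
import pandas as pd

s = pd.Series([1.0, None, 2.0])
# pandas' cumulative count includes nulls, whereas Polars' cum_count counts
# only non-null values; a cumsum over the notna() mask matches Polars:
print(s.notna().cumsum().tolist())  # [1, 1, 2]
```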

@@ -418,7 +419,7 @@ def alias(self, name: str) -> Self:
)

def over(self: Self, keys: list[str]) -> Self:
if self._function_name in CUMULATIVE_FUNCTIONS_TO_PANDAS_EQUIVALENT:
if self._function_name in MANY_TO_MANY_AGG_FUNCTIONS_TO_PANDAS_EQUIVALENT:

def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
if (
@@ -444,12 +445,22 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
plx = self.__narwhals_namespace__()
df = df.with_columns(~plx.col(*self._root_names).is_null())

if self._function_name == "col->shift":
kwargs = {"periods": self._kwargs.get("n", 1)}
elif self._function_name == "col->rank":
kwargs = {
"method": self._kwargs.get("method", "average"),
"ascending": not self._kwargs.get("descending", False),
}
else: # Cumulative operation
kwargs = {"skipna": True}

res_native = getattr(
df._native_frame.groupby(list(keys), as_index=False)[
self._root_names
],
CUMULATIVE_FUNCTIONS_TO_PANDAS_EQUIVALENT[self._function_name],
)(skipna=True)
MANY_TO_MANY_AGG_FUNCTIONS_TO_PANDAS_EQUIVALENT[self._function_name],
)(**kwargs)

result_frame = df._from_native_frame(
rename(
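For the newly supported `col->shift` case, here is a sketch of the pandas calls the branches above resolve to, i.e. the grouped analogues of expressions like `nw.col("x").shift(1).over("g")` (assuming pandas is installed):

```python
import pandas as pd

df = pd.DataFrame({"g": ["a", "a", "b", "b"], "x": [1, 2, 3, 4]})

# shift-over-groups lowers to a grouped shift with periods=n:
shifted = df.groupby("g", as_index=False)["x"].shift(periods=1)
print(shifted.tolist())  # [nan, 1.0, nan, 3.0]

# rank-over-groups maps Polars' `descending` flag onto pandas' `ascending`:
ranked = df.groupby("g", as_index=False)["x"].rank(method="average", ascending=True)
print(ranked.tolist())  # [1.0, 2.0, 1.0, 2.0]
```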
8 changes: 1 addition & 7 deletions narwhals/_pandas_like/namespace.py
@@ -503,15 +503,9 @@ def __init__(

def __call__(self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
from narwhals._expression_parsing import parse_into_expr
from narwhals._pandas_like.namespace import PandasLikeNamespace
from narwhals._pandas_like.utils import broadcast_align_and_extract_native

plx = PandasLikeNamespace(
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
)

plx = df.__narwhals_namespace__()
condition = parse_into_expr(self._condition, namespace=plx)(df)[0]
try:
value_series = parse_into_expr(self._then_value, namespace=plx)(df)[0]
8 changes: 2 additions & 6 deletions narwhals/_spark_like/dataframe.py
@@ -104,20 +104,16 @@ def select(
return self._from_native_frame(self._native_frame.select(*new_columns_list))

def filter(self, *predicates: SparkLikeExpr) -> Self:
from narwhals._spark_like.namespace import SparkLikeNamespace

if (
len(predicates) == 1
and isinstance(predicates[0], list)
and all(isinstance(x, bool) for x in predicates[0])
):
msg = "`LazyFrame.filter` is not supported for PySpark backend with boolean masks."
raise NotImplementedError(msg)
plx = SparkLikeNamespace(
backend_version=self._backend_version, version=self._version
)
plx = self.__narwhals_namespace__()
expr = plx.all_horizontal(*predicates)
# Safety: all_horizontal's expression only returns a single column.
# `[0]` is safe as all_horizontal's expression only returns a single column
condition = expr._call(self)[0]
spark_df = self._native_frame.where(condition)
return self._from_native_frame(spark_df)
12 changes: 12 additions & 0 deletions narwhals/series.py
@@ -843,6 +843,9 @@ def any(self) -> Any:
Notes:
Only works on Series of data type Boolean.
Returns:
A boolean indicating if any values in the Series are True.
Examples:
>>> import pandas as pd
>>> import polars as pl
@@ -870,6 +873,9 @@ def any(self) -> Any:
def all(self) -> Any:
"""Return whether all values in the Series are True.
Returns:
A boolean indicating if all values in the Series are True.
Examples:
>>> import pandas as pd
>>> import polars as pl
@@ -1334,6 +1340,9 @@ def drop_nulls(self) -> Self:
def abs(self) -> Self:
"""Calculate the absolute value of each element.
Returns:
A new Series with the absolute values of the original elements.
Examples:
>>> import pandas as pd
>>> import polars as pl
@@ -1631,6 +1640,9 @@ def alias(self, name: str) -> Self:
Arguments:
name: The new name.
Returns:
A new Series with the updated name.
Examples:
>>> import pandas as pd
>>> import polars as pl
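The docstring additions above document existing behaviour rather than changing it; for reference, a tiny sketch of the four methods touched, assuming pandas is installed:

```python
import pandas as pd
import narwhals as nw

bools = nw.from_native(pd.Series([True, False, True]), series_only=True)
print(bools.any())  # True
print(bools.all())  # False

nums = nw.from_native(pd.Series([1, -2, 3], name="a"), series_only=True)
print(nums.abs().to_native().tolist())  # [1, 2, 3]
print(nums.alias("b").name)  # b
```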
13 changes: 8 additions & 5 deletions pyproject.toml
@@ -34,18 +34,21 @@ dev = [
"duckdb",
"pandas",
"polars",
"pre-commit",
"pyarrow",
"pyspark; python_version >= '3.9' and python_version < '3.12'",
"pyarrow-stubs",
"pytest",
"pytest-cov",
"pytest-randomly",
"pytest-env",
"hypothesis",
"hypothesis[numpy]",
"scikit-learn",
"typing_extensions",
]
extra = [ # heavier dependencies we don't necessarily need in every testing job
"scikit-learn",
"pyspark; python_version >= '3.9' and python_version < '3.12'",
"dask[dataframe]; python_version >= '3.9'",
"modin",
]
docs = [
"black", # required by mkdocstrings_handlers
@@ -175,6 +178,7 @@ env = [
plugins = ["covdefaults"]

[tool.coverage.report]
fail_under = 80 # This is just for local development, in CI we set it to 100
omit = [
'narwhals/typing.py',
'narwhals/stable/v1/typing.py',
@@ -183,14 +187,13 @@ omit = [
'narwhals/_ibis/*',
# the latest pyspark (3.5) doesn't officially support Python 3.12 and 3.13
'narwhals/_spark_like/*',
# we don't run these in every environment
'tests/spark_like_test.py',
]
exclude_also = [
"if sys.version_info() <",
"if (:?self._)?implementation is Implementation.MODIN",
"if .*implementation is Implementation.CUDF",
'request.applymarker\(pytest.mark.xfail',
'\w+._backend_version < ',
'backend_version <',
'if "cudf" in str\(constructor'
]
9 changes: 1 addition & 8 deletions tests/dtypes_test.py
@@ -203,7 +203,7 @@ def test_pandas_fixed_offset_1302() -> None:
def test_huge_int() -> None:
df = pl.DataFrame({"a": [1, 2, 3]})
if POLARS_VERSION >= (1, 18): # pragma: no cover
result = nw.from_native(df).schema
result = nw.from_native(df.select(pl.col("a").cast(pl.Int128))).schema
assert result["a"] == nw.Int128
else: # pragma: no cover
# Int128 was not available yet
@@ -221,13 +221,6 @@ def test_huge_int() -> None:
result = nw.from_native(rel).schema
assert result["a"] == nw.UInt128

if POLARS_VERSION >= (1, 18): # pragma: no cover
result = nw.from_native(df).schema
assert result["a"] == nw.UInt128
else: # pragma: no cover
# UInt128 was not available yet
pass

# TODO(unassigned): once other libraries support Int128/UInt128,
# add tests for them too

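The test fix above matters because a plain integer column defaults to `Int64`; `Int128` only appears after an explicit cast, and only on Polars >= 1.18, where the dtype was introduced. A hedged sketch:

```python
import narwhals as nw
import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})
print(nw.from_native(df).schema["a"])  # Int64 -- not Int128

cast_df = df.select(pl.col("a").cast(pl.Int128))  # requires Polars >= 1.18
print(nw.from_native(cast_df).schema["a"])  # Int128
```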