From 2525c44728fc627d28d66c7845c011df1ecbbb37 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Sun, 3 Nov 2024 15:21:42 -0500 Subject: [PATCH] ci: use duckdb instead of ibis to test interchange-only support (#3672) Co-authored-by: dangotbanned <125183946+dangotbanned@users.noreply.github.com> (cherry picked from commit c5d3bdfdd6200002f81f8aea20fb762bc76b6b22) --- altair/utils/data.py | 5 +- pyproject.toml | 2 +- tests/__init__.py | 249 +++++++++++++++++++++++++ tests/utils/test_to_values_narwhals.py | 20 +- tests/utils/test_utils.py | 5 +- tests/vegalite/v5/test_api.py | 120 ++++++++++-- 6 files changed, 358 insertions(+), 43 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 42c87ece4b..c2adccba76 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -68,8 +68,9 @@ class SupportsGeoInterface(Protocol): def is_data_type(obj: Any) -> TypeIs[DataType]: - return _is_pandas_dataframe(obj) or isinstance( - obj, (dict, DataFrameLike, SupportsGeoInterface, nw.DataFrame) + return isinstance(obj, (dict, SupportsGeoInterface)) or isinstance( + nw.from_native(obj, eager_or_interchange_only=True, strict=False), + nw.DataFrame, ) diff --git a/pyproject.toml b/pyproject.toml index f3cbfb6b3e..a1e904b176 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ all = [ dev = [ "hatch>=1.13.0", "ruff>=0.6.0", - "ibis-framework[polars]", + "duckdb>=1.0", "ipython[kernel]", "pandas>=0.25.3", "pytest", diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb2..1cda564382 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,249 @@ +from __future__ import annotations + +import pkgutil +import re +import sys +from importlib.util import find_spec +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import pytest + +from tests import examples_arguments_syntax, examples_methods_syntax + +if TYPE_CHECKING: + from collections.abc import Callable, Collection, Iterator, Mapping + from re import Pattern + + if sys.version_info >= (3, 11): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias + from _pytest.mark import ParameterSet + + MarksType: TypeAlias = ( + "pytest.MarkDecorator | Collection[pytest.MarkDecorator | pytest.Mark]" + ) + + +def windows_has_tzdata() -> bool: + """ + From PyArrow: python/pyarrow/tests/util.py. + + This is the default location where tz.cpp will look for (until we make + this configurable at run-time) + + Skip test on Windows when the tz database is not configured. + + See https://github.com/vega/altair/issues/3050. + """ + return (Path.home() / "Downloads" / "tzdata").exists() + + +slow: pytest.MarkDecorator = pytest.mark.slow() +""" +Custom ``pytest.mark`` decorator. + +By default **all** tests are run. + +Slow tests can be **excluded** using:: + + >>> hatch run test-fast # doctest: +SKIP + +To run **only** slow tests use:: + + >>> hatch run test-slow # doctest: +SKIP + +Either script can accept ``pytest`` args:: + + >>> hatch run test-slow --durations=25 # doctest: +SKIP +""" + + +skip_requires_vl_convert: pytest.MarkDecorator = pytest.mark.skipif( + find_spec("vl_convert") is None, reason="`vl_convert` not installed." +) +""" +``pytest.mark.skipif`` decorator. + +Applies when `vl-convert`_ import would fail. + +.. _vl-convert: + https://github.com/vega/vl-convert +""" + +skip_requires_vegafusion: pytest.MarkDecorator = pytest.mark.skipif( + find_spec("vegafusion") is None, reason="`vegafusion` not installed." +) +""" +``pytest.mark.skipif`` decorator. + +Applies when `vegafusion`_ import would fail. + +.. _vegafusion: + https://github.com/vega/vegafusion +""" + + +def skip_requires_pyarrow( + fn: Callable[..., Any] | None = None, /, *, requires_tzdata: bool = False +) -> Callable[..., Any]: + """ + ``pytest.mark.skipif`` decorator. + + Applies when `pyarrow`_ import would fail. + + Additionally, we mark as expected to fail on `Windows`. + + https://github.com/vega/altair/issues/3050 + + .. _pyarrow: + https://pypi.org/project/pyarrow/ + """ + composed = pytest.mark.skipif( + find_spec("pyarrow") is None, reason="`pyarrow` not installed." + ) + if requires_tzdata: + composed = pytest.mark.xfail( + sys.platform == "win32" and not windows_has_tzdata(), + reason="Timezone database is not installed on Windows", + )(composed) + + def wrap(test_fn: Callable[..., Any], /) -> Callable[..., Any]: + return composed(test_fn) + + if fn is None: + return wrap + else: + return wrap(fn) + + +def id_func_str_only(val) -> str: + """ + Ensures the generated test-id name uses only `filename` and not `source`. + + Without this, the name is repr(source code)-filename + """ + if not isinstance(val, str): + return "" + else: + return val + + +def _wrap_mark_specs( + pattern_marks: Mapping[Pattern[str] | str, MarksType], / +) -> dict[Pattern[str], MarksType]: + return { + (re.compile(p) if not isinstance(p, re.Pattern) else p): marks + for p, marks in pattern_marks.items() + } + + +def _fill_marks( + mark_specs: dict[Pattern[str], MarksType], string: str, / +) -> MarksType | tuple[()]: + it = (v for k, v in mark_specs.items() if k.search(string)) + return next(it, ()) + + +def _distributed_examples( + *exclude_prefixes: str, marks: Mapping[Pattern[str] | str, MarksType] | None = None +) -> Iterator[ParameterSet]: + """ + Yields ``pytest.mark.parametrize`` arguments for all examples. + + Parameters + ---------- + *exclude_prefixes + Any file starting with these will be **skipped**. + marks + Mapping of ``re.search(..., )`` patterns to ``pytest.param(marks=...)``. + + The **first** match (if any) will be inserted into ``marks``. + """ + RE_NAME: Pattern[str] = re.compile(r"^tests\.(.*)") + mark_specs = _wrap_mark_specs(marks) if marks else {} + + for pkg in [examples_arguments_syntax, examples_methods_syntax]: + pkg_name = pkg.__name__ + if match := RE_NAME.match(pkg_name): + pkg_name_unqual: str = match.group(1) + else: + msg = f"Failed to match pattern {RE_NAME.pattern!r} against {pkg_name!r}" + raise ValueError(msg) + for _, mod_name, is_pkg in pkgutil.iter_modules(pkg.__path__): + if not (is_pkg or mod_name.startswith(exclude_prefixes)): + file_name = f"{mod_name}.py" + msg_name = f"{pkg_name_unqual}.{file_name}" + if source := pkgutil.get_data(pkg_name, file_name): + yield pytest.param( + source, msg_name, marks=_fill_marks(mark_specs, msg_name) + ) + else: + msg = ( + f"Failed to get source data from `{pkg_name}.{file_name}`.\n" + f"pkgutil.get_data(...) returned: {pkgutil.get_data(pkg_name, file_name)!r}" + ) + raise TypeError(msg) + + +ignore_DataFrameGroupBy: pytest.MarkDecorator = pytest.mark.filterwarnings( + "ignore:DataFrameGroupBy.apply.*:DeprecationWarning" +) +""" +``pytest.mark.filterwarnings`` decorator. + +Hides ``pandas`` warning(s):: + + "ignore:DataFrameGroupBy.apply.*:DeprecationWarning" +""" + + +distributed_examples: pytest.MarkDecorator = pytest.mark.parametrize( + ("source", "filename"), + tuple( + _distributed_examples( + "_", + "interval_selection_map_quakes", + marks={ + "beckers_barley.+facet": slow, + "lasagna_plot": slow, + "line_chart_with_cumsum_faceted": slow, + "layered_bar_chart": slow, + "multiple_interactions": slow, + "layered_histogram": slow, + "stacked_bar_chart_with_text": slow, + "bar_chart_with_labels": slow, + "interactive_cross_highlight": slow, + "wind_vector_map": slow, + r"\.point_map\.py": slow, + "line_chart_with_color_datum": slow, + }, + ) + ), + ids=id_func_str_only, +) +""" +``pytest.mark.parametrize`` decorator. + +Provides **all** examples, using both `arguments` & `methods` syntax. + +The decorated test can evaluate each resulting chart via:: + + from altair.utils.execeval import eval_block + + @distributed_examples + def test_some_stuff(source: Any, filename: str) -> None: + chart: ChartType | None = eval_block(source) + ... # Perform any assertions + +Notes +----- +- See `#3431 comment`_ for performance benefit. +- `interval_selection_map_quakes` requires `#3418`_ fix + +.. _#3431 comment: + https://github.com/vega/altair/pull/3431#issuecomment-2168508048 +.. _#3418: + https://github.com/vega/altair/issues/3418 +""" diff --git a/tests/utils/test_to_values_narwhals.py b/tests/utils/test_to_values_narwhals.py index 1a96c67751..b1be4c5714 100644 --- a/tests/utils/test_to_values_narwhals.py +++ b/tests/utils/test_to_values_narwhals.py @@ -1,6 +1,6 @@ +import re import sys from datetime import datetime -from pathlib import Path import narwhals.stable.v1 as nw import pandas as pd @@ -14,23 +14,7 @@ from altair.utils.data import to_values -def windows_has_tzdata(): - """ - From PyArrow: python/pyarrow/tests/util.py. - - This is the default location where tz.cpp will look for (until we make - this configurable at run-time) - """ - return Path.home().joinpath("Downloads", "tzdata").exists() - - -# Skip test on Windows when the tz database is not configured. -# See https://github.com/vega/altair/issues/3050. -@pytest.mark.skipif( - sys.platform == "win32" and not windows_has_tzdata(), - reason="Timezone database is not installed on Windows", -) -@pytest.mark.skipif(pa is None, reason="pyarrow not installed") +@skip_requires_pyarrow(requires_tzdata=True) def test_arrow_timestamp_conversion(): """Test that arrow timestamp values are converted to ISO-8601 strings.""" data = { diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 0e22ec7e9c..511b796992 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -128,10 +128,7 @@ def test_sanitize_dataframe_arrow_columns(): json.dumps(records) -@pytest.mark.skipif(pa is None, reason="pyarrow not installed") -@pytest.mark.xfail( - sys.platform == "win32", reason="Timezone database is not installed on Windows" -) +@skip_requires_pyarrow def test_sanitize_pyarrow_table_columns() -> None: # create a dataframe with various types df = pd.DataFrame( diff --git a/tests/vegalite/v5/test_api.py b/tests/vegalite/v5/test_api.py index 241d473788..9fa767cf45 100644 --- a/tests/vegalite/v5/test_api.py +++ b/tests/vegalite/v5/test_api.py @@ -13,7 +13,7 @@ from datetime import date from importlib.metadata import version as importlib_version -import ibis +import duckdb import jsonschema import narwhals.stable.v1 as nw import pandas as pd @@ -22,7 +22,9 @@ from packaging.version import Version import altair as alt -from altair.utils.schemapi import Optional, Undefined +from altair.utils.core import use_signature +from altair.utils.schemapi import Optional, SchemaValidationError, Undefined +from tests import skip_requires_pyarrow, skip_requires_vl_convert, slow try: import vl_convert as vlc @@ -1528,26 +1530,108 @@ def test_polars_with_pandas_nor_pyarrow(monkeypatch: pytest.MonkeyPatch): assert "numpy" not in sys.modules -@pytest.mark.skipif( - sys.version_info < (3, 9), - reason="The maximum `ibis` version installable on Python 3.8 is `ibis==5.1.0`," - " which doesn't support the dataframe interchange protocol.", -) -@pytest.mark.skipif( - Version("1.5") > PANDAS_VERSION, - reason="A warning is thrown on old pandas versions", -) -@pytest.mark.xfail( - sys.platform == "win32", reason="Timezone database is not installed on Windows" -) -def test_ibis_with_date_32(): - df = pl.DataFrame( +@skip_requires_pyarrow(requires_tzdata=True) +def test_interchange_with_date_32(): + # Test that objects which Narwhals only supports at the interchange + # level can be plotted when they contain date32 columns. + df = pl.DataFrame( # noqa: F841 {"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} ) - tbl = ibis.memtable(df) - result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict() + rel = duckdb.sql("select * from df") + result = alt.Chart(rel).mark_line().encode(x="a", y="b").to_dict() assert next(iter(result["datasets"].values())) == [ {"a": 1, "b": "2020-01-01T00:00:00"}, {"a": 2, "b": "2020-01-02T00:00:00"}, {"a": 3, "b": "2020-01-03T00:00:00"}, ] + + +@skip_requires_pyarrow(requires_tzdata=True) +def test_interchange_with_vegafusion(monkeypatch: pytest.MonkeyPatch): + # Test that objects which Narwhals only supports at the interchange + # level don't get converted to PyArrow unnecessarily when plotted + # with the vegafusion transformer. + # TODO: this test can be drastically simplified when some level of + # DuckDB support in VegaFusion, as it can then just be `alt.Chart(rel_df)` + # without DuckDBWithInterchangeSupport. + df = pl.DataFrame( # noqa: F841 + { + "a": [1, 2, 3], + "b": [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)], + } + ) + rel = duckdb.sql("select * from df") + + class DuckDBWithInterchangeSupport: + """ + DuckDB doesn't (yet?) support the interchange protocol. + + So, we create duckdb wrapper which defers to PyArrow's + implementation of the protocol. + """ + + def __init__(self, rel: duckdb.DuckDBPyRelation) -> None: + self._rel = rel + + def __dataframe__(self, allow_copy: bool = True) -> object: + return self._rel.to_arrow_table().__dataframe__() + + rel_df = DuckDBWithInterchangeSupport(rel) + # "poison" `arrow_table_from_dfi_dataframe` to check that it does not get called + # if we use the vegafusion transformer + monkeypatch.setattr( + "altair.utils.data.arrow_table_from_dfi_dataframe", lambda x: 1 / 0 + ) + + # Narwhals doesn't fully support our custom DuckDBWithInterchangeSupport, + # so we need to overwrite `to_native` + def to_native(df, strict): + if isinstance(df, nw.DataFrame): + return rel_df + return df + + monkeypatch.setattr("narwhals.stable.v1.to_native", to_native) + + with alt.data_transformers.enable("vegafusion"): + result = ( + alt.Chart(rel_df).mark_line().encode(x="a", y="b").to_dict(format="vega") + ) + assert next(iter(result["data"]))["values"] == [ + {"a": 1, "b": "2020-01-01T00:00:00.000"}, + {"a": 2, "b": "2020-01-02T00:00:00.000"}, + {"a": 3, "b": "2020-01-03T00:00:00.000"}, + ] + + +def test_binding() -> None: + @use_signature(alt.Binding) + def old_binding(input: Any, **kwargs: Any) -> alt.Binding: + """A generic binding.""" + return alt.Binding(input=input, **kwargs) + + # NOTE: `mypy` doesn't complain, but `pyright` does + old = old_binding(input="search", placeholder="Country", name="Search") # pyright: ignore[reportCallIssue] + old_positional = old_binding("search", placeholder="Country", name="Search") + + new = alt.binding(input="search", placeholder="Country", name="Search") + new_positional = alt.binding("search", placeholder="Country", name="Search") + + assert ( + old.to_dict() + == old_positional.to_dict() + == new.to_dict() + == new_positional.to_dict() + ) + assert all( + isinstance(x, alt.Binding) for x in (old, old_positional, new, new_positional) + ) + + MISSING_INPUT = r"missing 1 required positional argument: 'input" + + # NOTE: `mypy` doesn't complain, but `pyright` does (Again) + with pytest.raises(TypeError, match=MISSING_INPUT): + old_binding(placeholder="Country", name="Search") # pyright: ignore[reportCallIssue] + + # NOTE: Both type checkers can detect the issue on the new signature + with pytest.raises(TypeError, match=MISSING_INPUT): + alt.binding(placeholder="Country", name="Search") # type: ignore[call-arg]