From 2525c44728fc627d28d66c7845c011df1ecbbb37 Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli <marcogorelli@protonmail.com>
Date: Sun, 3 Nov 2024 15:21:42 -0500
Subject: [PATCH] ci: use duckdb instead of ibis to test interchange-only
 support (#3672)

Co-authored-by: dangotbanned <125183946+dangotbanned@users.noreply.github.com>

(cherry picked from commit c5d3bdfdd6200002f81f8aea20fb762bc76b6b22)
---
 altair/utils/data.py                   |   5 +-
 pyproject.toml                         |   2 +-
 tests/__init__.py                      | 249 +++++++++++++++++++++++++
 tests/utils/test_to_values_narwhals.py |  20 +-
 tests/utils/test_utils.py              |   5 +-
 tests/vegalite/v5/test_api.py          | 120 ++++++++++--
 6 files changed, 358 insertions(+), 43 deletions(-)

diff --git a/altair/utils/data.py b/altair/utils/data.py
index 42c87ece4b..c2adccba76 100644
--- a/altair/utils/data.py
+++ b/altair/utils/data.py
@@ -68,8 +68,9 @@ class SupportsGeoInterface(Protocol):
 
 
 def is_data_type(obj: Any) -> TypeIs[DataType]:
-    return _is_pandas_dataframe(obj) or isinstance(
-        obj, (dict, DataFrameLike, SupportsGeoInterface, nw.DataFrame)
+    return isinstance(obj, (dict, SupportsGeoInterface)) or isinstance(
+        nw.from_native(obj, eager_or_interchange_only=True, strict=False),
+        nw.DataFrame,
     )
 
 
diff --git a/pyproject.toml b/pyproject.toml
index f3cbfb6b3e..a1e904b176 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,7 @@ all = [
 dev = [
     "hatch>=1.13.0",
     "ruff>=0.6.0",
-    "ibis-framework[polars]",
+    "duckdb>=1.0",
     "ipython[kernel]",
     "pandas>=0.25.3",
     "pytest",
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29bb2..1cda564382 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,249 @@
+from __future__ import annotations
+
+import pkgutil
+import re
+import sys
+from importlib.util import find_spec
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import pytest
+
+from tests import examples_arguments_syntax, examples_methods_syntax
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Collection, Iterator, Mapping
+    from re import Pattern
+
+    if sys.version_info >= (3, 11):
+        from typing import TypeAlias
+    else:
+        from typing_extensions import TypeAlias
+    from _pytest.mark import ParameterSet
+
+    MarksType: TypeAlias = (
+        "pytest.MarkDecorator | Collection[pytest.MarkDecorator | pytest.Mark]"
+    )
+
+
+def windows_has_tzdata() -> bool:
+    """
+    Report whether ``~/Downloads/tzdata`` exists.
+
+    Adapted from PyArrow (``python/pyarrow/tests/util.py``), which describes
+    this path as the default location where ``tz.cpp`` looks for the database.
+
+    Used to skip tests on Windows when the tz database is not configured.
+
+    See https://github.com/vega/altair/issues/3050.
+    """
+    return Path.home().joinpath("Downloads", "tzdata").exists()
+
+
+slow: pytest.MarkDecorator = pytest.mark.slow()
+"""
+Custom ``pytest.mark`` decorator.
+
+By default **all** tests are run.
+
+Slow tests can be **excluded** using::
+
+    >>> hatch run test-fast  # doctest: +SKIP
+
+To run **only** slow tests use::
+
+    >>> hatch run test-slow  # doctest: +SKIP
+
+Either script can accept ``pytest`` args::
+
+    >>> hatch run test-slow --durations=25  # doctest: +SKIP
+"""
+
+
+skip_requires_vl_convert: pytest.MarkDecorator = pytest.mark.skipif(
+    find_spec("vl_convert") is None, reason="`vl_convert` not installed."
+)
+"""
+``pytest.mark.skipif`` decorator.
+
+Applies when `vl-convert`_ import would fail.
+
+.. _vl-convert:
+   https://github.com/vega/vl-convert
+"""
+
+skip_requires_vegafusion: pytest.MarkDecorator = pytest.mark.skipif(
+    find_spec("vegafusion") is None, reason="`vegafusion` not installed."
+)
+"""
+``pytest.mark.skipif`` decorator.
+
+Applies when `vegafusion`_ import would fail.
+
+.. _vegafusion:
+    https://github.com/vega/vegafusion
+"""
+
+
+def skip_requires_pyarrow(
+    fn: Callable[..., Any] | None = None, /, *, requires_tzdata: bool = False
+) -> Callable[..., Any]:
+    """
+    ``pytest.mark.skipif`` decorator.
+
+    Applies when `pyarrow`_ import would fail. With ``requires_tzdata=True``, the
+    test is also expected to fail on Windows without a tz database (`#3050`_).
+
+    .. _pyarrow:
+       https://pypi.org/project/pyarrow/
+    .. _#3050:
+       https://github.com/vega/altair/issues/3050
+    """
+    marks = [
+        pytest.mark.skipif(
+            find_spec("pyarrow") is None, reason="`pyarrow` not installed."
+        )
+    ]
+    if requires_tzdata:
+        # A separate mark: passing one mark to another only adds an ``xfail`` arg.
+        tz_missing = sys.platform == "win32" and not windows_has_tzdata()
+        reason = "Timezone database is not installed on Windows"
+        marks.append(pytest.mark.xfail(tz_missing, reason=reason))
+
+    def wrap(test_fn: Callable[..., Any], /) -> Callable[..., Any]:
+        for mark in marks:
+            test_fn = mark(test_fn)
+        return test_fn
+
+    return wrap if fn is None else wrap(fn)
+
+
+def id_func_str_only(val) -> str:
+    """
+    Build a test-id part from ``val``, keeping it only when it is a ``str``.
+
+    Any other value contributes an empty string, so an id is made up of the
+    `filename` alone instead of ``repr(source code)-filename``.
+    """
+    if isinstance(val, str):
+        return val
+    return ""
+
+
+def _wrap_mark_specs(
+    pattern_marks: Mapping[Pattern[str] | str, MarksType], /
+) -> dict[Pattern[str], MarksType]:
+    """Return a copy of ``pattern_marks`` keyed only by compiled patterns."""
+    specs = pattern_marks.items()
+    # Only non-pattern keys need compiling; ``re.Pattern`` keys are reused as-is.
+    return {(k if isinstance(k, re.Pattern) else re.compile(k)): v for k, v in specs}
+
+
+def _fill_marks(
+    mark_specs: dict[Pattern[str], MarksType], string: str, /
+) -> MarksType | tuple[()]:
+    """Return the marks of the first pattern found in ``string``, else ``()``."""
+    return next((m for pat, m in mark_specs.items() if pat.search(string)), ())
+
+
+def _distributed_examples(
+    *exclude_prefixes: str, marks: Mapping[Pattern[str] | str, MarksType] | None = None
+) -> Iterator[ParameterSet]:
+    """
+    Yields ``pytest.mark.parametrize`` arguments for all examples.
+
+    Parameters
+    ----------
+    *exclude_prefixes
+        Any file starting with these will be **skipped**.
+    marks
+        Mapping of ``re.search(..., )`` patterns to ``pytest.param(marks=...)``.
+
+        The **first** match (if any) will be inserted into ``marks``.
+    """
+    RE_NAME: Pattern[str] = re.compile(r"^tests\.(.*)")
+    mark_specs = _wrap_mark_specs(marks) if marks else {}
+
+    for pkg in [examples_arguments_syntax, examples_methods_syntax]:
+        pkg_name = pkg.__name__
+        if match := RE_NAME.match(pkg_name):
+            pkg_name_unqual: str = match.group(1)
+        else:
+            msg = f"Failed to match pattern {RE_NAME.pattern!r} against {pkg_name!r}"
+            raise ValueError(msg)
+        for _, mod_name, is_pkg in pkgutil.iter_modules(pkg.__path__):
+            if not (is_pkg or mod_name.startswith(exclude_prefixes)):
+                file_name = f"{mod_name}.py"
+                msg_name = f"{pkg_name_unqual}.{file_name}"
+                if source := pkgutil.get_data(pkg_name, file_name):
+                    yield pytest.param(
+                        source, msg_name, marks=_fill_marks(mark_specs, msg_name)
+                    )
+                else:
+                    msg = (
+                        f"Failed to get source data from `{pkg_name}.{file_name}`.\n"
+                        f"pkgutil.get_data(...) returned: {source!r}"
+                    )
+                    raise TypeError(msg)
+
+
+ignore_DataFrameGroupBy: pytest.MarkDecorator = pytest.mark.filterwarnings(
+    "ignore:DataFrameGroupBy.apply.*:DeprecationWarning"
+)
+"""
+``pytest.mark.filterwarnings`` decorator.
+
+Hides ``pandas`` warning(s)::
+
+    "ignore:DataFrameGroupBy.apply.*:DeprecationWarning"
+"""
+
+
+distributed_examples: pytest.MarkDecorator = pytest.mark.parametrize(
+    ("source", "filename"),
+    tuple(
+        _distributed_examples(
+            "_",
+            "interval_selection_map_quakes",
+            marks={
+                "beckers_barley.+facet": slow,
+                "lasagna_plot": slow,
+                "line_chart_with_cumsum_faceted": slow,
+                "layered_bar_chart": slow,
+                "multiple_interactions": slow,
+                "layered_histogram": slow,
+                "stacked_bar_chart_with_text": slow,
+                "bar_chart_with_labels": slow,
+                "interactive_cross_highlight": slow,
+                "wind_vector_map": slow,
+                r"\.point_map\.py": slow,
+                "line_chart_with_color_datum": slow,
+            },
+        )
+    ),
+    ids=id_func_str_only,
+)
+"""
+``pytest.mark.parametrize`` decorator.
+
+Provides **all** examples, using both `arguments` & `methods` syntax.
+
+The decorated test can evaluate each resulting chart via::
+
+    from altair.utils.execeval import eval_block
+
+    @distributed_examples
+    def test_some_stuff(source: Any, filename: str) -> None:
+        chart: ChartType | None = eval_block(source)
+        ... # Perform any assertions
+
+Notes
+-----
+- See `#3431 comment`_ for performance benefit.
+- `interval_selection_map_quakes` requires `#3418`_ fix
+
+.. _#3431 comment:
+   https://github.com/vega/altair/pull/3431#issuecomment-2168508048
+.. _#3418:
+   https://github.com/vega/altair/issues/3418
+"""
diff --git a/tests/utils/test_to_values_narwhals.py b/tests/utils/test_to_values_narwhals.py
index 1a96c67751..b1be4c5714 100644
--- a/tests/utils/test_to_values_narwhals.py
+++ b/tests/utils/test_to_values_narwhals.py
@@ -1,6 +1,6 @@
+import re
 import sys
 from datetime import datetime
-from pathlib import Path
 
 import narwhals.stable.v1 as nw
 import pandas as pd
@@ -14,23 +14,8 @@
 from altair.utils.data import to_values
+from tests import skip_requires_pyarrow
 
 
-def windows_has_tzdata():
-    """
-    From PyArrow: python/pyarrow/tests/util.py.
-
-    This is the default location where tz.cpp will look for (until we make
-    this configurable at run-time)
-    """
-    return Path.home().joinpath("Downloads", "tzdata").exists()
-
-
-# Skip test on Windows when the tz database is not configured.
-# See https://github.com/vega/altair/issues/3050.
-@pytest.mark.skipif(
-    sys.platform == "win32" and not windows_has_tzdata(),
-    reason="Timezone database is not installed on Windows",
-)
-@pytest.mark.skipif(pa is None, reason="pyarrow not installed")
+@skip_requires_pyarrow(requires_tzdata=True)
 def test_arrow_timestamp_conversion():
     """Test that arrow timestamp values are converted to ISO-8601 strings."""
     data = {
diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py
index 0e22ec7e9c..511b796992 100644
--- a/tests/utils/test_utils.py
+++ b/tests/utils/test_utils.py
@@ -128,10 +128,11 @@ def test_sanitize_dataframe_arrow_columns():
     json.dumps(records)
 
 
-@pytest.mark.skipif(pa is None, reason="pyarrow not installed")
-@pytest.mark.xfail(
-    sys.platform == "win32", reason="Timezone database is not installed on Windows"
-)
+# NOTE: Imported here because this hunk does not reach the module's import block.
+from tests import skip_requires_pyarrow  # noqa: E402
+
+
+@skip_requires_pyarrow
 def test_sanitize_pyarrow_table_columns() -> None:
     # create a dataframe with various types
     df = pd.DataFrame(
diff --git a/tests/vegalite/v5/test_api.py b/tests/vegalite/v5/test_api.py
index 241d473788..9fa767cf45 100644
--- a/tests/vegalite/v5/test_api.py
+++ b/tests/vegalite/v5/test_api.py
@@ -13,7 +13,7 @@
 from datetime import date
 from importlib.metadata import version as importlib_version
 
-import ibis
+import duckdb
 import jsonschema
 import narwhals.stable.v1 as nw
 import pandas as pd
@@ -22,7 +22,9 @@
 from packaging.version import Version
 
 import altair as alt
-from altair.utils.schemapi import Optional, Undefined
+from altair.utils.core import use_signature
+from altair.utils.schemapi import Optional, SchemaValidationError, Undefined
+from tests import skip_requires_pyarrow, skip_requires_vl_convert, slow
 
 try:
     import vl_convert as vlc
@@ -1528,26 +1530,108 @@ def test_polars_with_pandas_nor_pyarrow(monkeypatch: pytest.MonkeyPatch):
     assert "numpy" not in sys.modules
 
 
-@pytest.mark.skipif(
-    sys.version_info < (3, 9),
-    reason="The maximum `ibis` version installable on Python 3.8 is `ibis==5.1.0`,"
-    " which doesn't support the dataframe interchange protocol.",
-)
-@pytest.mark.skipif(
-    Version("1.5") > PANDAS_VERSION,
-    reason="A warning is thrown on old pandas versions",
-)
-@pytest.mark.xfail(
-    sys.platform == "win32", reason="Timezone database is not installed on Windows"
-)
-def test_ibis_with_date_32():
-    df = pl.DataFrame(
+@skip_requires_pyarrow(requires_tzdata=True)
+def test_interchange_with_date_32():
+    # Test that objects which Narwhals only supports at the interchange
+    # level can be plotted when they contain date32 columns.
+    df = pl.DataFrame(  # noqa: F841
         {"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]}
     )
-    tbl = ibis.memtable(df)
-    result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict()
+    rel = duckdb.sql("select * from df")
+    result = alt.Chart(rel).mark_line().encode(x="a", y="b").to_dict()
     assert next(iter(result["datasets"].values())) == [
         {"a": 1, "b": "2020-01-01T00:00:00"},
         {"a": 2, "b": "2020-01-02T00:00:00"},
         {"a": 3, "b": "2020-01-03T00:00:00"},
     ]
+
+
+@skip_requires_pyarrow(requires_tzdata=True)
+def test_interchange_with_vegafusion(monkeypatch: pytest.MonkeyPatch):
+    # Test that objects which Narwhals only supports at the interchange level
+    # don't get converted to PyArrow unnecessarily when plotted with vegafusion.
+    # TODO: simplify drastically once VegaFusion has some level of DuckDB support,
+    # as it can then just be `alt.Chart(rel_df)` without DuckDBWithInterchangeSupport.
+    from datetime import datetime  # NOTE: module header only imports `date`
+
+    df = pl.DataFrame(  # noqa: F841
+        {
+            "a": [1, 2, 3],
+            "b": [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)],
+        }
+    )
+    rel = duckdb.sql("select * from df")
+
+    class DuckDBWithInterchangeSupport:
+        """
+        DuckDB doesn't (yet?) support the interchange protocol.
+
+        So, we create a duckdb wrapper which defers to PyArrow's
+        implementation of the protocol.
+        """
+
+        def __init__(self, rel: duckdb.DuckDBPyRelation) -> None:
+            self._rel = rel
+
+        def __dataframe__(self, allow_copy: bool = True) -> object:
+            return self._rel.to_arrow_table().__dataframe__()
+
+    rel_df = DuckDBWithInterchangeSupport(rel)
+    # "poison" `arrow_table_from_dfi_dataframe` to check that it does not get called
+    # if we use the vegafusion transformer
+    monkeypatch.setattr(
+        "altair.utils.data.arrow_table_from_dfi_dataframe", lambda x: 1 / 0
+    )
+
+    # Narwhals doesn't fully support our custom DuckDBWithInterchangeSupport,
+    # so we need to overwrite `to_native`
+    def to_native(df, strict):
+        if isinstance(df, nw.DataFrame):
+            return rel_df
+        return df
+
+    monkeypatch.setattr("narwhals.stable.v1.to_native", to_native)
+
+    with alt.data_transformers.enable("vegafusion"):
+        result = (
+            alt.Chart(rel_df).mark_line().encode(x="a", y="b").to_dict(format="vega")
+        )
+    assert next(iter(result["data"]))["values"] == [
+        {"a": 1, "b": "2020-01-01T00:00:00.000"},
+        {"a": 2, "b": "2020-01-02T00:00:00.000"},
+        {"a": 3, "b": "2020-01-03T00:00:00.000"},
+    ]
+
+
+def test_binding() -> None:
+    @use_signature(alt.Binding)
+    def old_binding(input: Any, **kwargs: Any) -> alt.Binding:
+        """A generic binding."""
+        return alt.Binding(input=input, **kwargs)
+
+    # NOTE: `mypy` doesn't complain, but `pyright` does
+    old = old_binding(input="search", placeholder="Country", name="Search")  # pyright: ignore[reportCallIssue]
+    old_positional = old_binding("search", placeholder="Country", name="Search")
+
+    new = alt.binding(input="search", placeholder="Country", name="Search")
+    new_positional = alt.binding("search", placeholder="Country", name="Search")
+
+    assert (
+        old.to_dict()
+        == old_positional.to_dict()
+        == new.to_dict()
+        == new_positional.to_dict()
+    )
+    assert all(
+        isinstance(x, alt.Binding) for x in (old, old_positional, new, new_positional)
+    )
+
+    MISSING_INPUT = r"missing 1 required positional argument: 'input"
+
+    # NOTE: `mypy` doesn't complain, but `pyright` does (Again)
+    with pytest.raises(TypeError, match=MISSING_INPUT):
+        old_binding(placeholder="Country", name="Search")  # pyright: ignore[reportCallIssue]
+
+    # NOTE: Both type checkers can detect the issue on the new signature
+    with pytest.raises(TypeError, match=MISSING_INPUT):
+        alt.binding(placeholder="Country", name="Search")  # type: ignore[call-arg]