From 1bd455206d5898800ae87d7c22cafba05c9c012e Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Thu, 21 Nov 2024 12:34:02 +0000
Subject: [PATCH] revert: Remove `"polars[pyarrow]"` backend

Partially related to
https://github.com/vega/altair/pull/3631#issuecomment-2484826592

After some thought, this backend didn't add support for any unique
dependency configs. I've only ever used `use_pyarrow=True` for
`pl.DataFrame.write_parquet`, to resolve an issue with invalid headers in
`"polars<1.0.0;>=0.19.0"`.
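
For reference, the only use of `use_pyarrow=True` was a parquet write along
these lines (a minimal sketch; the frame and output path are placeholders,
not code from this repo):

    import polars as pl

    df = pl.DataFrame({"a": [1, 2, 3]})  # placeholder data
    # Route the write through pyarrow to avoid the invalid parquet headers
    # produced by the native writer in "polars<1.0.0;>=0.19.0".
    df.write_parquet("data.parquet", use_pyarrow=True)

None of the read paths needed it, so dropping the backend loses nothing.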
---
 altair/datasets/_loader.py  |  5 ++---
 altair/datasets/_readers.py | 32 +++-----------------------------
 tests/test_datasets.py      |  5 ++---
 3 files changed, 7 insertions(+), 35 deletions(-)

diff --git a/altair/datasets/_loader.py b/altair/datasets/_loader.py
index 5d8c1ec8b..3e31aea2e 100644
--- a/altair/datasets/_loader.py
+++ b/altair/datasets/_loader.py
@@ -55,7 +55,7 @@ class Loader(Generic[IntoDataFrameT, IntoFrameT]):
     @overload
     @classmethod
     def from_backend(
-        cls, backend_name: Literal["polars", "polars[pyarrow]"], /
+        cls, backend_name: Literal["polars"], /
     ) -> Loader[pl.DataFrame, pl.LazyFrame]: ...
 
     @overload
@@ -81,7 +81,6 @@ def from_backend(cls, backend_name: _Backend, /) -> Loader[Any, Any]:
             DataFrame package/config used to return data.
 
             * *polars*: Using `polars defaults`_
-            * *polars[pyarrow]*: Using ``use_pyarrow=True``
             * *pandas*: Using `pandas defaults`_.
             * *pandas[pyarrow]*: Using ``dtype_backend="pyarrow"``
             * *pyarrow*: (*Experimental*)
@@ -347,7 +346,7 @@ def __call__(
         suffix: Extension | None = ...,
         /,
         tag: Version | None = ...,
-        backend: Literal["polars", "polars[pyarrow]"] = ...,
+        backend: Literal["polars"] = ...,
         **kwds: Any,
     ) -> pl.DataFrame: ...
     @overload
diff --git a/altair/datasets/_readers.py b/altair/datasets/_readers.py
index e93fb55e1..f7b8aecf5 100644
--- a/altair/datasets/_readers.py
+++ b/altair/datasets/_readers.py
@@ -73,9 +73,8 @@
     _Pandas: TypeAlias = Literal["pandas"]
     _PyArrow: TypeAlias = Literal["pyarrow"]
     _ConcreteT = TypeVar("_ConcreteT", _Polars, _Pandas, _PyArrow)
-    _PolarsAny: TypeAlias = Literal[_Polars, "polars[pyarrow]"]
     _PandasAny: TypeAlias = Literal[_Pandas, "pandas[pyarrow]"]
-    _Backend: TypeAlias = Literal[_PolarsAny, _PandasAny, _PyArrow]
+    _Backend: TypeAlias = Literal[_Polars, _PandasAny, _PyArrow]
 
 
 __all__ = ["backend"]
@@ -332,25 +331,6 @@ def __init__(self, name: _Polars, /) -> None:
         self._scan_fn = {".parquet": pl.scan_parquet}
 
 
-class _PolarsPyArrowReader(_Reader["pl.DataFrame", "pl.LazyFrame"]):
-    def __init__(self, name: Literal["polars[pyarrow]"], /) -> None:
-        _pl, _pa = _requirements(name)
-        self._name = name
-        if not TYPE_CHECKING:
-            pl = self._import(_pl)
-            pa = self._import(_pa)  # noqa: F841
-        self._read_fn = {
-            ".csv": partial(pl.read_csv, use_pyarrow=True, try_parse_dates=True),
-            ".json": _pl_read_json_roundtrip,
-            ".tsv": partial(
-                pl.read_csv, separator="\t", use_pyarrow=True, try_parse_dates=True
-            ),
-            ".arrow": partial(pl.read_ipc, use_pyarrow=True),
-            ".parquet": partial(pl.read_parquet, use_pyarrow=True),
-        }
-        self._scan_fn = {".parquet": pl.scan_parquet}
-
-
 class _PyArrowReader(_Reader["pa.Table", "pa.Table"]):
     """
     Reader backed by `pyarrow.Table`_.
@@ -509,7 +489,7 @@ def infer_backend(
 
 
 @overload
-def backend(name: _PolarsAny, /) -> _Reader[pl.DataFrame, pl.LazyFrame]: ...
+def backend(name: _Polars, /) -> _Reader[pl.DataFrame, pl.LazyFrame]: ...
 @overload
@@ -524,8 +504,6 @@ def backend(name: _Backend, /) -> _Reader[Any, Any]:
     """Reader initialization dispatcher."""
     if name == "polars":
         return _PolarsReader(name)
-    elif name == "polars[pyarrow]":
-        return _PolarsPyArrowReader(name)
     elif name == "pandas[pyarrow]":
         return _PandasPyArrowReader(name)
     elif name == "pandas":
@@ -548,10 +526,6 @@ def _requirements(s: _ConcreteT, /) -> _ConcreteT: ...
 def _requirements(s: Literal["pandas[pyarrow]"], /) -> tuple[_Pandas, _PyArrow]: ...
 
 
-@overload
-def _requirements(s: Literal["polars[pyarrow]"], /) -> tuple[_Polars, _PyArrow]: ...
-
-
 def _requirements(s: _Backend, /):
     concrete: set[Literal[_Polars, _Pandas, _PyArrow]] = {"polars", "pandas", "pyarrow"}
     if s in concrete:
@@ -560,7 +534,7 @@ def _requirements(s: _Backend, /):
         from packaging.requirements import Requirement
 
         req = Requirement(s)
-        supports_extras: set[Literal[_Polars, _Pandas]] = {"polars", "pandas"}
+        supports_extras: set[Literal[_Pandas]] = {"pandas"}
         if req.name in supports_extras:
             name = req.name
             if (extras := req.extras) and extras == {"pyarrow"}:
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index a4bbe40c4..e31f7990e 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -68,7 +68,6 @@ class DatasetSpec(TypedDict, total=False):
                 ),
             ),
         ),
-        pytest.param("polars[pyarrow]", marks=requires_pyarrow),
         pytest.param("pandas[pyarrow]", marks=requires_pyarrow),
         pytest.param("pyarrow", marks=requires_pyarrow),
     ],
@@ -302,7 +301,7 @@ def test_loader_call(backend: _Backend, monkeypatch: pytest.MonkeyPatch) -> None
 def test_missing_dependency_single(
     backend: _Backend, monkeypatch: pytest.MonkeyPatch
 ) -> None:
-    if backend in {"polars[pyarrow]", "pandas[pyarrow]"}:
+    if backend == "pandas[pyarrow]":
         pytest.skip("Testing single dependency backends only")
 
     monkeypatch.setitem(sys.modules, backend, None)
@@ -317,7 +316,7 @@ def test_missing_dependency_single(
         Loader.from_backend(backend)
 
 
-@pytest.mark.parametrize("backend", ["polars[pyarrow]", "pandas[pyarrow]"])
+@pytest.mark.parametrize("backend", ["pandas[pyarrow]"])
 @skip_requires_pyarrow
 def test_missing_dependency_multi(
     backend: _Backend, monkeypatch: pytest.MonkeyPatch