From fe0ae88201cc699b32ee1e9c07b602d9d7a8d439 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 10 Nov 2024 20:56:22 +0000 Subject: [PATCH] fix(DRAFT): Treat `polars` as exception, invalidate cache Possibly fix https://github.com/vega/altair/actions/runs/11768349827/job/32778071725?pr=3631 --- altair/datasets/_readers.py | 13 ++++++++----- tests/test_datasets.py | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/altair/datasets/_readers.py b/altair/datasets/_readers.py index f58fcd56d..eea9f18db 100644 --- a/altair/datasets/_readers.py +++ b/altair/datasets/_readers.py @@ -99,8 +99,8 @@ def scan_fn(self, source: StrPath, /) -> Callable[..., IntoFrameT]: return self._scan_fn[suffix] def _response_hook(self, f): - # HACK: pyarrow wants the file obj - return f.read() + # HACK: `pyarrow` + `pandas` wants the file obj + return f def dataset( self, @@ -273,6 +273,9 @@ def __init__(self, name: _Polars, /) -> None: } self._scan_fn = {".parquet": pl.scan_parquet} + def _response_hook(self, f): + return f.read() + class _PolarsPyArrowReader(_Reader["pl.DataFrame", "pl.LazyFrame"]): def __init__(self, name: Literal["polars[pyarrow]"], /) -> None: @@ -289,6 +292,9 @@ def __init__(self, name: Literal["polars[pyarrow]"], /) -> None: } self._scan_fn = {".parquet": pl.scan_parquet} + def _response_hook(self, f): + return f.read() + class _PyArrowReader(_Reader["pa.Table", "pa.Table"]): """ @@ -333,9 +339,6 @@ def __init__(self, name: _PyArrow, /) -> None: } self._scan_fn = {".parquet": pa_read_parquet} - def _response_hook(self, f): - return f - def _filter_reduce(predicates: tuple[Any, ...], constraints: Metadata, /) -> nw.Expr: """ diff --git a/tests/test_datasets.py b/tests/test_datasets.py index a15fb9411..c37bc0046 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -39,6 +39,7 @@ def test_loader_url(backend: _Backend) -> None: @backends def test_loader_call(backend: _Backend) -> None: data = Loader.with_backend(backend) + data.cache_dir = "" frame = data("stocks", ".csv") assert is_into_dataframe(frame) nw_frame = nw.from_native(frame)