From dead70f4c5a1f293a3c4b57e178a762bd9975c37 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 29 Dec 2024 15:03:14 +0100 Subject: [PATCH] friendlier error messages for missing chunk managers (#9676) * raise an error message while guessing if there's no chunkmanager available * don't skip the no chunkmanager test if dask is not installed * whats-new * ensure at least one chunk manager is available * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove additional blank line from a bad merge * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * improve the wording Co-authored-by: Tom Nicholas * switch to ImportError * raise a helpful `ImportError` for known chunk managers * make sure the new `ImportError` is actually raised * check that the more specific error message is preferred * prefer the more specific error * also use `ImportError` as indicator for `chunks=None` * move and improve the whats-new entry * capitalize global variable KNOWN_CHUNKMANAGERS * chunkmanagers -> available_chunkmanagers * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * also use the string repr for printing `manager` * reword * more repr * reflow * adapt the test to the new error message --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Tom Nicholas Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/backends/zarr.py | 2 +- xarray/namedarray/parallelcompat.py | 33 +++++++++++++++++++++++------ xarray/tests/test_parallelcompat.py | 31 +++++++++++++++++++++------ 4 files changed, 54 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aab51d71b09..086f4bf1084 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,8 @@ v.2024.12.0 (unreleased) New Features ~~~~~~~~~~~~ +- Improve the error message 
raised when using chunked-array methods if no chunk manager is available or if the requested chunk manager is missing (:pull:`9676`). + By `Justus Magin <https://github.com/keewis>`_. - Better support wrapping additional array types (e.g. ``cupy`` or ``jax``) by calling generalized duck array operations throughout more xarray methods. (:issue:`7848`, :pull:`9798`). By `Sam Levang <https://github.com/slevang>`_. diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e0a4a042634..f7f30272941 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1471,7 +1471,7 @@ def open_zarr( ) # attempt to import that parallel backend chunks = {} - except ValueError: + except (ValueError, ImportError): chunks = None if kwargs: diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 69dd4ab5f93..c1fe5999ecb 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -46,6 +46,12 @@ def compute( T_ChunkedArray = TypeVar("T_ChunkedArray", bound=ChunkedArrayMixinProtocol) +KNOWN_CHUNKMANAGERS = { + "dask": "dask", + "cubed": "cubed-xarray", + "arkouda": "arkouda-xarray", +} + @functools.lru_cache(maxsize=1) def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: @@ -95,29 +101,42 @@ def guess_chunkmanager( Else use whatever is installed, defaulting to dask if there are multiple options. """ - chunkmanagers = list_chunkmanagers() + available_chunkmanagers = list_chunkmanagers() if manager is None: - if len(chunkmanagers) == 1: + if len(available_chunkmanagers) == 1: # use the only option available - manager = next(iter(chunkmanagers.keys())) + manager = next(iter(available_chunkmanagers.keys())) else: # use the one in options (default dask) manager = OPTIONS["chunk_manager"] if isinstance(manager, str): - if manager not in chunkmanagers: + if manager not in available_chunkmanagers and manager in KNOWN_CHUNKMANAGERS: + raise ImportError( + f"chunk manager {manager!r} is not available." 
+ f" Please make sure {KNOWN_CHUNKMANAGERS[manager]!r} is installed" + " and importable." + ) + elif len(available_chunkmanagers) == 0: + raise ImportError( + "no chunk managers available. Try installing `dask` or another package" + " that provides a chunk manager." + ) + elif manager not in available_chunkmanagers: raise ValueError( - f"unrecognized chunk manager {manager} - must be one of: {list(chunkmanagers)}" + f"unrecognized chunk manager {manager!r} - must be one of the installed" + f" chunk managers: {list(available_chunkmanagers)}" ) - return chunkmanagers[manager] + return available_chunkmanagers[manager] elif isinstance(manager, ChunkManagerEntrypoint): # already a valid ChunkManager so just pass through return manager else: raise TypeError( - f"manager must be a string or instance of ChunkManagerEntrypoint, but received type {type(manager)}" + "manager must be a string or instance of ChunkManagerEntrypoint," + f" but received type {type(manager)}" ) diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index 67c68aac534..67e3d00cfbe 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -11,13 +11,14 @@ from xarray.namedarray._typing import _Chunks from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import ( + KNOWN_CHUNKMANAGERS, ChunkManagerEntrypoint, get_chunked_array_type, guess_chunkmanager, list_chunkmanagers, load_chunkmanagers, ) -from xarray.tests import has_dask, requires_dask +from xarray.tests import requires_dask class DummyChunkedArray(np.ndarray): @@ -158,8 +159,19 @@ def test_get_chunkmanger_via_set_options(self, register_dummy_chunkmanager) -> N chunkmanager = guess_chunkmanager(None) assert isinstance(chunkmanager, DummyChunkManager) - def test_fail_on_nonexistent_chunkmanager(self) -> None: - with pytest.raises(ValueError, match="unrecognized chunk manager foo"): + def test_fail_on_known_but_missing_chunkmanager( + self, 
register_dummy_chunkmanager, monkeypatch + ) -> None: + monkeypatch.setitem(KNOWN_CHUNKMANAGERS, "test", "test-package") + with pytest.raises( + ImportError, match="chunk manager 'test' is not available.+test-package" + ): + guess_chunkmanager("test") + + def test_fail_on_nonexistent_chunkmanager( + self, register_dummy_chunkmanager + ) -> None: + with pytest.raises(ValueError, match="unrecognized chunk manager 'foo'"): guess_chunkmanager("foo") @requires_dask @@ -167,9 +179,16 @@ def test_get_dask_if_installed(self) -> None: chunkmanager = guess_chunkmanager(None) assert isinstance(chunkmanager, DaskManager) - @pytest.mark.skipif(has_dask, reason="requires dask not to be installed") - def test_dont_get_dask_if_not_installed(self) -> None: - with pytest.raises(ValueError, match="unrecognized chunk manager dask"): + def test_no_chunk_manager_available(self, monkeypatch) -> None: + monkeypatch.setattr("xarray.namedarray.parallelcompat.list_chunkmanagers", dict) + with pytest.raises(ImportError, match="no chunk managers available"): + guess_chunkmanager("foo") + + def test_no_chunk_manager_available_but_known_manager_requested( + self, monkeypatch + ) -> None: + monkeypatch.setattr("xarray.namedarray.parallelcompat.list_chunkmanagers", dict) + with pytest.raises(ImportError, match="chunk manager 'dask' is not available"): guess_chunkmanager("dask") @requires_dask