From e611c979340148afced3eb30fc40656f593651a4 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Wed, 18 Oct 2023 04:40:38 -0600
Subject: [PATCH] Remove np.asarray in formatting.py (#8100)

---
Reviewer notes (below the three-dash line, so not part of the commit message):
* formatting.py: first_n_items/last_n_items unwrap a Variable to its `_data`
  and convert with to_duck_array() instead of np.asarray(); last_item converts
  with to_numpy() before calling .tolist(); format_items and short_array_repr
  also switch to to_duck_array().
* pycompat.py: adds the helpers to_numpy() and to_duck_array().
* variable.py: Variable.to_numpy() now returns to_numpy(self._data); the
  coercion steps removed from it match the body added as pycompat.to_numpy().

 xarray/core/formatting.py | 42 +++++++++++++++++++++++++++------------
 xarray/core/pycompat.py   | 34 +++++++++++++++++++++++++++++++
 xarray/core/variable.py   | 19 ++----------------
 3 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index 942bf5891ca..96a767f95ac 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -16,10 +16,10 @@
 import pandas as pd
 from pandas.errors import OutOfBoundsDatetime
 
-from xarray.core.duck_array_ops import array_equiv
-from xarray.core.indexing import ExplicitlyIndexed, MemoryCachedArray
+from xarray.core.duck_array_ops import array_equiv, astype
+from xarray.core.indexing import MemoryCachedArray
 from xarray.core.options import OPTIONS, _get_boolean_with_default
-from xarray.core.pycompat import array_type
+from xarray.core.pycompat import array_type, to_duck_array, to_numpy
 from xarray.core.utils import is_duck_array
 
 if TYPE_CHECKING:
@@ -68,6 +68,8 @@ def first_n_items(array, n_desired):
     # might not be a numpy.ndarray. Moreover, access to elements of the array
     # could be very expensive (e.g. if it's only available over DAP), so go out
     # of our way to get them in a single call to __getitem__ using only slices.
+    from xarray.core.variable import Variable
+
     if n_desired < 1:
         raise ValueError("must request at least one item")
 
@@ -78,7 +80,14 @@ def first_n_items(array, n_desired):
     if n_desired < array.size:
         indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=False)
         array = array[indexer]
-    return np.asarray(array).flat[:n_desired]
+
+    # We pass variable objects in to handle indexing
+    # with indexer above. It would not work with our
+    # lazy indexing classes at the moment, so we cannot
+    # pass Variable._data
+    if isinstance(array, Variable):
+        array = array._data
+    return np.ravel(to_duck_array(array))[:n_desired]
 
 
 def last_n_items(array, n_desired):
@@ -87,13 +96,22 @@ def last_n_items(array, n_desired):
     # might not be a numpy.ndarray. Moreover, access to elements of the array
     # could be very expensive (e.g. if it's only available over DAP), so go out
     # of our way to get them in a single call to __getitem__ using only slices.
+    from xarray.core.variable import Variable
+
     if (n_desired == 0) or (array.size == 0):
         return []
 
     if n_desired < array.size:
         indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=True)
         array = array[indexer]
-    return np.asarray(array).flat[-n_desired:]
+
+    # We pass variable objects in to handle indexing
+    # with indexer above. It would not work with our
+    # lazy indexing classes at the moment, so we cannot
+    # pass Variable._data
+    if isinstance(array, Variable):
+        array = array._data
+    return np.ravel(to_duck_array(array))[-n_desired:]
 
 
 def last_item(array):
@@ -103,7 +121,8 @@ def last_item(array):
         return []
 
     indexer = (slice(-1, None),) * array.ndim
-    return np.ravel(np.asarray(array[indexer])).tolist()
+    # to_numpy since dask doesn't support tolist
+    return np.ravel(to_numpy(array[indexer])).tolist()
 
 
 def calc_max_rows_first(max_rows: int) -> int:
@@ -171,10 +190,10 @@ def format_item(x, timedelta_format=None, quote_strings=True):
 
 def format_items(x):
     """Returns a succinct summaries of all items in a sequence as strings"""
-    x = np.asarray(x)
+    x = to_duck_array(x)
     timedelta_format = "datetime"
     if np.issubdtype(x.dtype, np.timedelta64):
-        x = np.asarray(x, dtype="timedelta64[ns]")
+        x = astype(x, dtype="timedelta64[ns]")
         day_part = x[~pd.isnull(x)].astype("timedelta64[D]").astype("timedelta64[ns]")
         time_needed = x[~pd.isnull(x)] != day_part
         day_needed = day_part != np.timedelta64(0, "ns")
@@ -584,12 +603,9 @@ def limit_lines(string: str, *, limit: int):
 def short_array_repr(array):
     from xarray.core.common import AbstractArray
 
-    if isinstance(array, ExplicitlyIndexed):
-        array = array.get_duck_array()
-    elif isinstance(array, AbstractArray):
+    if isinstance(array, AbstractArray):
         array = array.data
-    if not is_duck_array(array):
-        array = np.asarray(array)
+    array = to_duck_array(array)
 
     # default to lower precision so a full (abbreviated) line can fit on
     # one line with the default display_width
diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py
index 9af5d693170..bc8b61164f1 100644
--- a/xarray/core/pycompat.py
+++ b/xarray/core/pycompat.py
@@ -101,3 +101,37 @@ def is_chunked_array(x) -> bool:
 
 def is_0d_dask_array(x):
     return is_duck_dask_array(x) and is_scalar(x)
+
+
+def to_numpy(data) -> np.ndarray:
+    from xarray.core.indexing import ExplicitlyIndexed
+    from xarray.core.parallelcompat import get_chunked_array_type
+
+    if isinstance(data, ExplicitlyIndexed):
+        data = data.get_duck_array()
+
+    # TODO first attempt to call .to_numpy() once some libraries implement it
+    if hasattr(data, "chunks"):
+        chunkmanager = get_chunked_array_type(data)
+        data, *_ = chunkmanager.compute(data)
+    if isinstance(data, array_type("cupy")):
+        data = data.get()
+    # pint has to be imported dynamically as pint imports xarray
+    if isinstance(data, array_type("pint")):
+        data = data.magnitude
+    if isinstance(data, array_type("sparse")):
+        data = data.todense()
+    data = np.asarray(data)
+
+    return data
+
+
+def to_duck_array(data):
+    from xarray.core.indexing import ExplicitlyIndexed
+
+    if isinstance(data, ExplicitlyIndexed):
+        return data.get_duck_array()
+    elif is_duck_array(data):
+        return data
+    else:
+        return np.asarray(data)
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 7c4c4d9bd7d..576535eea2b 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -28,11 +28,11 @@
 from xarray.core.options import OPTIONS, _get_keep_attrs
 from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager
 from xarray.core.pycompat import (
-    array_type,
     integer_types,
     is_0d_dask_array,
     is_chunked_array,
     is_duck_dask_array,
+    to_numpy,
 )
 from xarray.core.utils import (
     OrderedSet,
@@ -1093,22 +1093,7 @@ def chunk(
     def to_numpy(self) -> np.ndarray:
         """Coerces wrapped data to numpy and returns a numpy.ndarray"""
         # TODO an entrypoint so array libraries can choose coercion method?
-        data = self.data
-
-        # TODO first attempt to call .to_numpy() once some libraries implement it
-        if hasattr(data, "chunks"):
-            chunkmanager = get_chunked_array_type(data)
-            data, *_ = chunkmanager.compute(data)
-        if isinstance(data, array_type("cupy")):
-            data = data.get()
-        # pint has to be imported dynamically as pint imports xarray
-        if isinstance(data, array_type("pint")):
-            data = data.magnitude
-        if isinstance(data, array_type("sparse")):
-            data = data.todense()
-        data = np.asarray(data)
-
-        return data
+        return to_numpy(self._data)
 
     def as_numpy(self) -> Self:
         """Coerces wrapped data into a numpy array, returning a Variable."""