Cache pre-existing Zarr arrays in Zarr backend #15788
143 fail, 2 321 skipped, 18 706 pass in 1h 20m 32s
11 files 11 suites 1h 20m 32s ⏱️
21 170 tests 18 706 ✅ 2 321 💤 143 ❌
180 091 runs 154 917 ✅ 24 155 💤 1 019 ❌
Results for commit dcecf43.
Annotations
Check warning on line 0 in xarray.tests.test_backends.TestNCZarr
github-actions / Test Results
9 out of 10 runs failed: test_open_nczarr (xarray.tests.test_backends.TestNCZarr)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.10/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestNCZarr object at 0x7efc78180460>
def test_open_nczarr(self) -> None:
with create_tmp_file(suffix=".zarr") as tmp:
expected = self._create_nczarr(tmp)
> actual = xr.open_zarr(tmp, consolidated=False)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:6070:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp_tomczjb/temp-67.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
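Note on this failure (and the other Zarr failures below, which share the same root cause): `open_dataset` forwards every keyword it does not recognize through `**kwargs` to `backend.open_dataset()`, so the `cache_members` option that `open_zarr` apparently passes on this branch reaches `ZarrBackendEntrypoint.open_dataset()`, whose signature does not yet accept it. The sketch below reproduces only that mechanism with hypothetical stand-in names (`FakeZarrEntrypoint`, `fake_open_dataset`); it is not xarray code.

class FakeZarrEntrypoint:
    # The entrypoint lists only the keywords it knows about: there is no
    # `cache_members` parameter and no **kwargs catch-all.
    def open_dataset(self, filename_or_obj, *, consolidated=None, drop_variables=None):
        return {"source": filename_or_obj, "consolidated": consolidated}

def fake_open_dataset(filename_or_obj, **kwargs):
    # Mirrors the forwarding visible in the traceback above: unrecognized
    # keywords are passed straight through to the backend entrypoint.
    backend = FakeZarrEntrypoint()
    return backend.open_dataset(filename_or_obj, **kwargs)

try:
    fake_open_dataset("store.zarr", consolidated=False, cache_members=True)
except TypeError as err:
    # TypeError: FakeZarrEntrypoint.open_dataset() got an unexpected
    # keyword argument 'cache_members'
    print(err)

The tests imply the fix is for the Zarr entrypoint to accept (or consume) the new keyword before forwarding; the exact change is left to this PR.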
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_read_non_consolidated_warning[2] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
Failed: DID NOT WARN. No warnings of type (<class 'RuntimeWarning'>,) were emitted.
Emitted warnings: [].
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015f098d0>
def test_read_non_consolidated_warning(self) -> None:
expected = create_test_data()
with self.create_zarr_target() as store:
self.save(
expected, store_target=store, consolidated=False, **self.version_kwargs
)
with pytest.warns(
RuntimeWarning,
match="Failed to open Zarr store with consolidated",
):
> with xr.open_zarr(store, **self.version_kwargs) as ds:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:2335:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp0y8segy3/temp-95.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
During handling of the above exception, another exception occurred:
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015f098d0>
def test_read_non_consolidated_warning(self) -> None:
expected = create_test_data()
with self.create_zarr_target() as store:
self.save(
expected, store_target=store, consolidated=False, **self.version_kwargs
)
> with pytest.warns(
RuntimeWarning,
match="Failed to open Zarr store with consolidated",
):
E   Failed: DID NOT WARN. No warnings of type (<class 'RuntimeWarning'>,) were emitted.
E   Emitted warnings: [].
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:2331: Failed
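The DID NOT WARN failure here is a knock-on effect of the same TypeError: `xr.open_zarr` raises before the point where a failed consolidated-metadata open would emit the RuntimeWarning, so the `pytest.warns` block captures nothing and reports the missing warning chained onto the original error (the "During handling of the above exception" section above). A minimal, self-contained sketch of the pattern the test expects, with a hypothetical `open_store` stand-in for the real call:

import warnings

import pytest

def open_store(fail_early: bool) -> None:
    if fail_early:
        # Stand-in for the TypeError raised inside xr.open_zarr above; on this
        # path no warning is ever emitted, which pytest then reports as
        # "DID NOT WARN".
        raise TypeError("open_dataset() got an unexpected keyword argument 'cache_members'")
    warnings.warn("Failed to open Zarr store with consolidated metadata", RuntimeWarning)

def test_non_consolidated_warning() -> None:
    # Passes only when the warning is actually emitted inside the block.
    with pytest.warns(RuntimeWarning, match="Failed to open Zarr store with consolidated"):
        open_store(fail_early=False)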
Check warning on line 0 in xarray.tests.test_backends.TestNCZarr
github-actions / Test Results
9 out of 10 runs failed: test_overwriting_nczarr (xarray.tests.test_backends.TestNCZarr)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.10/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestNCZarr object at 0x7efc78181090>
def test_overwriting_nczarr(self) -> None:
with create_tmp_file(suffix=".zarr") as tmp:
ds = self._create_nczarr(tmp)
expected = ds[["var1"]]
expected.to_zarr(tmp, mode="w")
> actual = xr.open_zarr(tmp, consolidated=False)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:6078:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpq9j1tszn/temp-68.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_non_existent_store[2] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015f0a290>
def test_non_existent_store(self) -> None:
with pytest.raises(
FileNotFoundError, match="(No such file or directory|Unable to find group)"
):
> xr.open_zarr(f"{uuid.uuid4()}")
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:2342:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '4156c587-91fd-46a5-bcb2-70d66ef1b81c', engine = 'zarr'
chunks = {}, cache = False, decode_cf = True, mask_and_scale = True
decode_times = True, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-False-False-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015f0b670>
consolidated = False, compute = False, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpv172rkvu/temp-166.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
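For reference, the region-writing pattern exercised by both test_write_region parametrizations can be reproduced outside the test harness with public API only. This is a sketch, assuming a writable local directory and the hypothetical store path below; on this branch the final open_zarr would presumably hit the same cache_members TypeError shown above.

import numpy as np
import xarray as xr

store = "example_region.zarr"  # hypothetical local path
zeros = xr.Dataset({"u": (("x",), np.zeros(10))})
nonzeros = xr.Dataset({"u": (("x",), np.arange(1, 11))})

# Write the full-size array first so the later region writes have a target,
# chunked the same way the test requests via encoding.
zeros.to_zarr(store, mode="w", encoding={"u": {"chunks": 2}})

# Overwrite the store two elements at a time, aligned with the 2-element chunks.
for i in range(0, 10, 2):
    region = {"x": slice(i, i + 2)}
    nonzeros.isel(region).to_zarr(store, region=region)

with xr.open_zarr(store) as actual:
    assert (actual["u"].values == nonzeros["u"].values).all()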
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-False-False-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014d81930>
consolidated = True, compute = False, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmplkh9pb4c/temp-167.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-False-False-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014d80610>
consolidated = None, compute = False, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpl7xbcibe/temp-168.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-False-False-True-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa01518f670>
consolidated = False, compute = True, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp49rcii3k/temp-169.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-False-False-True-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015cb0220>
consolidated = True, compute = True, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpnk3zyucs/temp-170.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-False-False-True-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015cb0550>
consolidated = None, compute = True, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpvxswq6mb/temp-171.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
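Every test_write_region parametrization fails at the first xr.open_zarr call it reaches, because open_zarr always routes through the same open_dataset → backend entrypoint path shown above. The sketch below shows the shape of a signature change that lets the entrypoint consume the new option instead of rejecting it; the classes are stand-ins, not xarray's ZarrStore or backend entrypoint, and the default value and pass-through are assumptions, not this PR's actual code.

```python
from typing import Any


class FakeZarrStore:
    """Stand-in for xarray's ZarrStore; it only records whether member caching is on."""

    def __init__(self, path: str, cache_members: bool) -> None:
        self.path = path
        self.cache_members = cache_members


class ZarrEntrypointSketch:
    """Sketch of a backend entrypoint whose open_dataset() accepts cache_members."""

    def open_dataset(
        self,
        filename_or_obj: str,
        *,
        drop_variables: Any = None,
        cache_members: bool = True,  # name from the CI error; default assumed
        **decoder_kwargs: Any,
    ) -> FakeZarrStore:
        # In xarray this would open a ZarrStore and decode it into a Dataset;
        # here it only demonstrates that the forwarded keyword is consumed.
        return FakeZarrStore(str(filename_or_obj), cache_members)


store = ZarrEntrypointSketch().open_dataset("/tmp/example.zarr", cache_members=False)
print(store.cache_members)  # False
```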
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-False-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015cb14b0>
consolidated = False, compute = False, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmphkeqheen/temp-172.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
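Note: the TestZarrWriteEmpty failures all come from test_write_region, where the to_zarr writes that have run succeed and it is a verification open_zarr call that raises. For reference, the region-write workflow the test exercises looks roughly like the following standalone sketch (the store path and chunk size are placeholders; the test uses a temporary target):

import numpy as np
import xarray as xr

store = "store.zarr"  # placeholder path

zeros = xr.Dataset({"u": (("x",), np.zeros(10))})
nonzeros = xr.Dataset({"u": (("x",), np.arange(1, 11))})

# seed the store with the full-size array so regions can be written into it
zeros.to_zarr(store, mode="w", encoding={"u": {"chunks": 2}})

# overwrite the existing array two elements at a time
for i in range(0, 10, 2):
    region = {"x": slice(i, i + 2)}
    nonzeros.isel(region).to_zarr(store, region=region)

# on this commit, this is the kind of call that raises TypeError
with xr.open_zarr(store) as actual:
    print(actual["u"].values)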
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-False-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51270>
consolidated = True, compute = False, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpwer2gfck/temp-173.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-False-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51360>
consolidated = None, compute = False, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp7wavz0i2/temp-174.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
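Note: the function body shown above also explains why backend_kwargs and plain keyword arguments are interchangeable: kwargs.update(backend_kwargs) merges the two dicts before the backend entrypoint is called. For example (placeholder path), both of these calls hand consolidated=False to ZarrBackendEntrypoint.open_dataset:

import xarray as xr

ds_a = xr.open_dataset("example.zarr", engine="zarr", consolidated=False)
ds_b = xr.open_dataset(
    "example.zarr", engine="zarr", backend_kwargs={"consolidated": False}
)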
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-True-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51450>
consolidated = False, compute = True, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpxtteohao/temp-175.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
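Note: as an aside on the chunks options documented in the signature above, the choice only affects how the opened variables are represented in memory; a quick illustration (placeholder path; the last two forms assume dask is installed):

import xarray as xr

eager = xr.open_dataset("example.zarr", engine="zarr", chunks=None)    # NumPy-backed arrays
on_disk = xr.open_dataset("example.zarr", engine="zarr", chunks={})    # dask, engine-preferred chunks
auto = xr.open_dataset("example.zarr", engine="zarr", chunks="auto")   # dask "auto" chunking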
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-True-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51540>
consolidated = True, compute = True, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpnkt4g4vf/temp-176.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
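Note: the mapping form of the decode keywords described in the docstring above can be passed to open_dataset directly; a short sketch (placeholder path, and "my_variable" is the docstring's own example name):

import xarray as xr

ds = xr.open_dataset(
    "example.zarr",
    engine="zarr",
    mask_and_scale={"my_variable": False},  # leave this variable packed
    decode_times={"my_variable": False},    # keep its time values as raw numbers
)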
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-True-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c516c0>
consolidated = None, compute = True, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpn5xx9y_r/temp-177.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
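For reference, every failing ``test_write_region`` parametrization reduces to the same round trip: seed a Zarr store, overwrite aligned slices with region writes, then re-open the store (the step that now raises). A standalone sketch of that pattern, with a placeholder store path:
import numpy as np
import xarray as xr

store = "example.zarr"  # placeholder path

zeros = xr.Dataset({"u": (("x",), np.zeros(10))})
nonzeros = xr.Dataset({"u": (("x",), np.arange(1, 11))})

# seed the store with chunked zeros
zeros.to_zarr(store, mode="w", encoding={"u": {"chunks": 2}})

# overwrite the data two elements at a time via region writes
for i in range(0, 10, 2):
    region = {"x": slice(i, i + 2)}
    nonzeros.isel(region).to_zarr(store, region=region)

# the tests fail inside this re-open, not inside the writes
with xr.open_zarr(store, consolidated=False) as actual:
    print(actual["u"].values)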
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-False-False-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51600>
consolidated = False, compute = False, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp79sovq0s/temp-178.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
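The captured locals (``chunks = {}``, ``cache = False``) follow directly from the defaulting logic visible in the function body: ``open_zarr`` passes ``chunks={}`` unless told otherwise, and ``cache`` falls back to ``chunks is None``. The same choices made explicitly through ``open_dataset``, with a placeholder store path:
import xarray as xr

store = "example.zarr"  # placeholder path

# engine-preferred dask chunks; cache resolves to False because chunks is not None
lazy = xr.open_dataset(store, engine="zarr", chunks={})

# no dask; cache resolves to True because chunks is None
eager = xr.open_dataset(store, engine="zarr", chunks=None)

# a single dask chunk per array
single = xr.open_dataset(store, engine="zarr", chunks=-1)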
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-False-False-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c52830>
consolidated = True, compute = False, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpp4xqg680/temp-179.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
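As the docstring states and the ``kwargs.update(backend_kwargs)`` line implements, ``backend_kwargs`` is simply a second route for engine-specific keywords, so the two calls below should be interchangeable (placeholder store path):
import xarray as xr

store = "example.zarr"  # placeholder path

# engine keyword passed directly through **kwargs
ds_direct = xr.open_dataset(store, engine="zarr", consolidated=False)

# the same keyword routed through backend_kwargs and merged into **kwargs internally
ds_nested = xr.open_dataset(store, engine="zarr", backend_kwargs={"consolidated": False})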
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-False-False-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c518d0>
consolidated = None, compute = False, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp2ejz27yn/temp-180.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
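Several of the decoder arguments documented above accept either a bool or a per-variable mapping. A short usage sketch; the variable name ``u`` and the store path are placeholders:
import xarray as xr

store = "example.zarr"  # placeholder path

ds = xr.open_dataset(
    store,
    engine="zarr",
    mask_and_scale={"u": False},  # keep "u" as raw packed values
    decode_times=False,           # leave all time variables encoded as numbers
)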
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-True-False-True-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51a80>
consolidated = False, compute = True, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpvugrhpw2/temp-181.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
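Every failure in this report shares the same root cause: open_dataset forwards any keyword it does not consume directly to backend.open_dataset, so a keyword the entrypoint signature does not declare (here 'cache_members', presumably introduced by this PR's member-caching change) raises the TypeError at api.py:670. A toy sketch of that pass-through, using stand-in names rather than xarray's real machinery:

# Toy illustration of the kwarg pass-through that produces the TypeError above.
# These classes are stand-ins, not xarray's real entrypoint machinery.
class ToyBackendEntrypoint:
    def open_dataset(self, filename_or_obj, *, drop_variables=None):
        return {"source": filename_or_obj, "drop_variables": drop_variables}

def toy_open_dataset(filename_or_obj, *, backend, drop_variables=None, **kwargs):
    # Everything left in **kwargs is forwarded verbatim to the backend, so any
    # keyword the entrypoint signature does not declare blows up right here.
    return backend.open_dataset(filename_or_obj, drop_variables=drop_variables, **kwargs)

backend = ToyBackendEntrypoint()
toy_open_dataset("store.zarr", backend=backend)  # fine
try:
    toy_open_dataset("store.zarr", backend=backend, cache_members=True)
except TypeError as err:
    print(err)  # mirrors the TypeError in the raw output above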
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-True-False-True-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51b10>
consolidated = True, compute = True, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpuaz5qvir/temp-182.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
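For reference, a hedged sketch of the direction a fix would likely take: declare the new keyword on the Zarr entrypoint so the pass-through above no longer trips over it. How the actual PR threads 'cache_members' into the store is not visible in this report, so the subclass below is an illustration, not the real change:

# Hedged sketch only: one way to make the entrypoint tolerate the new keyword.
# Whether the PR forwards cache_members to the Zarr store this way is an
# assumption; this subclass merely declares the parameter so the kwarg
# pass-through in open_dataset stops failing.
from xarray.backends.zarr import ZarrBackendEntrypoint

class CacheAwareZarrEntrypoint(ZarrBackendEntrypoint):
    def open_dataset(self, filename_or_obj, *, cache_members=True, **kwargs):
        # A real fix would pass cache_members through to the store; here it is
        # only accepted (and ignored) to show where the signature must change.
        return super().open_dataset(filename_or_obj, **kwargs)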
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-True-False-True-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51ba0>
consolidated = None, compute = True, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp45ow98us/temp-183.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
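The parametrized test above exercises Zarr region writes; a condensed, standalone version of that pattern (public API only, with a placeholder local path instead of the test's temporary-store fixture) looks like this:

# Condensed, standalone version of the region-write pattern the test exercises.
import numpy as np
import xarray as xr

zeros = xr.Dataset({"u": (("x",), np.zeros(10))})
nonzeros = xr.Dataset({"u": (("x",), np.arange(1, 11))})

store = "region-demo.zarr"  # placeholder path; the test uses a tmp-dir fixture
zeros.to_zarr(store, mode="w", encoding={"u": {"chunks": 2}})

# Overwrite the existing array two elements at a time, in place.
for i in range(0, 10, 2):
    region = {"x": slice(i, i + 2)}
    nonzeros.isel(region).to_zarr(store, region=region)

with xr.open_zarr(store) as actual:
    np.testing.assert_array_equal(actual["u"].values, nonzeros["u"].values)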
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-True-False-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51e10>
consolidated = False, compute = False, use_dask = True, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp8v7qn7to/temp-184.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
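For context on the ``chunks`` argument described in the repeated docstring, here are its four documented modes side by side (placeholder path; dask is required for every case except chunks=None):

# The chunks options enumerated in the docstring, side by side.
# "data.nc" is a placeholder path.
import xarray as xr

path = "data.nc"
ds_numpy = xr.open_dataset(path, chunks=None)    # no dask: plain NumPy arrays
ds_native = xr.open_dataset(path, chunks={})     # dask, engine-preferred chunk sizes
ds_auto = xr.open_dataset(path, chunks="auto")   # dask "auto" chunking
ds_single = xr.open_dataset(path, chunks=-1)     # dask, one chunk per array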
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-True-False-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51ed0>
consolidated = True, compute = False, use_dask = True, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpjabsu1wg/temp-185.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
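The docstring repeatedly notes that the decoding keywords also accept a mapping for per-variable control; a small illustration with placeholder variable names:

# Per-variable decoding toggles, as the docstring describes.
# "packed_var" and "raw_time" are placeholder variable names.
import xarray as xr

ds = xr.open_dataset(
    "data.nc",
    mask_and_scale={"packed_var": False},  # keep this variable's raw packed values
    decode_times={"raw_time": False},      # leave this variable's times as numbers
)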
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-True-False-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51ff0>
consolidated = None, compute = False, use_dask = True, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpobam1lid/temp-186.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
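Finally, the docstring states that backend_kwargs is equivalent to passing extra keywords directly to the engine's open function; a short illustration using its own 'group' example (paths and variable names are placeholders):

# backend_kwargs and **kwargs are equivalent routes to the engine's open
# function, per the docstring; "group" is the engine-specific example it gives.
import xarray as xr

ds_a = xr.open_dataset("nested.nc", engine="h5netcdf", group="/model/run1")
ds_b = xr.open_dataset(
    "nested.nc",
    engine="h5netcdf",
    backend_kwargs={"group": "/model/run1"},
    drop_variables=["diagnostics"],  # skip a problematic variable entirely
)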