Cache pre-existing Zarr arrays in Zarr backend #15788
143 fail, 2 321 skipped, 18 706 pass in 1h 20m 32s
11 files 11 suites 1h 20m 32s ⏱️
21 170 tests 18 706 ✅ 2 321 💤 143 ❌
180 091 runs 154 917 ✅ 24 155 💤 1 019 ❌
Results for commit dcecf43.
Annotations
Check warning on line 0 in xarray.tests.test_backends.TestNCZarr
github-actions / Test Results
9 out of 10 runs failed: test_open_nczarr (xarray.tests.test_backends.TestNCZarr)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.10/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestNCZarr object at 0x7efc78180460>
def test_open_nczarr(self) -> None:
with create_tmp_file(suffix=".zarr") as tmp:
expected = self._create_nczarr(tmp)
> actual = xr.open_zarr(tmp, consolidated=False)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:6070:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp_tomczjb/temp-67.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
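Note on this failure (and the other Zarr failures below, which share the same root cause): `open_dataset` forwards every keyword it does not recognize through `**kwargs` to `backend.open_dataset()`, so the `cache_members` option that `open_zarr` apparently passes on this branch reaches `ZarrBackendEntrypoint.open_dataset()`, whose signature does not yet accept it. The sketch below reproduces only that mechanism with hypothetical stand-in names (`FakeZarrEntrypoint`, `fake_open_dataset`); it is not xarray code.

class FakeZarrEntrypoint:
    # The entrypoint lists only the keywords it knows about: there is no
    # `cache_members` parameter and no **kwargs catch-all.
    def open_dataset(self, filename_or_obj, *, consolidated=None, drop_variables=None):
        return {"source": filename_or_obj, "consolidated": consolidated}

def fake_open_dataset(filename_or_obj, **kwargs):
    # Mirrors the forwarding visible in the traceback above: unrecognized
    # keywords are passed straight through to the backend entrypoint.
    backend = FakeZarrEntrypoint()
    return backend.open_dataset(filename_or_obj, **kwargs)

try:
    fake_open_dataset("store.zarr", consolidated=False, cache_members=True)
except TypeError as err:
    # TypeError: FakeZarrEntrypoint.open_dataset() got an unexpected
    # keyword argument 'cache_members'
    print(err)

The tests imply the fix is for the Zarr entrypoint to accept (or consume) the new keyword before forwarding; the exact change is left to this PR.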
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_read_non_consolidated_warning[2] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
Failed: DID NOT WARN. No warnings of type (<class 'RuntimeWarning'>,) were emitted.
Emitted warnings: [].
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015f098d0>
def test_read_non_consolidated_warning(self) -> None:
expected = create_test_data()
with self.create_zarr_target() as store:
self.save(
expected, store_target=store, consolidated=False, **self.version_kwargs
)
with pytest.warns(
RuntimeWarning,
match="Failed to open Zarr store with consolidated",
):
> with xr.open_zarr(store, **self.version_kwargs) as ds:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:2335:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp0y8segy3/temp-95.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
During handling of the above exception, another exception occurred:
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015f098d0>
def test_read_non_consolidated_warning(self) -> None:
expected = create_test_data()
with self.create_zarr_target() as store:
self.save(
expected, store_target=store, consolidated=False, **self.version_kwargs
)
> with pytest.warns(
RuntimeWarning,
match="Failed to open Zarr store with consolidated",
):
E   Failed: DID NOT WARN. No warnings of type (<class 'RuntimeWarning'>,) were emitted.
E   Emitted warnings: [].
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:2331: Failed
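The DID NOT WARN failure here is a knock-on effect of the same TypeError: `xr.open_zarr` raises before the point where a failed consolidated-metadata open would emit the RuntimeWarning, so the `pytest.warns` block captures nothing and reports the missing warning chained onto the original error (the "During handling of the above exception" section above). A minimal, self-contained sketch of the pattern the test expects, with a hypothetical `open_store` stand-in for the real call:

import warnings

import pytest

def open_store(fail_early: bool) -> None:
    if fail_early:
        # Stand-in for the TypeError raised inside xr.open_zarr above; on this
        # path no warning is ever emitted, which pytest then reports as
        # "DID NOT WARN".
        raise TypeError("open_dataset() got an unexpected keyword argument 'cache_members'")
    warnings.warn("Failed to open Zarr store with consolidated metadata", RuntimeWarning)

def test_non_consolidated_warning() -> None:
    # Passes only when the warning is actually emitted inside the block.
    with pytest.warns(RuntimeWarning, match="Failed to open Zarr store with consolidated"):
        open_store(fail_early=False)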
Check warning on line 0 in xarray.tests.test_backends.TestNCZarr
github-actions / Test Results
9 out of 10 runs failed: test_overwriting_nczarr (xarray.tests.test_backends.TestNCZarr)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.10/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestNCZarr object at 0x7efc78181090>
def test_overwriting_nczarr(self) -> None:
with create_tmp_file(suffix=".zarr") as tmp:
ds = self._create_nczarr(tmp)
expected = ds[["var1"]]
expected.to_zarr(tmp, mode="w")
> actual = xr.open_zarr(tmp, consolidated=False)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:6078:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpq9j1tszn/temp-68.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_non_existent_store[2] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015f0a290>
def test_non_existent_store(self) -> None:
with pytest.raises(
FileNotFoundError, match="(No such file or directory|Unable to find group)"
):
> xr.open_zarr(f"{uuid.uuid4()}")
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:2342:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '4156c587-91fd-46a5-bcb2-70d66ef1b81c', engine = 'zarr'
chunks = {}, cache = False, decode_cf = True, mask_and_scale = True
decode_times = True, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-False-False-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015f0b670>
consolidated = False, compute = False, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpv172rkvu/temp-166.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E   TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
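For reference, the region-writing pattern exercised by both test_write_region parametrizations can be reproduced outside the test harness with public API only. This is a sketch, assuming a writable local directory and the hypothetical store path below; on this branch the final open_zarr would presumably hit the same cache_members TypeError shown above.

import numpy as np
import xarray as xr

store = "example_region.zarr"  # hypothetical local path
zeros = xr.Dataset({"u": (("x",), np.zeros(10))})
nonzeros = xr.Dataset({"u": (("x",), np.arange(1, 11))})

# Write the full-size array first so the later region writes have a target,
# chunked the same way the test requests via encoding.
zeros.to_zarr(store, mode="w", encoding={"u": {"chunks": 2}})

# Overwrite the store two elements at a time, aligned with the 2-element chunks.
for i in range(0, 10, 2):
    region = {"x": slice(i, i + 2)}
    nonzeros.isel(region).to_zarr(store, region=region)

with xr.open_zarr(store) as actual:
    assert (actual["u"].values == nonzeros["u"].values).all()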
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-False-False-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014d81930>
consolidated = True, compute = False, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmplkh9pb4c/temp-167.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-False-False-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014d80610>
consolidated = None, compute = False, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpl7xbcibe/temp-168.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-False-False-True-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa01518f670>
consolidated = False, compute = True, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp49rcii3k/temp-169.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-False-False-True-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015cb0220>
consolidated = True, compute = True, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpnk3zyucs/temp-170.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-False-False-True-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015cb0550>
consolidated = None, compute = True, use_dask = False, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpvxswq6mb/temp-171.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
[... open_dataset() signature, docstring, and body omitted; identical to the first traceback above ...]
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
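Every test_write_region parametrization fails at the first xr.open_zarr call it reaches, because open_zarr always routes through the same open_dataset → backend entrypoint path shown above. The sketch below shows the shape of a signature change that lets the entrypoint consume the new option instead of rejecting it; the classes are stand-ins, not xarray's ZarrStore or backend entrypoint, and the default value and pass-through are assumptions, not this PR's actual code.

```python
from typing import Any


class FakeZarrStore:
    """Stand-in for xarray's ZarrStore; it only records whether member caching is on."""

    def __init__(self, path: str, cache_members: bool) -> None:
        self.path = path
        self.cache_members = cache_members


class ZarrEntrypointSketch:
    """Sketch of a backend entrypoint whose open_dataset() accepts cache_members."""

    def open_dataset(
        self,
        filename_or_obj: str,
        *,
        drop_variables: Any = None,
        cache_members: bool = True,  # name from the CI error; default assumed
        **decoder_kwargs: Any,
    ) -> FakeZarrStore:
        # In xarray this would open a ZarrStore and decode it into a Dataset;
        # here it only demonstrates that the forwarded keyword is consumed.
        return FakeZarrStore(str(filename_or_obj), cache_members)


store = ZarrEntrypointSketch().open_dataset("/tmp/example.zarr", cache_members=False)
print(store.cache_members)  # False
```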
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-False-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa015cb14b0>
consolidated = False, compute = False, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmphkeqheen/temp-172.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
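Note: the TestZarrWriteEmpty failures all come from test_write_region, where the to_zarr writes that have run succeed and it is a verification open_zarr call that raises. For reference, the region-write workflow the test exercises looks roughly like the following standalone sketch (the store path and chunk size are placeholders; the test uses a temporary target):

import numpy as np
import xarray as xr

store = "store.zarr"  # placeholder path

zeros = xr.Dataset({"u": (("x",), np.zeros(10))})
nonzeros = xr.Dataset({"u": (("x",), np.arange(1, 11))})

# seed the store with the full-size array so regions can be written into it
zeros.to_zarr(store, mode="w", encoding={"u": {"chunks": 2}})

# overwrite the existing array two elements at a time
for i in range(0, 10, 2):
    region = {"x": slice(i, i + 2)}
    nonzeros.isel(region).to_zarr(store, region=region)

# on this commit, this is the kind of call that raises TypeError
with xr.open_zarr(store) as actual:
    print(actual["u"].values)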
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-False-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51270>
consolidated = True, compute = False, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpwer2gfck/temp-173.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-False-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51360>
consolidated = None, compute = False, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp7wavz0i2/temp-174.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
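Note: the function body shown above also explains why backend_kwargs and plain keyword arguments are interchangeable: kwargs.update(backend_kwargs) merges the two dicts before the backend entrypoint is called. For example (placeholder path), both of these calls hand consolidated=False to ZarrBackendEntrypoint.open_dataset:

import xarray as xr

ds_a = xr.open_dataset("example.zarr", engine="zarr", consolidated=False)
ds_b = xr.open_dataset(
    "example.zarr", engine="zarr", backend_kwargs={"consolidated": False}
)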
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-True-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51450>
consolidated = False, compute = True, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpxtteohao/temp-175.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
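Note: as an aside on the chunks options documented in the signature above, the choice only affects how the opened variables are represented in memory; a quick illustration (placeholder path; the last two forms assume dask is installed):

import xarray as xr

eager = xr.open_dataset("example.zarr", engine="zarr", chunks=None)    # NumPy-backed arrays
on_disk = xr.open_dataset("example.zarr", engine="zarr", chunks={})    # dask, engine-preferred chunks
auto = xr.open_dataset("example.zarr", engine="zarr", chunks="auto")   # dask "auto" chunking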
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-True-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51540>
consolidated = True, compute = True, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpnkt4g4vf/temp-176.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example, if :py:class:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
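Note: the mapping form of the decode keywords described in the docstring above can be passed to open_dataset directly; a short sketch (placeholder path, and "my_variable" is the docstring's own example name):

import xarray as xr

ds = xr.open_dataset(
    "example.zarr",
    engine="zarr",
    mask_and_scale={"my_variable": False},  # leave this variable packed
    decode_times={"my_variable": False},    # keep its time values as raw numbers
)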
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-False-True-True-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c516c0>
consolidated = None, compute = True, use_dask = True, write_empty = False
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpn5xx9y_r/temp-177.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
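For reference, every failing ``test_write_region`` parametrization reduces to the same round trip: seed a Zarr store, overwrite aligned slices with region writes, then re-open the store (the step that now raises). A standalone sketch of that pattern, with a placeholder store path:
import numpy as np
import xarray as xr

store = "example.zarr"  # placeholder path

zeros = xr.Dataset({"u": (("x",), np.zeros(10))})
nonzeros = xr.Dataset({"u": (("x",), np.arange(1, 11))})

# seed the store with chunked zeros
zeros.to_zarr(store, mode="w", encoding={"u": {"chunks": 2}})

# overwrite the data two elements at a time via region writes
for i in range(0, 10, 2):
    region = {"x": slice(i, i + 2)}
    nonzeros.isel(region).to_zarr(store, region=region)

# the tests fail inside this re-open, not inside the writes
with xr.open_zarr(store, consolidated=False) as actual:
    print(actual["u"].values)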
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-False-False-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51600>
consolidated = False, compute = False, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp79sovq0s/temp-178.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
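The captured locals (``chunks = {}``, ``cache = False``) follow directly from the defaulting logic visible in the function body: ``open_zarr`` passes ``chunks={}`` unless told otherwise, and ``cache`` falls back to ``chunks is None``. The same choices made explicitly through ``open_dataset``, with a placeholder store path:
import xarray as xr

store = "example.zarr"  # placeholder path

# engine-preferred dask chunks; cache resolves to False because chunks is not None
lazy = xr.open_dataset(store, engine="zarr", chunks={})

# no dask; cache resolves to True because chunks is None
eager = xr.open_dataset(store, engine="zarr", chunks=None)

# a single dask chunk per array
single = xr.open_dataset(store, engine="zarr", chunks=-1)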
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-False-False-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c52830>
consolidated = True, compute = False, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpp4xqg680/temp-179.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
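As the docstring states and the ``kwargs.update(backend_kwargs)`` line implements, ``backend_kwargs`` is simply a second route for engine-specific keywords, so the two calls below should be interchangeable (placeholder store path):
import xarray as xr

store = "example.zarr"  # placeholder path

# engine keyword passed directly through **kwargs
ds_direct = xr.open_dataset(store, engine="zarr", consolidated=False)

# the same keyword routed through backend_kwargs and merged into **kwargs internally
ds_nested = xr.open_dataset(store, engine="zarr", backend_kwargs={"consolidated": False})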
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-False-False-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c518d0>
consolidated = None, compute = False, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp2ejz27yn/temp-180.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
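Several of the decoder arguments documented above accept either a bool or a per-variable mapping. A short usage sketch; the variable name ``u`` and the store path are placeholders:
import xarray as xr

store = "example.zarr"  # placeholder path

ds = xr.open_dataset(
    store,
    engine="zarr",
    mask_and_scale={"u": False},  # keep "u" as raw packed values
    decode_times=False,           # leave all time variables encoded as numbers
)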
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-True-False-True-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51a80>
consolidated = False, compute = True, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpvugrhpw2/temp-181.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
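Every failure in this report shares the same root cause: open_dataset forwards any keyword it does not consume directly to backend.open_dataset, so a keyword the entrypoint signature does not declare (here 'cache_members', presumably introduced by this PR's member-caching change) raises the TypeError at api.py:670. A toy sketch of that pass-through, using stand-in names rather than xarray's real machinery:

# Toy illustration of the kwarg pass-through that produces the TypeError above.
# These classes are stand-ins, not xarray's real entrypoint machinery.
class ToyBackendEntrypoint:
    def open_dataset(self, filename_or_obj, *, drop_variables=None):
        return {"source": filename_or_obj, "drop_variables": drop_variables}

def toy_open_dataset(filename_or_obj, *, backend, drop_variables=None, **kwargs):
    # Everything left in **kwargs is forwarded verbatim to the backend, so any
    # keyword the entrypoint signature does not declare blows up right here.
    return backend.open_dataset(filename_or_obj, drop_variables=drop_variables, **kwargs)

backend = ToyBackendEntrypoint()
toy_open_dataset("store.zarr", backend=backend)  # fine
try:
    toy_open_dataset("store.zarr", backend=backend, cache_members=True)
except TypeError as err:
    print(err)  # mirrors the TypeError in the raw output above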
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-True-False-True-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51b10>
consolidated = True, compute = True, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpuaz5qvir/temp-182.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
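For reference, a hedged sketch of the direction a fix would likely take: declare the new keyword on the Zarr entrypoint so the pass-through above no longer trips over it. How the actual PR threads 'cache_members' into the store is not visible in this report, so the subclass below is an illustration, not the real change:

# Hedged sketch only: one way to make the entrypoint tolerate the new keyword.
# Whether the PR forwards cache_members to the Zarr store this way is an
# assumption; this subclass merely declares the parameter so the kwarg
# pass-through in open_dataset stops failing.
from xarray.backends.zarr import ZarrBackendEntrypoint

class CacheAwareZarrEntrypoint(ZarrBackendEntrypoint):
    def open_dataset(self, filename_or_obj, *, cache_members=True, **kwargs):
        # A real fix would pass cache_members through to the store; here it is
        # only accepted (and ignored) to show where the signature must change.
        return super().open_dataset(filename_or_obj, **kwargs)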
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
7 out of 9 runs failed: test_write_region[2-True-False-True-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51ba0>
consolidated = None, compute = True, use_dask = False, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp45ow98us/temp-183.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
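The parametrized test above exercises Zarr region writes; a condensed, standalone version of that pattern (public API only, with a placeholder local path instead of the test's temporary-store fixture) looks like this:

# Condensed, standalone version of the region-write pattern the test exercises.
import numpy as np
import xarray as xr

zeros = xr.Dataset({"u": (("x",), np.zeros(10))})
nonzeros = xr.Dataset({"u": (("x",), np.arange(1, 11))})

store = "region-demo.zarr"  # placeholder path; the test uses a tmp-dir fixture
zeros.to_zarr(store, mode="w", encoding={"u": {"chunks": 2}})

# Overwrite the existing array two elements at a time, in place.
for i in range(0, 10, 2):
    region = {"x": slice(i, i + 2)}
    nonzeros.isel(region).to_zarr(store, region=region)

with xr.open_zarr(store) as actual:
    np.testing.assert_array_equal(actual["u"].values, nonzeros["u"].values)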
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-True-False-False] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51e10>
consolidated = False, compute = False, use_dask = True, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp8v7qn7to/temp-184.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
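For context on the ``chunks`` argument described in the repeated docstring, here are its four documented modes side by side (placeholder path; dask is required for every case except chunks=None):

# The chunks options enumerated in the docstring, side by side.
# "data.nc" is a placeholder path.
import xarray as xr

path = "data.nc"
ds_numpy = xr.open_dataset(path, chunks=None)    # no dask: plain NumPy arrays
ds_native = xr.open_dataset(path, chunks={})     # dask, engine-preferred chunk sizes
ds_auto = xr.open_dataset(path, chunks="auto")   # dask "auto" chunking
ds_single = xr.open_dataset(path, chunks=-1)     # dask, one chunk per array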
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-True-False-True] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51ed0>
consolidated = True, compute = False, use_dask = True, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpjabsu1wg/temp-185.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
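The docstring repeatedly notes that the decoding keywords also accept a mapping for per-variable control; a small illustration with placeholder variable names:

# Per-variable decoding toggles, as the docstring describes.
# "packed_var" and "raw_time" are placeholder variable names.
import xarray as xr

ds = xr.open_dataset(
    "data.nc",
    mask_and_scale={"packed_var": False},  # keep this variable's raw packed values
    decode_times={"raw_time": False},      # leave this variable's times as numbers
)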
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
6 out of 9 runs failed: test_write_region[2-True-True-False-None] (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12 all-but-numba/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.10/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
Raw output
TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7fa014c51ff0>
consolidated = None, compute = False, use_dask = True, write_empty = True
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if (use_dask or not compute) and not has_dask:
pytest.skip("requires dask")
zeros = Dataset({"u": (("x",), np.zeros(10))})
nonzeros = Dataset({"u": (("x",), np.arange(1, 11))})
if use_dask:
zeros = zeros.chunk(2)
nonzeros = nonzeros.chunk(2)
with self.create_zarr_target() as store:
zeros.to_zarr(
store,
consolidated=consolidated,
compute=compute,
encoding={"u": dict(chunks=2)},
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
region = {"x": slice(i, i + 2)}
nonzeros.isel(region).to_zarr(
store,
region=region,
consolidated=consolidated,
write_empty_chunks=write_empty,
**self.version_kwargs,
)
> with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:3028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/backends/zarr.py:1494: in open_zarr
ds = open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpobam1lid/temp-186.zarr', engine = 'zarr', chunks = {}
cache = False, decode_cf = True, mask_and_scale = True, decode_times = True
decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**kwargs,
)
E TypeError: ZarrBackendEntrypoint.open_dataset() got an unexpected keyword argument 'cache_members'
/home/runner/work/xarray/xarray/xarray/backends/api.py:670: TypeError
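Finally, the docstring states that backend_kwargs is equivalent to passing extra keywords directly to the engine's open function; a short illustration using its own 'group' example (paths and variable names are placeholders):

# backend_kwargs and **kwargs are equivalent routes to the engine's open
# function, per the docstring; "group" is the engine-specific example it gives.
import xarray as xr

ds_a = xr.open_dataset("nested.nc", engine="h5netcdf", group="/model/run1")
ds_b = xr.open_dataset(
    "nested.nc",
    engine="h5netcdf",
    backend_kwargs={"group": "/model/run1"},
    drop_variables=["diagnostics"],  # skip a problematic variable entirely
)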