[pull] main from pydata:main #546

Merged
merged 2 commits on Nov 14, 2023

Changes from all commits
4 changes: 1 addition & 3 deletions .binder/environment.yml
@@ -6,7 +6,6 @@ dependencies:
- boto3
- bottleneck
- cartopy
- cdms2
- cfgrib
- cftime
- coveralls
@@ -38,5 +37,4 @@ dependencies:
- toolz
- xarray
- zarr
- pip:
- numbagg
- numbagg
8 changes: 1 addition & 7 deletions .github/workflows/ci.yaml
@@ -67,13 +67,7 @@ jobs:
        run: |
          echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV

          if [[ "${{matrix.python-version}}" == "3.11" ]]; then
            if [[ ${{matrix.os}} == windows* ]]; then
              echo "CONDA_ENV_FILE=ci/requirements/environment-windows-py311.yml" >> $GITHUB_ENV
            else
              echo "CONDA_ENV_FILE=ci/requirements/environment-py311.yml" >> $GITHUB_ENV
            fi
          elif [[ ${{ matrix.os }} == windows* ]] ;
          if [[ ${{ matrix.os }} == windows* ]] ;
          then
            echo "CONDA_ENV_FILE=ci/requirements/environment-windows.yml" >> $GITHUB_ENV
          elif [[ "${{ matrix.env }}" != "" ]] ;
2 changes: 0 additions & 2 deletions ci/requirements/all-but-dask.yml
@@ -3,13 +3,11 @@ channels:
- conda-forge
- nodefaults
dependencies:
- python=3.10
- black
- aiobotocore
- boto3
- bottleneck
- cartopy
- cdms2
- cftime
- coveralls
- flox
47 changes: 0 additions & 47 deletions ci/requirements/environment-py311.yml

This file was deleted.

43 changes: 0 additions & 43 deletions ci/requirements/environment-windows-py311.yml

This file was deleted.

1 change: 0 additions & 1 deletion ci/requirements/environment-windows.yml
@@ -5,7 +5,6 @@ dependencies:
- boto3
- bottleneck
- cartopy
# - cdms2 # Not available on Windows
- cftime
- dask-core
- distributed
1 change: 0 additions & 1 deletion ci/requirements/environment.yml
@@ -7,7 +7,6 @@ dependencies:
- boto3
- bottleneck
- cartopy
- cdms2
- cftime
- dask-core
- distributed
1 change: 0 additions & 1 deletion ci/requirements/min-all-deps.yml
@@ -11,7 +11,6 @@ dependencies:
- boto3=1.24
- bottleneck=1.3
- cartopy=0.20
- cdms2=3.1
- cftime=1.6
- coveralls
- dask-core=2022.7
2 changes: 0 additions & 2 deletions doc/api.rst
@@ -628,11 +628,9 @@ DataArray methods
load_dataarray
open_dataarray
DataArray.as_numpy
DataArray.from_cdms2
DataArray.from_dict
DataArray.from_iris
DataArray.from_series
DataArray.to_cdms2
DataArray.to_dask_dataframe
DataArray.to_dataframe
DataArray.to_dataset
13 changes: 3 additions & 10 deletions doc/getting-started-guide/faq.rst
@@ -168,18 +168,11 @@ integration with Cartopy_.
.. _Iris: https://scitools-iris.readthedocs.io/en/stable/
.. _Cartopy: https://scitools.org.uk/cartopy/docs/latest/

`UV-CDAT`__ is another Python library that implements in-memory netCDF-like
variables and `tools for working with climate data`__.

__ https://uvcdat.llnl.gov/
__ https://drclimate.wordpress.com/2014/01/02/a-beginners-guide-to-scripting-with-uv-cdat/

We think the design decisions we have made for xarray (namely, basing it on
pandas) make it a faster and more flexible data analysis tool. That said, Iris
and CDAT have some great domain specific functionality, and xarray includes
methods for converting back and forth between xarray and these libraries. See
:py:meth:`~xarray.DataArray.to_iris` and :py:meth:`~xarray.DataArray.to_cdms2`
for more details.
has some great domain specific functionality, and xarray includes
methods for converting back and forth between xarray and Iris. See
:py:meth:`~xarray.DataArray.to_iris` for more details.

What other projects leverage xarray?
------------------------------------
13 changes: 8 additions & 5 deletions doc/user-guide/io.rst
@@ -876,17 +876,20 @@ and then calling ``to_zarr`` with ``compute=False`` to write only metadata
    ds.to_zarr(path, compute=False)

Now, a Zarr store with the correct variable shapes and attributes exists that
can be filled out by subsequent calls to ``to_zarr``. The ``region`` provides a
mapping from dimension names to Python ``slice`` objects indicating where the
data should be written (in index space, not coordinate space), e.g.,
can be filled out by subsequent calls to ``to_zarr``. ``region`` can be
specified as ``"auto"``, which opens the existing store and determines the
correct alignment of the new data with the existing coordinates, or as an
explicit mapping from dimension names to Python ``slice`` objects indicating
where the data should be written (in index space, not label space), e.g.,

.. ipython:: python

    # For convenience, we'll slice a single dataset, but in the real use-case
    # we would create them separately possibly even from separate processes.
    ds = xr.Dataset({"foo": ("x", np.arange(30))})
    ds.isel(x=slice(0, 10)).to_zarr(path, region={"x": slice(0, 10)})
    ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": slice(10, 20)})
    # Any of the following region specifications are valid
    ds.isel(x=slice(0, 10)).to_zarr(path, region="auto")
    ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"})
    ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)})

Concurrent writes with ``region`` are safe as long as they modify distinct
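
To make the documented workflow concrete, here is a minimal, self-contained sketch of the initialize-then-fill pattern the updated docs describe. The store path, the explicit ``mode`` arguments, the ``x`` coordinate, and the dask-backed template are illustrative assumptions, not part of this diff.

import dask.array
import numpy as np
import xarray as xr

path = "directory.zarr"  # hypothetical local store

# Initialize the store with dummy dask data so only metadata is written;
# chunks of 10 along "x" line up with the region writes below.
dummies = dask.array.zeros(30, chunks=10)
template = xr.Dataset({"foo": ("x", dummies)}, coords={"x": np.arange(30)})
template.to_zarr(path, compute=False, mode="w")

# Fill the store piece by piece; in practice, possibly from separate processes.
ds = xr.Dataset({"foo": ("x", np.arange(30))}, coords={"x": np.arange(30)})
ds.isel(x=slice(0, 10)).to_zarr(path, region="auto", mode="r+")
ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"}, mode="r+")
ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}, mode="r+")
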
7 changes: 7 additions & 0 deletions doc/whats-new.rst
@@ -26,9 +26,16 @@ New Features
By `Deepak Cherian <https://github.com/dcherian>`_. (:issue:`7764`, :pull:`8373`).
- Add ``DataArray.dt.total_seconds()`` method to match the Pandas API. (:pull:`8435`).
By `Ben Mares <https://github.com/maresb>`_.
- Allow passing ``region="auto"`` in :py:meth:`Dataset.to_zarr` to automatically infer the
region to write in the original store. Also implement automatic transpose when dimension
order does not match the original store. (:issue:`7702`, :issue:`8421`, :pull:`8434`).
By `Sam Levang <https://github.com/slevang>`_.

Breaking changes
~~~~~~~~~~~~~~~~
- drop support for `cdms2 <https://github.com/CDAT/cdms>`_. Please use
`xcdat <https://github.com/xCDAT/xcdat>`_ instead (:pull:`8441`).
By `Justus Magin <https://github.com/keewis>`_.

- Bump minimum tested pint version to ``>=0.22``. By `Deepak Cherian <https://github.com/dcherian>`_.

1 change: 0 additions & 1 deletion pyproject.toml
@@ -88,7 +88,6 @@ module = [
"affine.*",
"bottleneck.*",
"cartopy.*",
"cdms2.*",
"cf_units.*",
"cfgrib.*",
"cftime.*",
79 changes: 74 additions & 5 deletions xarray/backends/api.py
@@ -27,6 +27,7 @@
    _normalize_path,
)
from xarray.backends.locks import _get_scheduler
from xarray.backends.zarr import open_zarr
from xarray.core import indexing
from xarray.core.combine import (
    _infer_concat_order_from_positions,
@@ -1443,10 +1444,63 @@ def save_mfdataset(
)


def _validate_region(ds, region):
def _auto_detect_region(ds_new, ds_orig, dim):
    # Create a mapping array of coordinates to indices on the original array
    coord = ds_orig[dim]
    da_map = DataArray(np.arange(coord.size), coords={dim: coord})

    try:
        da_idxs = da_map.sel({dim: ds_new[dim]})
    except KeyError as e:
        if "not all values found" in str(e):
            raise KeyError(
                f"Not all values of coordinate '{dim}' in the new array were"
                " found in the original store. Writing to a zarr region slice"
                " requires that no dimensions or metadata are changed by the write."
            )
        else:
            raise e

    if (da_idxs.diff(dim) != 1).any():
        raise ValueError(
            f"The auto-detected region of coordinate '{dim}' for writing new data"
            " to the original store had non-contiguous indices. Writing to a zarr"
            " region slice requires that the new data constitute a contiguous subset"
            " of the original store."
        )

    dim_slice = slice(da_idxs.values[0], da_idxs.values[-1] + 1)

    return dim_slice
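
Read in isolation, the helper above boils down to a label-to-position lookup followed by a contiguity check. A standalone sketch with made-up coordinate values (none of these names appear in the diff):

import numpy as np
import xarray as xr

# Coordinate values already present in the store, and those of the new data.
stored_coord = np.arange(100, 130)
new_coord = np.arange(110, 120)

# Map each stored coordinate value to its integer position in the store ...
index_map = xr.DataArray(
    np.arange(stored_coord.size), coords={"x": stored_coord}, dims="x"
)
# ... and look up the positions of the new data's coordinate values by label.
positions = index_map.sel(x=new_coord)

# Mirrors the contiguity check above: the positions must form an unbroken run.
assert (positions.diff("x") == 1).all()
region = slice(int(positions.values[0]), int(positions.values[-1]) + 1)
print(region)  # slice(10, 20, None)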


def _auto_detect_regions(ds, region, open_kwargs):
    ds_original = open_zarr(**open_kwargs)
    for key, val in region.items():
        if val == "auto":
            region[key] = _auto_detect_region(ds, ds_original, key)
    return region


def _validate_and_autodetect_region(
    ds, region, mode, open_kwargs
) -> tuple[dict[str, slice], bool]:
    if region == "auto":
        region = {dim: "auto" for dim in ds.dims}

    if not isinstance(region, dict):
        raise TypeError(f"``region`` must be a dict, got {type(region)}")

    if any(v == "auto" for v in region.values()):
        region_was_autodetected = True
        if mode != "r+":
            raise ValueError(
                f"``mode`` must be 'r+' when using ``region='auto'``, got {mode}"
            )
        region = _auto_detect_regions(ds, region, open_kwargs)
    else:
        region_was_autodetected = False

    for k, v in region.items():
        if k not in ds.dims:
            raise ValueError(
@@ -1478,6 +1532,8 @@ def _validate_region(ds, region):
f".drop_vars({non_matching_vars!r})"
)

return region, region_was_autodetected
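
A short usage note on the contract this helper enforces: auto-detection reads the existing store and only modifies arrays in place, so any ``"auto"`` entry is accepted only together with ``mode="r+"``. A sketch, reusing the illustrative ``ds`` and ``path`` objects from the io.rst example above:

# Accepted: "auto" entries are resolved against the coordinates already on disk.
ds.isel(x=slice(0, 10)).to_zarr(path, region="auto", mode="r+")

# Rejected by the check above: any "auto" entry requires mode="r+".
# ds.isel(x=slice(0, 10)).to_zarr(path, region="auto", mode="w")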


def _validate_datatypes_for_zarr_append(zstore, dataset):
"""If variable exists in the store, confirm dtype of the data to append is compatible with
@@ -1529,7 +1585,7 @@ def to_zarr(
    compute: Literal[True] = True,
    consolidated: bool | None = None,
    append_dim: Hashable | None = None,
    region: Mapping[str, slice] | None = None,
    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
    safe_chunks: bool = True,
    storage_options: dict[str, str] | None = None,
    zarr_version: int | None = None,
@@ -1553,7 +1609,7 @@
    compute: Literal[False],
    consolidated: bool | None = None,
    append_dim: Hashable | None = None,
    region: Mapping[str, slice] | None = None,
    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
    safe_chunks: bool = True,
    storage_options: dict[str, str] | None = None,
    zarr_version: int | None = None,
Expand All @@ -1575,7 +1631,7 @@ def to_zarr(
    compute: bool = True,
    consolidated: bool | None = None,
    append_dim: Hashable | None = None,
    region: Mapping[str, slice] | None = None,
    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
    safe_chunks: bool = True,
    storage_options: dict[str, str] | None = None,
    zarr_version: int | None = None,
@@ -1640,7 +1696,20 @@ def to_zarr(
    _validate_dataset_names(dataset)

    if region is not None:
        _validate_region(dataset, region)
        open_kwargs = dict(
            store=store,
            synchronizer=synchronizer,
            group=group,
            consolidated=consolidated,
            storage_options=storage_options,
            zarr_version=zarr_version,
        )
        region, region_was_autodetected = _validate_and_autodetect_region(
            dataset, region, mode, open_kwargs
        )
        # drop indices to avoid potential race condition with auto region
        if region_was_autodetected:
            dataset = dataset.drop_vars(dataset.indexes)
        if append_dim is not None and append_dim in region:
            raise ValueError(
                f"cannot list the same dimension in both ``append_dim`` and "
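
The ``drop_vars(dataset.indexes)`` step above removes the index coordinates from the data being written; since those coordinates already exist in the target store, this avoids concurrent region writers racing to rewrite the same coordinate arrays (the inline comment in the diff calls this out). A rough manual equivalent for an explicit region, again reusing the illustrative ``ds`` and ``path`` from the io.rst example:

# Write only the data variables into the region; the "x" coordinate is already
# in the store from the initial metadata-only write.
part = ds.isel(x=slice(20, 30))
part.drop_vars(part.indexes).to_zarr(path, region={"x": slice(20, 30)}, mode="r+")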