Merge branch 'main' into list-engines

pydata · Dec 13, 2024 · f5af66d · f5af66d
2 parents bed7e1d + f05c5ec
commit f5af66d
Show file tree

Hide file tree

Showing 60 changed files with 1,248 additions and 638 deletions.
diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
@@ -123,7 +123,7 @@ jobs:
           python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
 
       - name: Upload mypy coverage to Codecov
-        uses: codecov/codecov-action@v5.0.2
+        uses: codecov/codecov-action@v5.1.1
         with:
           file: mypy_report/cobertura.xml
           flags: mypy
@@ -174,7 +174,7 @@ jobs:
           python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
 
       - name: Upload mypy coverage to Codecov
-        uses: codecov/codecov-action@v5.0.2
+        uses: codecov/codecov-action@v5.1.1
         with:
           file: mypy_report/cobertura.xml
           flags: mypy-min
@@ -230,7 +230,7 @@ jobs:
           python -m pyright xarray/
 
       - name: Upload pyright coverage to Codecov
-        uses: codecov/codecov-action@v5.0.2
+        uses: codecov/codecov-action@v5.1.1
         with:
           file: pyright_report/cobertura.xml
           flags: pyright
@@ -286,7 +286,7 @@ jobs:
           python -m pyright xarray/
 
       - name: Upload pyright coverage to Codecov
-        uses: codecov/codecov-action@v5.0.2
+        uses: codecov/codecov-action@v5.1.1
         with:
           file: pyright_report/cobertura.xml
           flags: pyright39

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -159,7 +159,9 @@ jobs:
           path: pytest.xml
 
       - name: Upload code coverage to Codecov
-        uses: codecov/codecov-action@v5.0.2
+        uses: codecov/codecov-action@v5.1.1
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
         with:
           file: ./coverage.xml
           flags: unittests

diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml
@@ -140,7 +140,7 @@ jobs:
         run: |
           python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
       - name: Upload mypy coverage to Codecov
-        uses: codecov/codecov-action@v5.0.2
+        uses: codecov/codecov-action@v5.1.1
         with:
           file: mypy_report/cobertura.xml
           flags: mypy

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
       - id: text-unicode-replacement-char
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.7.2
+    rev: v0.8.1
     hooks:
       - id: ruff-format
       - id: ruff
@@ -37,7 +37,7 @@ repos:
         exclude: "generate_aggregations.py"
         additional_dependencies: ["black==24.8.0"]
   - repo: https://github.com/rbubley/mirrors-prettier
-    rev: v3.3.3
+    rev: v3.4.1
     hooks:
       - id: prettier
         args: [--cache-location=.prettier_cache/cache]
@@ -63,3 +63,13 @@ repos:
     rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
     hooks:
       - id: validate-cff
+  - repo: https://github.com/ComPWA/taplo-pre-commit
+    rev: v0.9.3
+    hooks:
+      - id: taplo-format
+        args: ["--option", "array_auto_collapse=false"]
+  - repo: https://github.com/abravalheri/validate-pyproject
+    rev: v0.23
+    hooks:
+      - id: validate-pyproject
+        additional_dependencies: ["validate-pyproject-schema-store[all]"]
diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
@@ -30,13 +30,13 @@ def requires_sparse():
 
 
 def randn(shape, frac_nan=None, chunks=None, seed=0):
-    rng = np.random.RandomState(seed)
+    rng = np.random.default_rng(seed)
     if chunks is None:
         x = rng.standard_normal(shape)
     else:
         import dask.array as da
 
-        rng = da.random.RandomState(seed)
+        rng = da.random.default_rng(seed)
         x = rng.standard_normal(shape, chunks=chunks)
 
     if frac_nan is not None:
@@ -47,7 +47,7 @@ def randn(shape, frac_nan=None, chunks=None, seed=0):
 
 
 def randint(low, high=None, size=None, frac_minus=None, seed=0):
-    rng = np.random.RandomState(seed)
+    rng = np.random.default_rng(seed)
     x = rng.randint(low, high, size)
     if frac_minus is not None:
         inds = rng.choice(range(x.size), int(x.size * frac_minus))

diff --git a/asv_bench/benchmarks/dataset_io.py b/asv_bench/benchmarks/dataset_io.py
@@ -305,7 +305,7 @@ def make_ds(self, nfiles=10):
             ds.attrs = {"history": "created for xarray benchmarking"}
 
             self.ds_list.append(ds)
-            self.filenames_list.append("test_netcdf_%i.nc" % i)
+            self.filenames_list.append(f"test_netcdf_{i}.nc")
 
 
 class IOWriteMultipleNetCDF3(IOMultipleNetCDF):

diff --git a/asv_bench/benchmarks/reindexing.py b/asv_bench/benchmarks/reindexing.py
@@ -11,7 +11,7 @@
 
 class Reindex:
     def setup(self):
-        data = np.random.RandomState(0).randn(ntime, nx, ny)
+        data = np.random.default_rng(0).random((ntime, nx, ny))
         self.ds = xr.Dataset(
             {"temperature": (("time", "x", "y"), data)},
             coords={"time": np.arange(ntime), "x": np.arange(nx), "y": np.arange(ny)},

diff --git a/asv_bench/benchmarks/unstacking.py b/asv_bench/benchmarks/unstacking.py
@@ -8,7 +8,7 @@
 
 class Unstacking:
     def setup(self):
-        data = np.random.RandomState(0).randn(250, 500)
+        data = np.random.default_rng(0).random((250, 500))
         self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...])
         self.da_missing = self.da_full[:-1]
         self.df_missing = self.da_missing.to_pandas()

diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
@@ -20,6 +20,7 @@ dependencies:
   - jupyter_client
   - matplotlib-base
   - nbsphinx
+  - ncdata
   - netcdf4>=1.5
   - numba
   - numpy>=2

diff --git a/ci/requirements/environment-3.13.yml b/ci/requirements/environment-3.13.yml
@@ -47,3 +47,5 @@ dependencies:
   - toolz
   - typing_extensions
   - zarr
+  - pip:
+      - jax # no way to get cpu-only jaxlib from conda if gpu is present
diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml
@@ -49,3 +49,5 @@ dependencies:
   - toolz
   - typing_extensions
   - zarr
+  - pip:
+      - jax # no way to get cpu-only jaxlib from conda if gpu is present
diff --git a/doc/getting-started-guide/faq.rst b/doc/getting-started-guide/faq.rst
@@ -173,9 +173,9 @@ integration with Cartopy_.
 
 We think the design decisions we have made for xarray (namely, basing it on
 pandas) make it a faster and more flexible data analysis tool. That said, Iris
-has some great domain specific functionality, and xarray includes
-methods for converting back and forth between xarray and Iris. See
-:py:meth:`~xarray.DataArray.to_iris` for more details.
+has some great domain specific functionality, and there are dedicated methods for
+converting back and forth between xarray and Iris. See
+:ref:`Reading and Writing Iris data <io.iris>` for more details.
 
 What other projects leverage xarray?
 ------------------------------------

diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst
@@ -30,7 +30,8 @@ numpy) over all array values:
 .. ipython:: python
 
     arr = xr.DataArray(
-        np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]
+        np.random.default_rng(0).random((2, 3)),
+        [("x", ["a", "b"]), ("y", [10, 20, 30])],
     )
     arr - 3
     abs(arr)

diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst
@@ -292,7 +292,7 @@ work as a streaming operation, when run on arrays loaded from disk:
 .. ipython::
     :verbatim:
 
-    In [56]: rs = np.random.RandomState(0)
+    In [56]: rs = np.random.default_rng(0)
 
     In [57]: array1 = xr.DataArray(rs.standard_normal((1000, 100000)), dims=["place", "time"])  # 800MB
 

diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst
@@ -13,6 +13,8 @@ format (recommended).
 
     import os
 
+    import iris
+    import ncdata.iris_xarray
     import numpy as np
     import pandas as pd
     import xarray as xr
@@ -1072,8 +1074,11 @@ Iris
 
 The Iris_ tool allows easy reading of common meteorological and climate model formats
 (including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very
-similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is
-installed, xarray can convert a ``DataArray`` into a ``Cube`` using
+similar to ``DataArray`` objects, while enforcing a CF-compliant data model.
+
+DataArray ``to_iris`` and ``from_iris``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If iris is installed, xarray can convert a ``DataArray`` into a ``Cube`` using
 :py:meth:`DataArray.to_iris`:
 
 .. ipython:: python
@@ -1095,9 +1100,36 @@ Conversely, we can create a new ``DataArray`` object from a ``Cube`` using
     da_cube = xr.DataArray.from_iris(cube)
     da_cube
 
+Ncdata
+~~~~~~
+Ncdata_ provides more sophisticated means of transferring data, including entire
+datasets. It uses the file saving and loading functions in both projects to provide a
+more "correct" translation between them, while keeping overhead very low and without
+writing any actual files to disk.
 
-.. _Iris: https://scitools.org.uk/iris
+For example:
+
+.. ipython:: python
+    :okwarning:
 
+    ds = xr.tutorial.open_dataset("air_temperature_gradient")
+    cubes = ncdata.iris_xarray.cubes_from_xarray(ds)
+    print(cubes)
+    print(cubes[1])
+
+.. ipython:: python
+    :okwarning:
+
+    ds = ncdata.iris_xarray.cubes_to_xarray(cubes)
+    print(ds)
+
+Ncdata can also adjust file data within load and save operations, to fix data loading
+problems or provide exact save formatting without needing to modify files on disk.
+See, for example, the `ncdata usage examples`_.
+
+.. _Iris: https://scitools.org.uk/iris
+.. _Ncdata: https://ncdata.readthedocs.io/en/latest/index.html
+.. _ncdata usage examples: https://github.com/pp-mo/ncdata/tree/v0.1.2?tab=readme-ov-file#correct-a-miscoded-attribute-in-iris-input
 
 OPeNDAP
 -------

diff --git a/doc/user-guide/pandas.rst b/doc/user-guide/pandas.rst
@@ -202,7 +202,7 @@ Let's take a look:
 
 .. ipython:: python
 
-    data = np.random.RandomState(0).rand(2, 3, 4)
+    data = np.random.default_rng(0).random((2, 3, 4))
     items = list("ab")
     major_axis = list("mno")
     minor_axis = pd.date_range(start="2000", periods=4, name="date")

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -14,36 +14,50 @@ What's New
 
     np.random.seed(123456)
 
-.. _whats-new.2024.11.1:
+.. _whats-new.2024.12.0:
 
-v.2024.11.1 (unreleased)
+v.2024.12.0 (unreleased)
 ------------------------
 
 New Features
 ~~~~~~~~~~~~
 - Add :py:func:`~xarray.show_backends` alias for :py:func:`~xarray.backends.list_engines` (:issue:`6577`, :pull:`9821`).
   By `Nick Hodgskin <https://github.com/VeckoTheGecko>`_.
+- Better support wrapping additional array types (e.g. ``cupy`` or ``jax``) by calling generalized
+  duck array operations throughout more xarray methods. (:issue:`7848`, :pull:`9798`).
+  By `Sam Levang <https://github.com/slevang>`_.
+
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
-
+- Methods including ``dropna``, ``rank``, ``idxmax``, ``idxmin`` require
+  non-dimension arguments to be passed as keyword arguments. The previous
+  behavior, which allowed ``.idxmax('foo', 'all')``, was too easily confused with
+  ``'all'`` being a dimension. The updated equivalent is ``.idxmax('foo',
+  how='all')``. The previous behavior was deprecated in v2023.10.0.
+  By `Maximilian Roos <https://github.com/max-sixty>`_.
 
 Deprecations
 ~~~~~~~~~~~~
-
+- Finalize deprecation of ``closed`` parameters of :py:func:`cftime_range` and
+  :py:func:`date_range` (:pull:`9882`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 Bug fixes
 ~~~~~~~~~
-
+- Fix type annotations for ``get_axis_num``. (:issue:`9822`, :pull:`9827`).
+  By `Bruce Merry <https://github.com/bmerry>`_.
+- Fix unintended load on datasets when calling :py:meth:`DataArray.plot.scatter` (:pull:`9818`).
+  By `Jimmy Westling <https://github.com/illviljan>`_.
 
 Documentation
 ~~~~~~~~~~~~~
 
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
-
-
+- Move non-CF related ``ensure_dtype_not_object`` from conventions to backends (:pull:`9828`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 .. _whats-new.2024.11.0:
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 authors = [
-  {name = "xarray Developers", email = "xarray@googlegroups.com"},
+  { name = "xarray Developers", email = "xarray@googlegroups.com" },
 ]
 classifiers = [
   "Development Status :: 5 - Production/Stable",
@@ -16,7 +16,7 @@ classifiers = [
 ]
 description = "N-D labeled arrays and datasets in Python"
 dynamic = ["version"]
-license = {text = "Apache-2.0"}
+license = { text = "Apache-2.0" }
 name = "xarray"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -45,12 +45,21 @@ dev = [
   "pytest-env",
   "pytest-xdist",
   "pytest-timeout",
-  "ruff",
+  "ruff>=0.8.0",
   "sphinx",
   "sphinx_autosummary_accessors",
   "xarray[complete]",
 ]
-io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
+io = [
+  "netCDF4",
+  "h5netcdf",
+  "scipy",
+  'pydap; python_version<"3.10"',
+  "zarr",
+  "fsspec",
+  "cftime",
+  "pooch",
+]
 etc = ["sparse"]
 parallel = ["dask[complete]"]
 viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"]
@@ -249,14 +258,13 @@ extend-select = [
   "RUF",
 ]
 extend-safe-fixes = [
-  "TID252",  # absolute imports
+  "TID252", # absolute imports
 ]
 ignore = [
   "E402",    # module level import not at top of file
   "E501",    # line too long - let the formatter worry about that
   "E731",    # do not assign a lambda expression, use a def
   "UP007",   # use X | Y for type annotations
-  "UP027",   # deprecated
   "C40",     # unnecessary generator, comprehension, or literal
   "PIE790",  # unnecessary pass statement
   "PERF203", # try-except within a loop incurs performance overhead
@@ -328,7 +336,9 @@ filterwarnings = [
   "default:the `pandas.MultiIndex` object:FutureWarning:xarray.tests.test_variable",
   "default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning",
   "default:Duplicate dimension names present:UserWarning:xarray.namedarray.core",
-  "default:::xarray.tests.test_strategies", # TODO: remove once we know how to deal with a changed signature in protocols
+
+  # TODO: remove once we know how to deal with a changed signature in protocols
+  "default:::xarray.tests.test_strategies",
 ]
 
 log_cli_level = "INFO"
-Original file line number
+Diff line change
@@ Expand Up / @@ -47,3 +47,5 @@ dependencies: @@
       - toolz
       - typing_extensions
       - zarr
+      - pip:
+          - jax # no way to get cpu-only jaxlib from conda if gpu is present