From 7087ca49629e07be004d92fd08f916e3359a57e1 Mon Sep 17 00:00:00 2001
From: Justus Magin <keewis@users.noreply.github.com>
Date: Thu, 11 Jul 2024 10:54:18 +0200
Subject: [PATCH 1/4] `numpy` 2 compatibility in the `netcdf4` and `h5netcdf`
 backends (#9136)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* don't remove `netcdf4` from the upstream-dev environment

* also stop removing `h5py` and `hdf5`

* hard-code the precision (I believe this was missed in #9081)

* don't remove `h5py` either

* use on-diks _FillValue as standrd expects, use view instead of cast to prevent OverflowError.

* whats-new

* unpin `numpy`

* rework UnsignedCoder

* add test

* Update xarray/coding/variables.py

Co-authored-by: Justus Magin <keewis@users.noreply.github.com>

---------

Co-authored-by: Kai Mühlbauer <kai.muehlbauer@uni-bonn.de>
Co-authored-by: Kai Mühlbauer <kmuehlbauer@wradlib.org>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
---
 ci/install-upstream-wheels.sh           |  5 ++--
 ci/requirements/all-but-dask.yml        |  2 +-
 ci/requirements/environment-windows.yml |  2 +-
 ci/requirements/environment.yml         |  2 +-
 doc/whats-new.rst                       |  4 ++-
 xarray/coding/variables.py              | 16 +++++++-----
 xarray/tests/test_backends.py           | 33 +++++++++++++++++++++++--
 7 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh
index d728768439a..79fae3c46a9 100755
--- a/ci/install-upstream-wheels.sh
+++ b/ci/install-upstream-wheels.sh
@@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse
 # temporarily remove numexpr
 $conda remove -y numexpr
 # temporarily remove backends
-$conda remove -y cf_units hdf5 h5py netcdf4 pydap
+$conda remove -y cf_units pydap
 # forcibly remove packages to avoid artifacts
 $conda remove -y --force \
     numpy \
@@ -37,8 +37,7 @@ python -m pip install \
     numpy \
     scipy \
     matplotlib \
-    pandas \
-    h5py
+    pandas
 # for some reason pandas depends on pyarrow already.
 # Remove once a `pyarrow` version compiled with `numpy>=2.0` is on `conda-forge`
 python -m pip install \
diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml
index abf6a88690a..119db282ad9 100644
--- a/ci/requirements/all-but-dask.yml
+++ b/ci/requirements/all-but-dask.yml
@@ -22,7 +22,7 @@ dependencies:
   - netcdf4
   - numba
   - numbagg
-  - numpy<2
+  - numpy
   - packaging
   - pandas
   - pint>=0.22
diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml
index 2eedc9b0621..896e390ea3e 100644
--- a/ci/requirements/environment-windows.yml
+++ b/ci/requirements/environment-windows.yml
@@ -23,7 +23,7 @@ dependencies:
   - netcdf4
   - numba
   - numbagg
-  - numpy<2
+  - numpy
   - packaging
   - pandas
   # - pint>=0.22
diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml
index 317e1fe5f41..ef02a3e7f23 100644
--- a/ci/requirements/environment.yml
+++ b/ci/requirements/environment.yml
@@ -26,7 +26,7 @@ dependencies:
   - numba
   - numbagg
   - numexpr
-  - numpy<2
+  - numpy
   - opt_einsum
   - packaging
   - pandas
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index f237b406bd5..e8369dc2f40 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -45,6 +45,8 @@ Bug fixes
   By `Pontus Lurcock <https://github.com/pont-us>`_.
 - Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`).
   By `Justus Magin <https://github.com/keewis>`_.
+- ``numpy>=2`` compatibility in the ``netcdf4`` backend (:pull:`9136`).
+  By `Justus Magin <https://github.com/keewis>`_ and `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 - Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`).
   By `Justus Magin <https://github.com/keewis>`_.
 - Address regression introduced in :pull:`9002` that prevented objects returned
@@ -67,7 +69,7 @@ Documentation
 - Adds a flow-chart diagram to help users navigate help resources (`Discussion #8990 <https://github.com/pydata/xarray/discussions/8990>`_).
   By `Jessica Scheick <https://github.com/jessicas11>`_.
 - Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`)
-  By `Maximilian Roos <https://github.com/max-sixty>`_
+  By `Maximilian Roos <https://github.com/max-sixty>`_.
 
 
 Internal Changes
diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index d31cb6e626a..d19f285d2b9 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -516,10 +516,13 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
             dims, data, attrs, encoding = unpack_for_encoding(variable)
 
             pop_to(encoding, attrs, "_Unsigned")
-            signed_dtype = np.dtype(f"i{data.dtype.itemsize}")
+            # we need the on-disk type here
+            # trying to get it from encoding, resort to an int with the same precision as data.dtype if not available
+            signed_dtype = np.dtype(encoding.get("dtype", f"i{data.dtype.itemsize}"))
             if "_FillValue" in attrs:
-                new_fill = signed_dtype.type(attrs["_FillValue"])
-                attrs["_FillValue"] = new_fill
+                new_fill = np.array(attrs["_FillValue"])
+                # use view here to prevent OverflowError
+                attrs["_FillValue"] = new_fill.view(signed_dtype).item()
             data = duck_array_ops.astype(duck_array_ops.around(data), signed_dtype)
 
             return Variable(dims, data, attrs, encoding, fastpath=True)
@@ -535,10 +538,11 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
                 if unsigned == "true":
                     unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}")
                     transform = partial(np.asarray, dtype=unsigned_dtype)
-                    data = lazy_elemwise_func(data, transform, unsigned_dtype)
                     if "_FillValue" in attrs:
-                        new_fill = unsigned_dtype.type(attrs["_FillValue"])
-                        attrs["_FillValue"] = new_fill
+                        new_fill = np.array(attrs["_FillValue"], dtype=data.dtype)
+                        # use view here to prevent OverflowError
+                        attrs["_FillValue"] = new_fill.view(unsigned_dtype).item()
+                    data = lazy_elemwise_func(data, transform, unsigned_dtype)
             elif data.dtype.kind == "u":
                 if unsigned == "false":
                     signed_dtype = np.dtype(f"i{data.dtype.itemsize}")
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 15485dc178a..0b90a05262d 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -166,7 +166,7 @@ def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset:
 
 def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset:
     encoding = {
-        "_FillValue": 255,
+        "_FillValue": np.int8(-1),
         "_Unsigned": "true",
         "dtype": "i1",
         "add_offset": dtype.type(10),
@@ -925,6 +925,35 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None:
                 assert decoded.variables[k].dtype == actual.variables[k].dtype
             assert_allclose(decoded, actual, decode_bytes=False)
 
+    @pytest.mark.parametrize("fillvalue", [np.int8(-1), np.uint8(255)])
+    def test_roundtrip_unsigned(self, fillvalue):
+        # regression/numpy2 test for
+        encoding = {
+            "_FillValue": fillvalue,
+            "_Unsigned": "true",
+            "dtype": "i1",
+        }
+        x = np.array([0, 1, 127, 128, 254, np.nan], dtype=np.float32)
+        decoded = Dataset({"x": ("t", x, {}, encoding)})
+
+        attributes = {
+            "_FillValue": fillvalue,
+            "_Unsigned": "true",
+        }
+        # Create unsigned data corresponding to [0, 1, 127, 128, 255] unsigned
+        sb = np.asarray([0, 1, 127, -128, -2, -1], dtype="i1")
+        encoded = Dataset({"x": ("t", sb, attributes)})
+
+        with self.roundtrip(decoded) as actual:
+            for k in decoded.variables:
+                assert decoded.variables[k].dtype == actual.variables[k].dtype
+            assert_allclose(decoded, actual, decode_bytes=False)
+
+        with self.roundtrip(decoded, open_kwargs=dict(decode_cf=False)) as actual:
+            for k in encoded.variables:
+                assert encoded.variables[k].dtype == actual.variables[k].dtype
+            assert_allclose(encoded, actual, decode_bytes=False)
+
     @staticmethod
     def _create_cf_dataset():
         original = Dataset(
@@ -4285,7 +4314,7 @@ def test_roundtrip_coordinates_with_space(self) -> None:
     def test_roundtrip_numpy_datetime_data(self) -> None:
         # Override method in DatasetIOBase - remove not applicable
         # save_kwargs
-        times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"])
+        times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"], unit="ns")
         expected = Dataset({"t": ("t", times), "t0": times[0]})
         with self.roundtrip(expected) as actual:
             assert_identical(expected, actual)

From e12aa447c31908c9022d33c226f1481763a6ed9a Mon Sep 17 00:00:00 2001
From: Justus Magin <keewis@users.noreply.github.com>
Date: Thu, 11 Jul 2024 11:25:47 +0200
Subject: [PATCH 2/4] `numpy` 2 compatibility in the iris code paths (#9156)

* don't remove `cf_units` (and thus `iris`) [skip-ci]

* try keeping netcdf4, h5netcdf, and h5py
---
 ci/install-upstream-wheels.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh
index 79fae3c46a9..855336ad8bd 100755
--- a/ci/install-upstream-wheels.sh
+++ b/ci/install-upstream-wheels.sh
@@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse
 # temporarily remove numexpr
 $conda remove -y numexpr
 # temporarily remove backends
-$conda remove -y cf_units pydap
+$conda remove -y pydap
 # forcibly remove packages to avoid artifacts
 $conda remove -y --force \
     numpy \

From c1965c215d01f3d7d0545b2954c7bab9eaf94f8f Mon Sep 17 00:00:00 2001
From: Justus Magin <keewis@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:00:12 +0200
Subject: [PATCH 3/4] switch the documentation to run with `numpy>=2` (#9177)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* `NaN` → `nan`

* new numpy scalar repr

* unpin `numpy` in the docs [skip-ci]

* try extracting the scalars to pass to `linspace`

* use `numpy` instead of `numpy.array_api` [skip-ci]

and mention that we're falling back to `numpy.array_api` in `numpy<2.0`

* Update ci/requirements/doc.yml

* Remove 2D cross product doctests

* One more fix

---------

Co-authored-by: Jessica Scheick <JessicaS11@users.noreply.github.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
Co-authored-by: Deepak Cherian <deepak@cherian.net>
---
 ci/requirements/doc.yml           |  2 +-
 doc/getting-started-guide/faq.rst |  4 +--
 doc/user-guide/computation.rst    |  4 +--
 doc/user-guide/interpolation.rst  |  4 +--
 doc/user-guide/testing.rst        |  5 ++--
 xarray/core/computation.py        | 47 +------------------------------
 xarray/plot/facetgrid.py          |  4 +--
 xarray/plot/utils.py              |  6 ++--
 8 files changed, 16 insertions(+), 60 deletions(-)

diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
index 116eee7f702..cbbbaa16d7a 100644
--- a/ci/requirements/doc.yml
+++ b/ci/requirements/doc.yml
@@ -21,7 +21,7 @@ dependencies:
   - nbsphinx
   - netcdf4>=1.5
   - numba
-  - numpy>=1.21,<2
+  - numpy>=2
   - packaging>=21.3
   - pandas>=1.4,!=2.1.0
   - pooch
diff --git a/doc/getting-started-guide/faq.rst b/doc/getting-started-guide/faq.rst
index 7f99fa77e3a..58d5448cdf5 100644
--- a/doc/getting-started-guide/faq.rst
+++ b/doc/getting-started-guide/faq.rst
@@ -352,9 +352,9 @@ Some packages may have additional functionality beyond what is shown here. You c
 How does xarray handle missing values?
 --------------------------------------
 
-**xarray can handle missing values using ``np.NaN``**
+**xarray can handle missing values using ``np.nan``**
 
-- ``np.NaN`` is  used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing. ``np.NaN`` is a constant value in NumPy that represents "Not a Number" or missing values.
+- ``np.nan`` is  used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing. ``np.nan`` is a constant value in NumPy that represents "Not a Number" or missing values.
 
 - Most of xarray's computation methods are designed to automatically handle missing values appropriately.
 
diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst
index f99d41be538..23ecbedf61e 100644
--- a/doc/user-guide/computation.rst
+++ b/doc/user-guide/computation.rst
@@ -426,7 +426,7 @@ However, the functions also take missing values in the data into account:
 
 .. ipython:: python
 
-    data = xr.DataArray([np.NaN, 2, 4])
+    data = xr.DataArray([np.nan, 2, 4])
     weights = xr.DataArray([8, 1, 1])
 
     data.weighted(weights).mean()
@@ -444,7 +444,7 @@ If the weights add up to to 0, ``sum`` returns 0:
 
     data.weighted(weights).sum()
 
-and ``mean``, ``std`` and ``var`` return ``NaN``:
+and ``mean``, ``std`` and ``var`` return ``nan``:
 
 .. ipython:: python
 
diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst
index 311e1bf0129..53d8a52b342 100644
--- a/doc/user-guide/interpolation.rst
+++ b/doc/user-guide/interpolation.rst
@@ -292,8 +292,8 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data.
     axes[0].set_title("Raw data")
 
     # Interpolated data
-    new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.sizes["lon"] * 4)
-    new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.sizes["lat"] * 4)
+    new_lon = np.linspace(ds.lon[0].item(), ds.lon[-1].item(), ds.sizes["lon"] * 4)
+    new_lat = np.linspace(ds.lat[0].item(), ds.lat[-1].item(), ds.sizes["lat"] * 4)
     dsi = ds.interp(lat=new_lat, lon=new_lon)
     dsi.air.plot(ax=axes[1])
     @savefig interpolation_sample3.png width=8in
diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst
index 13279eccb0b..d82d9d7d7d9 100644
--- a/doc/user-guide/testing.rst
+++ b/doc/user-guide/testing.rst
@@ -239,9 +239,10 @@ If the array type you want to generate has an array API-compliant top-level name
 you can use this neat trick:
 
 .. ipython:: python
-    :okwarning:
 
-    from numpy import array_api as xp  # available in numpy 1.26.0
+    import numpy as xp  # compatible in numpy 2.0
+
+    # use `import numpy.array_api as xp` in numpy>=1.23,<2.0
 
     from hypothesis.extra.array_api import make_strategies_namespace
 
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index f418d3821c2..5d21d0836b9 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -1064,7 +1064,7 @@ def apply_ufunc(
     supported:
 
     >>> magnitude(3, 4)
-    5.0
+    np.float64(5.0)
     >>> magnitude(3, np.array([0, 4]))
     array([3., 5.])
     >>> magnitude(array, 0)
@@ -1587,15 +1587,6 @@ def cross(
     array([-3,  6, -3])
     Dimensions without coordinates: dim_0
 
-    Vector cross-product with 2 dimensions, returns in the perpendicular
-    direction:
-
-    >>> a = xr.DataArray([1, 2])
-    >>> b = xr.DataArray([4, 5])
-    >>> xr.cross(a, b, dim="dim_0")
-    <xarray.DataArray ()> Size: 8B
-    array(-3)
-
     Vector cross-product with 3 dimensions but zeros at the last axis
     yields the same results as with 2 dimensions:
 
@@ -1606,42 +1597,6 @@ def cross(
     array([ 0,  0, -3])
     Dimensions without coordinates: dim_0
 
-    One vector with dimension 2:
-
-    >>> a = xr.DataArray(
-    ...     [1, 2],
-    ...     dims=["cartesian"],
-    ...     coords=dict(cartesian=(["cartesian"], ["x", "y"])),
-    ... )
-    >>> b = xr.DataArray(
-    ...     [4, 5, 6],
-    ...     dims=["cartesian"],
-    ...     coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])),
-    ... )
-    >>> xr.cross(a, b, dim="cartesian")
-    <xarray.DataArray (cartesian: 3)> Size: 24B
-    array([12, -6, -3])
-    Coordinates:
-      * cartesian  (cartesian) <U1 12B 'x' 'y' 'z'
-
-    One vector with dimension 2 but coords in other positions:
-
-    >>> a = xr.DataArray(
-    ...     [1, 2],
-    ...     dims=["cartesian"],
-    ...     coords=dict(cartesian=(["cartesian"], ["x", "z"])),
-    ... )
-    >>> b = xr.DataArray(
-    ...     [4, 5, 6],
-    ...     dims=["cartesian"],
-    ...     coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])),
-    ... )
-    >>> xr.cross(a, b, dim="cartesian")
-    <xarray.DataArray (cartesian: 3)> Size: 24B
-    array([-10,   2,   5])
-    Coordinates:
-      * cartesian  (cartesian) <U1 12B 'x' 'y' 'z'
-
     Multiple vector cross-products. Note that the direction of the
     cross product vector is defined by the right-hand rule:
 
diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py
index faf809a8a74..613362ed5d5 100644
--- a/xarray/plot/facetgrid.py
+++ b/xarray/plot/facetgrid.py
@@ -774,7 +774,7 @@ def _get_largest_lims(self) -> dict[str, tuple[float, float]]:
         >>> ds = xr.tutorial.scatter_example_dataset(seed=42)
         >>> fg = ds.plot.scatter(x="A", y="B", hue="y", row="x", col="w")
         >>> round(fg._get_largest_lims()["x"][0], 3)
-        -0.334
+        np.float64(-0.334)
         """
         lims_largest: dict[str, tuple[float, float]] = dict(
             x=(np.inf, -np.inf), y=(np.inf, -np.inf), z=(np.inf, -np.inf)
@@ -817,7 +817,7 @@ def _set_lims(
         >>> fg = ds.plot.scatter(x="A", y="B", hue="y", row="x", col="w")
         >>> fg._set_lims(x=(-0.3, 0.3), y=(0, 2), z=(0, 4))
         >>> fg.axs[0, 0].get_xlim(), fg.axs[0, 0].get_ylim()
-        ((-0.3, 0.3), (0.0, 2.0))
+        ((np.float64(-0.3), np.float64(0.3)), (np.float64(0.0), np.float64(2.0)))
         """
         lims_largest = self._get_largest_lims()
 
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index 8789bc2f9c2..a0abe0c8cfd 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -811,11 +811,11 @@ def _update_axes(
 def _is_monotonic(coord, axis=0):
     """
     >>> _is_monotonic(np.array([0, 1, 2]))
-    True
+    np.True_
     >>> _is_monotonic(np.array([2, 1, 0]))
-    True
+    np.True_
     >>> _is_monotonic(np.array([0, 2, 1]))
-    False
+    np.False_
     """
     if coord.shape[axis] < 3:
         return True

From a69815f594f623cb51b30c5ccc58b53fa4aa67fc Mon Sep 17 00:00:00 2001
From: Justus Magin <keewis@users.noreply.github.com>
Date: Thu, 11 Jul 2024 15:08:36 +0200
Subject: [PATCH 4/4] exclude the bots from the release notes (#9235)

---
 .github/release.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .github/release.yml

diff --git a/.github/release.yml b/.github/release.yml
new file mode 100644
index 00000000000..9d1e0987bfc
--- /dev/null
+++ b/.github/release.yml
@@ -0,0 +1,5 @@
+changelog:
+  exclude:
+    authors:
+      - dependabot
+      - pre-commit-ci