From 7087ca49629e07be004d92fd08f916e3359a57e1 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 11 Jul 2024 10:54:18 +0200 Subject: [PATCH 1/4] `numpy` 2 compatibility in the `netcdf4` and `h5netcdf` backends (#9136) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * don't remove `netcdf4` from the upstream-dev environment * also stop removing `h5py` and `hdf5` * hard-code the precision (I believe this was missed in #9081) * don't remove `h5py` either * use on-diks _FillValue as standrd expects, use view instead of cast to prevent OverflowError. * whats-new * unpin `numpy` * rework UnsignedCoder * add test * Update xarray/coding/variables.py Co-authored-by: Justus Magin --------- Co-authored-by: Kai Mühlbauer Co-authored-by: Kai Mühlbauer Co-authored-by: Deepak Cherian --- ci/install-upstream-wheels.sh | 5 ++-- ci/requirements/all-but-dask.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- doc/whats-new.rst | 4 ++- xarray/coding/variables.py | 16 +++++++----- xarray/tests/test_backends.py | 33 +++++++++++++++++++++++-- 7 files changed, 49 insertions(+), 15 deletions(-) diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index d728768439a..79fae3c46a9 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse # temporarily remove numexpr $conda remove -y numexpr # temporarily remove backends -$conda remove -y cf_units hdf5 h5py netcdf4 pydap +$conda remove -y cf_units pydap # forcibly remove packages to avoid artifacts $conda remove -y --force \ numpy \ @@ -37,8 +37,7 @@ python -m pip install \ numpy \ scipy \ matplotlib \ - pandas \ - h5py + pandas # for some reason pandas depends on pyarrow already. # Remove once a `pyarrow` version compiled with `numpy>=2.0` is on `conda-forge` python -m pip install \ diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index abf6a88690a..119db282ad9 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -22,7 +22,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy<2 + - numpy - packaging - pandas - pint>=0.22 diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 2eedc9b0621..896e390ea3e 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -23,7 +23,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy<2 + - numpy - packaging - pandas # - pint>=0.22 diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 317e1fe5f41..ef02a3e7f23 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -26,7 +26,7 @@ dependencies: - numba - numbagg - numexpr - - numpy<2 + - numpy - opt_einsum - packaging - pandas diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f237b406bd5..e8369dc2f40 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,6 +45,8 @@ Bug fixes By `Pontus Lurcock `_. - Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`). By `Justus Magin `_. +- ``numpy>=2`` compatibility in the ``netcdf4`` backend (:pull:`9136`). + By `Justus Magin `_ and `Kai Mühlbauer `_. - Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`). By `Justus Magin `_. - Address regression introduced in :pull:`9002` that prevented objects returned @@ -67,7 +69,7 @@ Documentation - Adds a flow-chart diagram to help users navigate help resources (`Discussion #8990 `_). By `Jessica Scheick `_. - Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`) - By `Maximilian Roos `_ + By `Maximilian Roos `_. Internal Changes diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..d19f285d2b9 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -516,10 +516,13 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) pop_to(encoding, attrs, "_Unsigned") - signed_dtype = np.dtype(f"i{data.dtype.itemsize}") + # we need the on-disk type here + # trying to get it from encoding, resort to an int with the same precision as data.dtype if not available + signed_dtype = np.dtype(encoding.get("dtype", f"i{data.dtype.itemsize}")) if "_FillValue" in attrs: - new_fill = signed_dtype.type(attrs["_FillValue"]) - attrs["_FillValue"] = new_fill + new_fill = np.array(attrs["_FillValue"]) + # use view here to prevent OverflowError + attrs["_FillValue"] = new_fill.view(signed_dtype).item() data = duck_array_ops.astype(duck_array_ops.around(data), signed_dtype) return Variable(dims, data, attrs, encoding, fastpath=True) @@ -535,10 +538,11 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: if unsigned == "true": unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") transform = partial(np.asarray, dtype=unsigned_dtype) - data = lazy_elemwise_func(data, transform, unsigned_dtype) if "_FillValue" in attrs: - new_fill = unsigned_dtype.type(attrs["_FillValue"]) - attrs["_FillValue"] = new_fill + new_fill = np.array(attrs["_FillValue"], dtype=data.dtype) + # use view here to prevent OverflowError + attrs["_FillValue"] = new_fill.view(unsigned_dtype).item() + data = lazy_elemwise_func(data, transform, unsigned_dtype) elif data.dtype.kind == "u": if unsigned == "false": signed_dtype = np.dtype(f"i{data.dtype.itemsize}") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 15485dc178a..0b90a05262d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -166,7 +166,7 @@ def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset: def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset: encoding = { - "_FillValue": 255, + "_FillValue": np.int8(-1), "_Unsigned": "true", "dtype": "i1", "add_offset": dtype.type(10), @@ -925,6 +925,35 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert decoded.variables[k].dtype == actual.variables[k].dtype assert_allclose(decoded, actual, decode_bytes=False) + @pytest.mark.parametrize("fillvalue", [np.int8(-1), np.uint8(255)]) + def test_roundtrip_unsigned(self, fillvalue): + # regression/numpy2 test for + encoding = { + "_FillValue": fillvalue, + "_Unsigned": "true", + "dtype": "i1", + } + x = np.array([0, 1, 127, 128, 254, np.nan], dtype=np.float32) + decoded = Dataset({"x": ("t", x, {}, encoding)}) + + attributes = { + "_FillValue": fillvalue, + "_Unsigned": "true", + } + # Create unsigned data corresponding to [0, 1, 127, 128, 255] unsigned + sb = np.asarray([0, 1, 127, -128, -2, -1], dtype="i1") + encoded = Dataset({"x": ("t", sb, attributes)}) + + with self.roundtrip(decoded) as actual: + for k in decoded.variables: + assert decoded.variables[k].dtype == actual.variables[k].dtype + assert_allclose(decoded, actual, decode_bytes=False) + + with self.roundtrip(decoded, open_kwargs=dict(decode_cf=False)) as actual: + for k in encoded.variables: + assert encoded.variables[k].dtype == actual.variables[k].dtype + assert_allclose(encoded, actual, decode_bytes=False) + @staticmethod def _create_cf_dataset(): original = Dataset( @@ -4285,7 +4314,7 @@ def test_roundtrip_coordinates_with_space(self) -> None: def test_roundtrip_numpy_datetime_data(self) -> None: # Override method in DatasetIOBase - remove not applicable # save_kwargs - times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"]) + times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"], unit="ns") expected = Dataset({"t": ("t", times), "t0": times[0]}) with self.roundtrip(expected) as actual: assert_identical(expected, actual) From e12aa447c31908c9022d33c226f1481763a6ed9a Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 11 Jul 2024 11:25:47 +0200 Subject: [PATCH 2/4] `numpy` 2 compatibility in the iris code paths (#9156) * don't remove `cf_units` (and thus `iris`) [skip-ci] * try keeping netcdf4, h5netcdf, and h5py --- ci/install-upstream-wheels.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index 79fae3c46a9..855336ad8bd 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse # temporarily remove numexpr $conda remove -y numexpr # temporarily remove backends -$conda remove -y cf_units pydap +$conda remove -y pydap # forcibly remove packages to avoid artifacts $conda remove -y --force \ numpy \ From c1965c215d01f3d7d0545b2954c7bab9eaf94f8f Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 11 Jul 2024 14:00:12 +0200 Subject: [PATCH 3/4] switch the documentation to run with `numpy>=2` (#9177) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * `NaN` → `nan` * new numpy scalar repr * unpin `numpy` in the docs [skip-ci] * try extracting the scalars to pass to `linspace` * use `numpy` instead of `numpy.array_api` [skip-ci] and mention that we're falling back to `numpy.array_api` in `numpy<2.0` * Update ci/requirements/doc.yml * Remove 2D cross product doctests * One more fix --------- Co-authored-by: Jessica Scheick Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian --- ci/requirements/doc.yml | 2 +- doc/getting-started-guide/faq.rst | 4 +-- doc/user-guide/computation.rst | 4 +-- doc/user-guide/interpolation.rst | 4 +-- doc/user-guide/testing.rst | 5 ++-- xarray/core/computation.py | 47 +------------------------------ xarray/plot/facetgrid.py | 4 +-- xarray/plot/utils.py | 6 ++-- 8 files changed, 16 insertions(+), 60 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 116eee7f702..cbbbaa16d7a 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -21,7 +21,7 @@ dependencies: - nbsphinx - netcdf4>=1.5 - numba - - numpy>=1.21,<2 + - numpy>=2 - packaging>=21.3 - pandas>=1.4,!=2.1.0 - pooch diff --git a/doc/getting-started-guide/faq.rst b/doc/getting-started-guide/faq.rst index 7f99fa77e3a..58d5448cdf5 100644 --- a/doc/getting-started-guide/faq.rst +++ b/doc/getting-started-guide/faq.rst @@ -352,9 +352,9 @@ Some packages may have additional functionality beyond what is shown here. You c How does xarray handle missing values? -------------------------------------- -**xarray can handle missing values using ``np.NaN``** +**xarray can handle missing values using ``np.nan``** -- ``np.NaN`` is used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing. ``np.NaN`` is a constant value in NumPy that represents "Not a Number" or missing values. +- ``np.nan`` is used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing. ``np.nan`` is a constant value in NumPy that represents "Not a Number" or missing values. - Most of xarray's computation methods are designed to automatically handle missing values appropriately. diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index f99d41be538..23ecbedf61e 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -426,7 +426,7 @@ However, the functions also take missing values in the data into account: .. ipython:: python - data = xr.DataArray([np.NaN, 2, 4]) + data = xr.DataArray([np.nan, 2, 4]) weights = xr.DataArray([8, 1, 1]) data.weighted(weights).mean() @@ -444,7 +444,7 @@ If the weights add up to to 0, ``sum`` returns 0: data.weighted(weights).sum() -and ``mean``, ``std`` and ``var`` return ``NaN``: +and ``mean``, ``std`` and ``var`` return ``nan``: .. ipython:: python diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst index 311e1bf0129..53d8a52b342 100644 --- a/doc/user-guide/interpolation.rst +++ b/doc/user-guide/interpolation.rst @@ -292,8 +292,8 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.sizes["lon"] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.sizes["lat"] * 4) + new_lon = np.linspace(ds.lon[0].item(), ds.lon[-1].item(), ds.sizes["lon"] * 4) + new_lat = np.linspace(ds.lat[0].item(), ds.lat[-1].item(), ds.sizes["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 13279eccb0b..d82d9d7d7d9 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -239,9 +239,10 @@ If the array type you want to generate has an array API-compliant top-level name you can use this neat trick: .. ipython:: python - :okwarning: - from numpy import array_api as xp # available in numpy 1.26.0 + import numpy as xp # compatible in numpy 2.0 + + # use `import numpy.array_api as xp` in numpy>=1.23,<2.0 from hypothesis.extra.array_api import make_strategies_namespace diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f418d3821c2..5d21d0836b9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1064,7 +1064,7 @@ def apply_ufunc( supported: >>> magnitude(3, 4) - 5.0 + np.float64(5.0) >>> magnitude(3, np.array([0, 4])) array([3., 5.]) >>> magnitude(array, 0) @@ -1587,15 +1587,6 @@ def cross( array([-3, 6, -3]) Dimensions without coordinates: dim_0 - Vector cross-product with 2 dimensions, returns in the perpendicular - direction: - - >>> a = xr.DataArray([1, 2]) - >>> b = xr.DataArray([4, 5]) - >>> xr.cross(a, b, dim="dim_0") - Size: 8B - array(-3) - Vector cross-product with 3 dimensions but zeros at the last axis yields the same results as with 2 dimensions: @@ -1606,42 +1597,6 @@ def cross( array([ 0, 0, -3]) Dimensions without coordinates: dim_0 - One vector with dimension 2: - - >>> a = xr.DataArray( - ... [1, 2], - ... dims=["cartesian"], - ... coords=dict(cartesian=(["cartesian"], ["x", "y"])), - ... ) - >>> b = xr.DataArray( - ... [4, 5, 6], - ... dims=["cartesian"], - ... coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])), - ... ) - >>> xr.cross(a, b, dim="cartesian") - Size: 24B - array([12, -6, -3]) - Coordinates: - * cartesian (cartesian) >> a = xr.DataArray( - ... [1, 2], - ... dims=["cartesian"], - ... coords=dict(cartesian=(["cartesian"], ["x", "z"])), - ... ) - >>> b = xr.DataArray( - ... [4, 5, 6], - ... dims=["cartesian"], - ... coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])), - ... ) - >>> xr.cross(a, b, dim="cartesian") - Size: 24B - array([-10, 2, 5]) - Coordinates: - * cartesian (cartesian) dict[str, tuple[float, float]]: >>> ds = xr.tutorial.scatter_example_dataset(seed=42) >>> fg = ds.plot.scatter(x="A", y="B", hue="y", row="x", col="w") >>> round(fg._get_largest_lims()["x"][0], 3) - -0.334 + np.float64(-0.334) """ lims_largest: dict[str, tuple[float, float]] = dict( x=(np.inf, -np.inf), y=(np.inf, -np.inf), z=(np.inf, -np.inf) @@ -817,7 +817,7 @@ def _set_lims( >>> fg = ds.plot.scatter(x="A", y="B", hue="y", row="x", col="w") >>> fg._set_lims(x=(-0.3, 0.3), y=(0, 2), z=(0, 4)) >>> fg.axs[0, 0].get_xlim(), fg.axs[0, 0].get_ylim() - ((-0.3, 0.3), (0.0, 2.0)) + ((np.float64(-0.3), np.float64(0.3)), (np.float64(0.0), np.float64(2.0))) """ lims_largest = self._get_largest_lims() diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 8789bc2f9c2..a0abe0c8cfd 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -811,11 +811,11 @@ def _update_axes( def _is_monotonic(coord, axis=0): """ >>> _is_monotonic(np.array([0, 1, 2])) - True + np.True_ >>> _is_monotonic(np.array([2, 1, 0])) - True + np.True_ >>> _is_monotonic(np.array([0, 2, 1])) - False + np.False_ """ if coord.shape[axis] < 3: return True From a69815f594f623cb51b30c5ccc58b53fa4aa67fc Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 11 Jul 2024 15:08:36 +0200 Subject: [PATCH 4/4] exclude the bots from the release notes (#9235) --- .github/release.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .github/release.yml diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 00000000000..9d1e0987bfc --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,5 @@ +changelog: + exclude: + authors: + - dependabot + - pre-commit-ci