diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 00000000000..9d1e0987bfc --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,5 @@ +changelog: + exclude: + authors: + - dependabot + - pre-commit-ci diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index d728768439a..855336ad8bd 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse # temporarily remove numexpr $conda remove -y numexpr # temporarily remove backends -$conda remove -y cf_units hdf5 h5py netcdf4 pydap +$conda remove -y pydap # forcibly remove packages to avoid artifacts $conda remove -y --force \ numpy \ @@ -37,8 +37,7 @@ python -m pip install \ numpy \ scipy \ matplotlib \ - pandas \ - h5py + pandas # for some reason pandas depends on pyarrow already. # Remove once a `pyarrow` version compiled with `numpy>=2.0` is on `conda-forge` python -m pip install \ diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index abf6a88690a..119db282ad9 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -22,7 +22,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy<2 + - numpy - packaging - pandas - pint>=0.22 diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 116eee7f702..cbbbaa16d7a 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -21,7 +21,7 @@ dependencies: - nbsphinx - netcdf4>=1.5 - numba - - numpy>=1.21,<2 + - numpy>=2 - packaging>=21.3 - pandas>=1.4,!=2.1.0 - pooch diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 2eedc9b0621..896e390ea3e 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -23,7 +23,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy<2 + - numpy - packaging - pandas # - pint>=0.22 diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 317e1fe5f41..ef02a3e7f23 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -26,7 +26,7 @@ dependencies: - numba - numbagg - numexpr - - numpy<2 + - numpy - opt_einsum - packaging - pandas diff --git a/doc/api.rst b/doc/api.rst index a8f8ea7dd1c..4cf8f374d37 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -111,6 +111,7 @@ Dataset contents Dataset.drop_duplicates Dataset.drop_dims Dataset.drop_encoding + Dataset.drop_attrs Dataset.set_coords Dataset.reset_coords Dataset.convert_calendar @@ -306,6 +307,7 @@ DataArray contents DataArray.drop_indexes DataArray.drop_duplicates DataArray.drop_encoding + DataArray.drop_attrs DataArray.reset_coords DataArray.copy DataArray.convert_calendar diff --git a/doc/getting-started-guide/faq.rst b/doc/getting-started-guide/faq.rst index 7f99fa77e3a..58d5448cdf5 100644 --- a/doc/getting-started-guide/faq.rst +++ b/doc/getting-started-guide/faq.rst @@ -352,9 +352,9 @@ Some packages may have additional functionality beyond what is shown here. You c How does xarray handle missing values? -------------------------------------- -**xarray can handle missing values using ``np.NaN``** +**xarray can handle missing values using ``np.nan``** -- ``np.NaN`` is used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing. ``np.NaN`` is a constant value in NumPy that represents "Not a Number" or missing values. 
+- ``np.nan`` is used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing. ``np.nan`` is a constant value in NumPy that represents "Not a Number" or missing values. - Most of xarray's computation methods are designed to automatically handle missing values appropriately. diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index f99d41be538..23ecbedf61e 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -426,7 +426,7 @@ However, the functions also take missing values in the data into account: .. ipython:: python - data = xr.DataArray([np.NaN, 2, 4]) + data = xr.DataArray([np.nan, 2, 4]) weights = xr.DataArray([8, 1, 1]) data.weighted(weights).mean() @@ -444,7 +444,7 @@ If the weights add up to to 0, ``sum`` returns 0: data.weighted(weights).sum() -and ``mean``, ``std`` and ``var`` return ``NaN``: +and ``mean``, ``std`` and ``var`` return ``nan``: .. ipython:: python diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst index 311e1bf0129..53d8a52b342 100644 --- a/doc/user-guide/interpolation.rst +++ b/doc/user-guide/interpolation.rst @@ -292,8 +292,8 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.sizes["lon"] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.sizes["lat"] * 4) + new_lon = np.linspace(ds.lon[0].item(), ds.lon[-1].item(), ds.sizes["lon"] * 4) + new_lat = np.linspace(ds.lat[0].item(), ds.lat[-1].item(), ds.sizes["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 13279eccb0b..d82d9d7d7d9 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -239,9 +239,10 @@ If the array type you want to generate has an array API-compliant top-level name you can use this neat trick: .. ipython:: python - :okwarning: - from numpy import array_api as xp # available in numpy 1.26.0 + import numpy as xp # compatible with numpy 2.0 + + # use `import numpy.array_api as xp` in numpy>=1.23,<2.0 from hypothesis.extra.array_api import make_strategies_namespace diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8c6b3a099c2..6a8e898c93c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,10 @@ New Features By `Martin Raspaud `_. - Extract the source url from fsspec objects (:issue:`9142`, :pull:`8923`). By `Justus Magin `_. +- Add :py:meth:`DataArray.drop_attrs` & :py:meth:`Dataset.drop_attrs` methods, + to return an object without ``attrs``. A ``deep`` parameter controls whether + variables' ``attrs`` are also dropped. + By `Maximilian Roos `_. (:pull:`8288`) Breaking changes ~~~~~~~~~~~~~~~~ @@ -37,14 +41,24 @@ Deprecations Bug fixes ~~~~~~~~~ +- Fix scatter plot broadcasting unnecessarily. (:issue:`9129`, :pull:`9206`) + By `Jimmy Westling `_. - Don't convert custom indexes to ``pandas`` indexes when computing a diff (:pull:`9157`) By `Justus Magin `_. - Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`). By `Pontus Lurcock `_. - Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`). By `Justus Magin `_. +- ``numpy>=2`` compatibility in the ``netcdf4`` backend (:pull:`9136`). + By `Justus Magin `_ and `Kai Mühlbauer `_.
- Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`). By `Justus Magin `_. +- Address regression introduced in :pull:`9002` that prevented objects returned + by :py:meth:`DataArray.convert_calendar` from being indexed by a time index in + certain circumstances (:issue:`9138`, :pull:`9192`). By `Mark Harfouche + `_ and `Spencer Clark + `_. + - Fix static typing of tolerance arguments by allowing `str` type (:issue:`8892`, :pull:`9194`). By `Michael Niklas `_. - Dark themes are now properly detected for ``html[data-theme=dark]``-tags (:pull:`9200`). By `Dieter Werthmüller `_. Documentation ~~~~~~~~~~~~~ @@ -59,7 +73,7 @@ Documentation - Adds a flow-chart diagram to help users navigate help resources (`Discussion #8990 `_). By `Jessica Scheick `_. - Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`) - By `Maximilian Roos `_ + By `Maximilian Roos `_. Internal Changes diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index c4fe9e1f4ae..6f492e78bf9 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -5,7 +5,10 @@ from xarray.coding.cftime_offsets import date_range_like, get_date_type from xarray.coding.cftimeindex import CFTimeIndex -from xarray.coding.times import _should_cftime_be_used, convert_times +from xarray.coding.times import ( + _should_cftime_be_used, + convert_times, +) from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like try: @@ -222,6 +225,13 @@ def convert_calendar( # Remove NaN that where put on invalid dates in target calendar out = out.where(out[dim].notnull(), drop=True) + if use_cftime: + # Reassign times to ensure time index of output is a CFTimeIndex + # (previously it was an Index due to the presence of NaN values). + # Note this is not needed in the case that the output time index is + # a DatetimeIndex, since DatetimeIndexes can handle NaN values.
+ out[dim] = CFTimeIndex(out[dim].data) + if missing is not None: time_target = date_range_like(time, calendar=calendar, use_cftime=use_cftime) out = out.reindex({dim: time_target}, fill_value=missing) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 0804956fb57..7be26f95697 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -22,7 +22,7 @@ ) from xarray.core import indexing from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like -from xarray.core.duck_array_ops import asarray +from xarray.core.duck_array_ops import asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import nanosecond_precision_timestamp from xarray.core.utils import emit_user_level_warning @@ -318,7 +318,7 @@ def decode_cf_datetime( cftime.num2date """ num_dates = np.asarray(num_dates) - flat_num_dates = num_dates.ravel() + flat_num_dates = ravel(num_dates) if calendar is None: calendar = "standard" @@ -351,7 +351,7 @@ def decode_cf_datetime( else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) - return dates.reshape(num_dates.shape) + return reshape(dates, num_dates.shape) def to_timedelta_unboxed(value, **kwargs): @@ -372,8 +372,8 @@ def decode_cf_timedelta(num_timedeltas, units: str) -> np.ndarray: """ num_timedeltas = np.asarray(num_timedeltas) units = _netcdf_to_numpy_timeunit(units) - result = to_timedelta_unboxed(num_timedeltas.ravel(), unit=units) - return result.reshape(num_timedeltas.shape) + result = to_timedelta_unboxed(ravel(num_timedeltas), unit=units) + return reshape(result, num_timedeltas.shape) def _unit_timedelta_cftime(units: str) -> timedelta: @@ -431,7 +431,7 @@ def infer_datetime_units(dates) -> str: 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = np.asarray(dates).ravel() + dates = ravel(np.asarray(dates)) if np.asarray(dates).dtype == "datetime64[ns]": dates = to_datetime_unboxed(dates) dates = dates[pd.notnull(dates)] @@ -459,7 +459,7 @@ def infer_timedelta_units(deltas) -> str: {'days', 'hours', 'minutes' 'seconds'} (the first one that can evenly divide all unique time deltas in `deltas`) """ - deltas = to_timedelta_unboxed(np.asarray(deltas).ravel()) + deltas = to_timedelta_unboxed(ravel(np.asarray(deltas))) unique_timedeltas = np.unique(deltas[pd.notnull(deltas)]) return _infer_time_units_from_diff(unique_timedeltas) @@ -647,7 +647,7 @@ def encode_datetime(d): except TypeError: return np.nan if d is None else cftime.date2num(d, units, calendar) - return np.array([encode_datetime(d) for d in dates.ravel()]).reshape(dates.shape) + return reshape(np.array([encode_datetime(d) for d in ravel(dates)]), dates.shape) def cast_to_int_if_safe(num) -> np.ndarray: @@ -757,7 +757,7 @@ def _eagerly_encode_cf_datetime( # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from # dates to be encoded (GH 2272). 
- dates_as_index = pd.DatetimeIndex(dates.ravel()) + dates_as_index = pd.DatetimeIndex(ravel(dates)) time_deltas = dates_as_index - ref_date # retrieve needed units to faithfully encode to int64 @@ -795,7 +795,7 @@ def _eagerly_encode_cf_datetime( floor_division = True num = _division(time_deltas, time_delta, floor_division) - num = num.values.reshape(dates.shape) + num = reshape(num.values, dates.shape) except (OutOfBoundsDatetime, OverflowError, ValueError): num = _encode_datetime_with_cftime(dates, units, calendar) @@ -883,7 +883,7 @@ def _eagerly_encode_cf_timedelta( units = data_units time_delta = _time_units_to_timedelta64(units) - time_deltas = pd.TimedeltaIndex(timedeltas.ravel()) + time_deltas = pd.TimedeltaIndex(ravel(timedeltas)) # retrieve needed units to faithfully encode to int64 needed_units = data_units @@ -915,7 +915,7 @@ def _eagerly_encode_cf_timedelta( floor_division = True num = _division(time_deltas, time_delta, floor_division) - num = num.values.reshape(timedeltas.shape) + num = reshape(num.values, timedeltas.shape) if dtype is not None: num = _cast_to_dtype_if_safe(num, dtype) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..d19f285d2b9 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -516,10 +516,13 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) pop_to(encoding, attrs, "_Unsigned") - signed_dtype = np.dtype(f"i{data.dtype.itemsize}") + # we need the on-disk type here + # try to get it from encoding, falling back to an int with the same precision as data.dtype if not available + signed_dtype = np.dtype(encoding.get("dtype", f"i{data.dtype.itemsize}")) if "_FillValue" in attrs: - new_fill = signed_dtype.type(attrs["_FillValue"]) - attrs["_FillValue"] = new_fill + new_fill = np.array(attrs["_FillValue"]) + # use view here to prevent OverflowError + attrs["_FillValue"] = new_fill.view(signed_dtype).item() data = duck_array_ops.astype(duck_array_ops.around(data), signed_dtype) return Variable(dims, data, attrs, encoding, fastpath=True) @@ -535,10 +538,11 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: if unsigned == "true": unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") transform = partial(np.asarray, dtype=unsigned_dtype) - data = lazy_elemwise_func(data, transform, unsigned_dtype) if "_FillValue" in attrs: - new_fill = unsigned_dtype.type(attrs["_FillValue"]) - attrs["_FillValue"] = new_fill + new_fill = np.array(attrs["_FillValue"], dtype=data.dtype) + # use view here to prevent OverflowError + attrs["_FillValue"] = new_fill.view(unsigned_dtype).item() + data = lazy_elemwise_func(data, transform, unsigned_dtype) elif data.dtype.kind == "u": if unsigned == "false": signed_dtype = np.dtype(f"i{data.dtype.itemsize}") diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f418d3821c2..5d21d0836b9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1064,7 +1064,7 @@ def apply_ufunc( supported: >>> magnitude(3, 4) - 5.0 + np.float64(5.0) >>> magnitude(3, np.array([0, 4])) array([3., 5.]) >>> magnitude(array, 0) @@ -1587,15 +1587,6 @@ def cross( array([-3, 6, -3]) Dimensions without coordinates: dim_0 - Vector cross-product with 2 dimensions, returns in the perpendicular - direction: - - >>> a = xr.DataArray([1, 2]) - >>> b = xr.DataArray([4, 5]) - >>> xr.cross(a, b, dim="dim_0") - Size: 8B - array(-3) - Vector cross-product with 3 dimensions but
zeros at the last axis yields the same results as with 2 dimensions: @@ -1606,42 +1597,6 @@ def cross( array([ 0, 0, -3]) Dimensions without coordinates: dim_0 - One vector with dimension 2: - - >>> a = xr.DataArray( - ... [1, 2], - ... dims=["cartesian"], - ... coords=dict(cartesian=(["cartesian"], ["x", "y"])), - ... ) - >>> b = xr.DataArray( - ... [4, 5, 6], - ... dims=["cartesian"], - ... coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])), - ... ) - >>> xr.cross(a, b, dim="cartesian") - Size: 24B - array([12, -6, -3]) - Coordinates: - * cartesian (cartesian) >> a = xr.DataArray( - ... [1, 2], - ... dims=["cartesian"], - ... coords=dict(cartesian=(["cartesian"], ["x", "z"])), - ... ) - >>> b = xr.DataArray( - ... [4, 5, 6], - ... dims=["cartesian"], - ... coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])), - ... ) - >>> xr.cross(a, b, dim="cartesian") - Size: 24B - array([-10, 2, 5]) - Coordinates: - * cartesian (cartesian) Self: + """ + Removes all attributes from the DataArray. + + Parameters + ---------- + deep : bool, default True + Removes attributes from coordinates. + + Returns + ------- + DataArray + """ + return ( + self._to_temp_dataset().drop_attrs(deep=deep).pipe(self._from_temp_dataset) + ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 62864807519..1793abf02d8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -10687,3 +10687,45 @@ def resample( restore_coord_dims=restore_coord_dims, **indexer_kwargs, ) + + def drop_attrs(self, *, deep: bool = True) -> Self: + """ + Removes all attributes from the Dataset and its variables. + + Parameters + ---------- + deep : bool, default True + Removes attributes from all variables. + + Returns + ------- + Dataset + """ + # Remove attributes from the dataset + self = self._replace(attrs={}) + + if not deep: + return self + + # Remove attributes from each variable in the dataset + for var in self.variables: + # variables don't have a `._replace` method, so we copy and then remove + # attrs. If we added a `._replace` method, we could use that instead. + if var not in self.indexes: + self[var] = self[var].copy() + self[var].attrs = {} + + new_idx_variables = {} + # Not sure this is the most elegant way of doing this, but it works. + # (Should we have a more general "map over all variables, including + # indexes" approach?) 
+ for idx, idx_vars in self.xindexes.group_by_index(): + # copy each coordinate variable of an index and drop their attrs + temp_idx_variables = {k: v.copy() for k, v in idx_vars.items()} + for v in temp_idx_variables.values(): + v.attrs = {} + # re-wrap the index object in new coordinate variables + new_idx_variables.update(idx.create_variables(temp_idx_variables)) + self = self.assign(new_idx_variables) + + return self diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py index edc2bf43629..96b59f6174e 100644 --- a/xarray/plot/dataset_plot.py +++ b/xarray/plot/dataset_plot.py @@ -721,8 +721,8 @@ def _temp_dataarray(ds: Dataset, y: Hashable, locals_: dict[str, Any]) -> DataAr """Create a temporary datarray with extra coords.""" from xarray.core.dataarray import DataArray - # Base coords: - coords = dict(ds.coords) + coords = dict(ds[y].coords) + dims = set(ds[y].dims) # Add extra coords to the DataArray from valid kwargs, if using all # kwargs there is a risk that we add unnecessary dataarrays as @@ -732,12 +732,17 @@ def _temp_dataarray(ds: Dataset, y: Hashable, locals_: dict[str, Any]) -> DataAr coord_kwargs = locals_.keys() & valid_coord_kwargs for k in coord_kwargs: key = locals_[k] - if ds.data_vars.get(key) is not None: - coords[key] = ds[key] + darray = ds.get(key) + if darray is not None: + coords[key] = darray + dims.update(darray.dims) + + # Trim dataset of unnecessary dims: + ds_trimmed = ds.drop_dims(ds.sizes.keys() - dims) # TODO: Use ds.dims in the future # The dataarray has to include all the dims. Broadcast to that shape # and add the additional coords: - _y = ds[y].broadcast_like(ds) + _y = ds[y].broadcast_like(ds_trimmed) return DataArray(_y, coords=coords) diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index faf809a8a74..613362ed5d5 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -774,7 +774,7 @@ def _get_largest_lims(self) -> dict[str, tuple[float, float]]: >>> ds = xr.tutorial.scatter_example_dataset(seed=42) >>> fg = ds.plot.scatter(x="A", y="B", hue="y", row="x", col="w") >>> round(fg._get_largest_lims()["x"][0], 3) - -0.334 + np.float64(-0.334) """ lims_largest: dict[str, tuple[float, float]] = dict( x=(np.inf, -np.inf), y=(np.inf, -np.inf), z=(np.inf, -np.inf) ) @@ -817,7 +817,7 @@ def _set_lims( >>> fg = ds.plot.scatter(x="A", y="B", hue="y", row="x", col="w") >>> fg._set_lims(x=(-0.3, 0.3), y=(0, 2), z=(0, 4)) >>> fg.axs[0, 0].get_xlim(), fg.axs[0, 0].get_ylim() - ((-0.3, 0.3), (0.0, 2.0)) + ((np.float64(-0.3), np.float64(0.3)), (np.float64(0.0), np.float64(2.0))) """ lims_largest = self._get_largest_lims() diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 8789bc2f9c2..a0abe0c8cfd 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -811,11 +811,11 @@ def _update_axes( def _is_monotonic(coord, axis=0): """ >>> _is_monotonic(np.array([0, 1, 2])) - True + np.True_ >>> _is_monotonic(np.array([2, 1, 0])) - True + np.True_ >>> _is_monotonic(np.array([0, 2, 1])) - False + np.False_ """ if coord.shape[axis] < 3: return True diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ec3885bb078..152a9ec40e9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -166,7 +166,7 @@ def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset: def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset: encoding = { - "_FillValue": 255, + "_FillValue": np.int8(-1), "_Unsigned": "true", "dtype": "i1", "add_offset":
dtype.type(10), @@ -925,6 +925,35 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert decoded.variables[k].dtype == actual.variables[k].dtype assert_allclose(decoded, actual, decode_bytes=False) + @pytest.mark.parametrize("fillvalue", [np.int8(-1), np.uint8(255)]) + def test_roundtrip_unsigned(self, fillvalue): + # regression/numpy2 test for + encoding = { + "_FillValue": fillvalue, + "_Unsigned": "true", + "dtype": "i1", + } + x = np.array([0, 1, 127, 128, 254, np.nan], dtype=np.float32) + decoded = Dataset({"x": ("t", x, {}, encoding)}) + + attributes = { + "_FillValue": fillvalue, + "_Unsigned": "true", + } + # Create signed on-disk data corresponding to [0, 1, 127, 128, 254, 255] unsigned + sb = np.asarray([0, 1, 127, -128, -2, -1], dtype="i1") + encoded = Dataset({"x": ("t", sb, attributes)}) + + with self.roundtrip(decoded) as actual: + for k in decoded.variables: + assert decoded.variables[k].dtype == actual.variables[k].dtype + assert_allclose(decoded, actual, decode_bytes=False) + + with self.roundtrip(decoded, open_kwargs=dict(decode_cf=False)) as actual: + for k in encoded.variables: + assert encoded.variables[k].dtype == actual.variables[k].dtype + assert_allclose(encoded, actual, decode_bytes=False) + @staticmethod def _create_cf_dataset(): original = Dataset( @@ -4285,7 +4314,7 @@ def test_roundtrip_coordinates_with_space(self) -> None: def test_roundtrip_numpy_datetime_data(self) -> None: # Override method in DatasetIOBase - remove not applicable # save_kwargs - times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"]) + times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"], unit="ns") expected = Dataset({"t": ("t", times), "t0": times[0]}) with self.roundtrip(expected) as actual: assert_identical(expected, actual) diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 7d229371808..13e9f7a1030 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -1,9 +1,10 @@ from __future__ import annotations import numpy as np +import pandas as pd import pytest -from xarray import DataArray, infer_freq +from xarray import CFTimeIndex, DataArray, infer_freq from xarray.coding.calendar_ops import convert_calendar, interp_calendar from xarray.coding.cftime_offsets import date_range from xarray.testing import assert_identical @@ -286,3 +287,25 @@ def test_interp_calendar_errors(): ValueError, match="Both 'source.x' and 'target' must contain datetime objects."
): interp_calendar(da1, da2, dim="x") + + +@requires_cftime +@pytest.mark.parametrize( + ("source_calendar", "target_calendar", "expected_index"), + [("standard", "noleap", CFTimeIndex), ("all_leap", "standard", pd.DatetimeIndex)], +) +def test_convert_calendar_produces_time_index( + source_calendar, target_calendar, expected_index +): + # https://github.com/pydata/xarray/issues/9138 + time = date_range("2000-01-01", "2002-01-01", freq="D", calendar=source_calendar) + temperature = np.ones(len(time)) + da = DataArray( + data=temperature, + dims=["time"], + coords=dict( + time=time, + ), + ) + converted = da.convert_calendar(target_calendar) + assert isinstance(converted.indexes["time"], expected_index) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 37e63f8d4c2..ef478af8786 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -15,14 +15,15 @@ Dataset, Variable, cftime_range, - coding, conventions, date_range, decode_cf, ) +from xarray.coding.times import _STANDARD_CALENDARS as _STANDARD_CALENDARS_UNSORTED from xarray.coding.times import ( CFDatetimeCoder, _encode_datetime_with_cftime, + _netcdf_to_numpy_timeunit, _numpy_to_netcdf_timeunit, _should_cftime_be_used, cftime_to_nptime, @@ -44,6 +45,8 @@ FirstElementAccessibleArray, arm_xfail, assert_array_equal, + assert_duckarray_allclose, + assert_duckarray_equal, assert_no_warnings, has_cftime, requires_cftime, @@ -58,11 +61,9 @@ "all_leap", "366_day", } -_ALL_CALENDARS = sorted( - _NON_STANDARD_CALENDARS_SET.union(coding.times._STANDARD_CALENDARS) -) +_STANDARD_CALENDARS = sorted(_STANDARD_CALENDARS_UNSORTED) +_ALL_CALENDARS = sorted(_NON_STANDARD_CALENDARS_SET.union(_STANDARD_CALENDARS)) _NON_STANDARD_CALENDARS = sorted(_NON_STANDARD_CALENDARS_SET) -_STANDARD_CALENDARS = sorted(coding.times._STANDARD_CALENDARS) _CF_DATETIME_NUM_DATES_UNITS = [ (np.arange(10), "days since 2000-01-01"), (np.arange(10).astype("float64"), "days since 2000-01-01"), @@ -895,10 +896,10 @@ def test_time_units_with_timezone_roundtrip(calendar) -> None: ) if calendar in _STANDARD_CALENDARS: - np.testing.assert_array_equal(result_num_dates, expected_num_dates) + assert_duckarray_equal(result_num_dates, expected_num_dates) else: # cftime datetime arithmetic is not quite exact. 
- np.testing.assert_allclose(result_num_dates, expected_num_dates) + assert_duckarray_allclose(result_num_dates, expected_num_dates) assert result_units == expected_units assert result_calendar == calendar @@ -1005,7 +1006,7 @@ def test_decode_ambiguous_time_warns(calendar) -> None: # we don't decode non-standard calendards with # pandas so expect no warning will be emitted - is_standard_calendar = calendar in coding.times._STANDARD_CALENDARS + is_standard_calendar = calendar in _STANDARD_CALENDARS dates = [1, 2, 3] units = "days since 1-1-1" @@ -1042,7 +1043,7 @@ def test_encode_cf_datetime_defaults_to_correct_dtype( units = f"{encoding_units} since 2000-01-01" encoded, _units, _ = encode_cf_datetime(times, units) - numpy_timeunit = coding.times._netcdf_to_numpy_timeunit(encoding_units) + numpy_timeunit = _netcdf_to_numpy_timeunit(encoding_units) encoding_units_as_timedelta = np.timedelta64(1, numpy_timeunit) if pd.to_timedelta(1, freq) >= encoding_units_as_timedelta: assert encoded.dtype == np.int64 diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 1401be7b9b5..b689bb8c02d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2983,6 +2983,11 @@ def test_assign_attrs(self) -> None: assert_identical(new_actual, expected) assert actual.attrs == {"a": 1, "b": 2} + def test_drop_attrs(self) -> None: + # Mostly tested in test_dataset.py, but adding a very small test here + da = DataArray([], attrs=dict(a=1, b=2)) + assert da.drop_attrs().attrs == {} + @pytest.mark.parametrize( "func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs] ) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 52de0a265c3..1ba45616ab0 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4454,6 +4454,54 @@ def test_assign_attrs(self) -> None: assert_identical(new_actual, expected) assert actual.attrs == dict(a=1, b=2) + def test_drop_attrs(self) -> None: + # Simple example + ds = Dataset().assign_attrs(a=1, b=2) + original = ds.copy() + expected = Dataset() + result = ds.drop_attrs() + assert_identical(result, expected) + + # Doesn't change original + assert_identical(ds, original) + + # Example with variables and coords with attrs, and a multiindex. (arguably + # should have used a canonical dataset with all the features we should + # support...) + var = Variable("x", [1, 2, 3], attrs=dict(x=1, y=2)) + idx = IndexVariable("y", [1, 2, 3], attrs=dict(c=1, d=2)) + mx = xr.Coordinates.from_pandas_multiindex( + pd.MultiIndex.from_tuples([(1, 2), (3, 4)], names=["d", "e"]), "z" + ) + ds = Dataset(dict(var1=var), coords=dict(y=idx, z=mx)).assign_attrs(a=1, b=2) + assert ds.attrs != {} + assert ds["var1"].attrs != {} + assert ds["y"].attrs != {} + assert ds.coords["y"].attrs != {} + + original = ds.copy(deep=True) + result = ds.drop_attrs() + + assert result.attrs == {} + assert result["var1"].attrs == {} + assert result["y"].attrs == {} + assert list(result.data_vars) == list(ds.data_vars) + assert list(result.coords) == list(ds.coords) + + # Doesn't change original + assert_identical(ds, original) + # Specifically test that the attrs on the coords are still there. (The index + # can't currently contain `attrs`, so we can't test those.)
+ assert ds.coords["y"].attrs != {} + + # Test for deep=False + result_shallow = ds.drop_attrs(deep=False) + assert result_shallow.attrs == {} + assert result_shallow["var1"].attrs != {} + assert result_shallow["y"].attrs != {} + assert list(result_shallow.data_vars) == list(ds.data_vars) + assert list(result_shallow.coords) == list(ds.coords) + def test_assign_multiindex_level(self) -> None: data = create_test_multiindex() with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "): diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index a0bd6b939d5..92cd620c819 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -3418,3 +3418,43 @@ def test_9155() -> None: data = xr.DataArray([1, 2, 3], dims=["x"]) fig, ax = plt.subplots(ncols=1, nrows=1) data.plot(ax=ax) + + +@requires_matplotlib +def test_temp_dataarray() -> None: + from xarray.plot.dataset_plot import _temp_dataarray + + x = np.arange(1, 4) + y = np.arange(4, 6) + var1 = np.arange(x.size * y.size).reshape((x.size, y.size)) + var2 = np.arange(x.size * y.size).reshape((x.size, y.size)) + ds = xr.Dataset( + { + "var1": (["x", "y"], var1), + "var2": (["x", "y"], 2 * var2), + "var3": (["x"], 3 * x), + }, + coords={ + "x": x, + "y": y, + "model": np.arange(7), + }, + ) + + # No broadcasting: + y_ = "var1" + locals_ = {"x": "var2"} + da = _temp_dataarray(ds, y_, locals_) + assert da.shape == (3, 2) + + # Broadcast from 1 to 2dim: + y_ = "var3" + locals_ = {"x": "var1"} + da = _temp_dataarray(ds, y_, locals_) + assert da.shape == (3, 2) + + # Ignore non-valid coord kwargs: + y_ = "var3" + locals_ = dict(x="x", extend="var2") + da = _temp_dataarray(ds, y_, locals_) + assert da.shape == (3,)
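Usage sketch (not part of the patch) for the ``drop_attrs`` methods added above; the example dataset here is illustrative and assumes only the behaviour documented in this diff:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"t2m": ("time", np.array([280.0, 281.5]), {"units": "K"})},
    coords={"time": ("time", [0, 1], {"axis": "T"})},
    attrs={"title": "demo"},
)

assert ds.drop_attrs().attrs == {}  # dataset attrs are removed
assert ds.drop_attrs()["t2m"].attrs == {}  # deep=True (default) strips variable attrs too
assert ds.drop_attrs(deep=False)["t2m"].attrs == {"units": "K"}  # deep=False keeps them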
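A minimal sketch (again not part of the patch) of the view-based fill-value conversion used in xarray/coding/variables.py above: the byte pattern is reinterpreted rather than value-cast, which avoids the OverflowError that out-of-bounds scalar conversions such as np.int8(255) raise under numpy 2:

import numpy as np

fill = np.uint8(255)
# reinterpret the single byte as int8 instead of casting the value
signed = np.array(fill).view(np.int8).item()
assert signed == -1
# and back again: -1 viewed as uint8 is 255
assert np.array(np.int8(-1)).view(np.uint8).item() == 255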