From b7e6036555e54ffdaa685ba3cc6a94cd9664c4b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 13 Dec 2024 16:06:45 +0100 Subject: [PATCH 1/3] finalize deprecation of "closed"-parameter (#9882) * finalize deprecation of "closed" to "inclusive" in date_range and cftime_range * add whats-new.rst entry * fix tests * fix test * remove stale function --- doc/whats-new.rst | 4 +- xarray/coding/cftime_offsets.py | 73 ++--------------------------- xarray/tests/test_cftime_offsets.py | 48 ------------------- 3 files changed, 8 insertions(+), 117 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6a08246182c..08e6218ca14 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,7 +37,9 @@ Breaking changes Deprecations ~~~~~~~~~~~~ - +- Finalize deprecation of ``closed`` parameters of :py:func:`cftime_range` and + :py:func:`date_range` (:pull:`9882`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. Bug fixes ~~~~~~~~~ diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a994eb9661f..89c06e56ea7 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -62,15 +62,13 @@ ) from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like from xarray.core.pdcompat import ( - NoDefault, count_not_none, nanosecond_precision_timestamp, - no_default, ) from xarray.core.utils import attempt_import, emit_user_level_warning if TYPE_CHECKING: - from xarray.core.types import InclusiveOptions, Self, SideOptions, TypeAlias + from xarray.core.types import InclusiveOptions, Self, TypeAlias DayOption: TypeAlias = Literal["start", "end"] @@ -943,42 +941,6 @@ def _generate_range(start, end, periods, offset): current = next_date -def _translate_closed_to_inclusive(closed): - """Follows code added in pandas #43504.""" - emit_user_level_warning( - "Following pandas, the `closed` parameter is deprecated in " - "favor of the `inclusive` parameter, and will be removed in " - "a future version of
xarray.", - FutureWarning, - ) - if closed is None: - inclusive = "both" - elif closed in ("left", "right"): - inclusive = closed - else: - raise ValueError( - f"Argument `closed` must be either 'left', 'right', or None. " - f"Got {closed!r}." - ) - return inclusive - - -def _infer_inclusive( - closed: NoDefault | SideOptions, inclusive: InclusiveOptions | None -) -> InclusiveOptions: - """Follows code added in pandas #43504.""" - if closed is not no_default and inclusive is not None: - raise ValueError( - "Following pandas, deprecated argument `closed` cannot be " - "passed if argument `inclusive` is not None." - ) - if closed is not no_default: - return _translate_closed_to_inclusive(closed) - if inclusive is None: - return "both" - return inclusive - - def cftime_range( start=None, end=None, @@ -986,8 +948,7 @@ def cftime_range( freq=None, normalize=False, name=None, - closed: NoDefault | SideOptions = no_default, - inclusive: None | InclusiveOptions = None, + inclusive: InclusiveOptions = "both", calendar="standard", ) -> CFTimeIndex: """Return a fixed frequency CFTimeIndex. @@ -1006,16 +967,7 @@ def cftime_range( Normalize start/end dates to midnight before generating date range. name : str, default: None Name of the resulting index - closed : {None, "left", "right"}, default: "NO_DEFAULT" - Make the interval closed with respect to the given frequency to the - "left", "right", or both sides (None). - - .. deprecated:: 2023.02.0 - Following pandas, the ``closed`` parameter is deprecated in favor - of the ``inclusive`` parameter, and will be removed in a future - version of xarray. - - inclusive : {None, "both", "neither", "left", "right"}, default None + inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; whether to set each bound as closed or open. .. 
versionadded:: 2023.02.0 @@ -1193,8 +1145,6 @@ def cftime_range( offset = to_offset(freq) dates = np.array(list(_generate_range(start, end, periods, offset))) - inclusive = _infer_inclusive(closed, inclusive) - if inclusive == "neither": left_closed = False right_closed = False @@ -1229,8 +1179,7 @@ def date_range( tz=None, normalize=False, name=None, - closed: NoDefault | SideOptions = no_default, - inclusive: None | InclusiveOptions = None, + inclusive: InclusiveOptions = "both", calendar="standard", use_cftime=None, ): @@ -1257,20 +1206,10 @@ def date_range( Normalize start/end dates to midnight before generating date range. name : str, default: None Name of the resulting index - closed : {None, "left", "right"}, default: "NO_DEFAULT" - Make the interval closed with respect to the given frequency to the - "left", "right", or both sides (None). - - .. deprecated:: 2023.02.0 - Following pandas, the `closed` parameter is deprecated in favor - of the `inclusive` parameter, and will be removed in a future - version of xarray. - - inclusive : {None, "both", "neither", "left", "right"}, default: None + inclusive : {"both", "neither", "left", "right"}, default: "both" Include boundaries; whether to set each bound as closed or open. .. versionadded:: 2023.02.0 - calendar : str, default: "standard" Calendar type for the datetimes. 
use_cftime : boolean, optional @@ -1294,8 +1233,6 @@ def date_range( if tz is not None: use_cftime = False - inclusive = _infer_inclusive(closed, inclusive) - if _is_standard_calendar(calendar) and use_cftime is not True: try: return pd.date_range( diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index f6f97108c1d..1ab6c611aac 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1057,15 +1057,6 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_arg False, [(1, 1, 2), (1, 1, 3)], ), - ( - "0001-01-01", - "0001-01-04", - None, - "D", - None, - False, - [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)], - ), ( "0001-01-01", "0001-01-04", @@ -1294,13 +1285,6 @@ def test_invalid_cftime_range_inputs( cftime_range(start, end, periods, freq, inclusive=inclusive) # type: ignore[arg-type] -def test_invalid_cftime_arg() -> None: - with pytest.warns( - FutureWarning, match="Following pandas, the `closed` parameter is deprecated" - ): - cftime_range("2000", "2001", None, "YE", closed="left") - - _CALENDAR_SPECIFIC_MONTH_END_TESTS = [ ("noleap", [(2, 28), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]), ("all_leap", [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]), @@ -1534,15 +1518,6 @@ def as_timedelta_not_implemented_error(): tick.as_timedelta() -@pytest.mark.parametrize("function", [cftime_range, date_range]) -def test_cftime_or_date_range_closed_and_inclusive_error(function: Callable) -> None: - if function == cftime_range and not has_cftime: - pytest.skip("requires cftime") - - with pytest.raises(ValueError, match="Following pandas, deprecated"): - function("2000", periods=3, closed=None, inclusive="right") - - @pytest.mark.parametrize("function", [cftime_range, date_range]) def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> None: if function == cftime_range and not has_cftime: @@ -1552,29 +1527,6 @@ def 
test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non function("2000", periods=3, inclusive="foo") -@pytest.mark.parametrize( - "function", - [ - pytest.param(cftime_range, id="cftime", marks=requires_cftime), - pytest.param(date_range, id="date"), - ], -) -@pytest.mark.parametrize( - ("closed", "inclusive"), [(None, "both"), ("left", "left"), ("right", "right")] -) -def test_cftime_or_date_range_closed( - function: Callable, - closed: Literal["left", "right", None], - inclusive: Literal["left", "right", "both"], -) -> None: - with pytest.warns(FutureWarning, match="Following pandas"): - result_closed = function("2000-01-01", "2000-01-04", freq="D", closed=closed) - result_inclusive = function( - "2000-01-01", "2000-01-04", freq="D", inclusive=inclusive - ) - np.testing.assert_equal(result_closed.values, result_inclusive.values) - - @pytest.mark.parametrize("function", [cftime_range, date_range]) def test_cftime_or_date_range_inclusive_None(function) -> None: if function == cftime_range and not has_cftime: From f05c5ec799f144b8cc3b9355a702814be7285d8f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Dec 2024 08:59:54 -0700 Subject: [PATCH 2/3] Fix upstream Zarr compatibility (#9884) Closes #9880 --- xarray/backends/zarr.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index cb3ab375c31..d7f056a209a 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1135,9 +1135,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No else: encoded_attrs[DIMENSION_KEY] = dims - encoding["exists_ok" if _zarr_v3() else "overwrite"] = ( - True if self._mode == "w" else False - ) + encoding["overwrite"] = True if self._mode == "w" else False zarr_array = self._create_new_array( name=name, From 755581c84dc2ad5435f0a9798e48115f80015f2d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 14 Dec 2024 01:33:10 -0700 Subject: [PATCH 
3/3] Fix interpolation when non-numeric coords are present. (#9887) * Fix interpolation when non-numeric coords are present. Closes #8099 Closes #9839 * fix * Add basic 1d test --------- Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- doc/whats-new.rst | 3 +++ xarray/core/dataset.py | 11 ++++++----- xarray/tests/test_interp.py | 38 +++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 08e6218ca14..cbc59a708aa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,9 @@ Bug fixes By `Bruce Merry <https://github.com/bmerry>`_. - Fix unintended load on datasets when calling :py:meth:`DataArray.plot.scatter` (:pull:`9818`). By `Jimmy Westling <https://github.com/illviljan>`_. +- Fix interpolation when non-numeric coordinate variables are present (:issue:`8099`, :issue:`9839`). + By `Deepak Cherian <https://github.com/dcherian>`_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ea17a69f827..d4a23ac275a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4185,7 +4185,7 @@ def _validate_interp_indexer(x, new_x): } variables: dict[Hashable, Variable] = {} - reindex: bool = False + reindex_vars: list[Hashable] = [] for name, var in obj._variables.items(): if name in indexers: continue @@ -4207,19 +4207,20 @@ def _validate_interp_indexer(x, new_x): # booleans and objects and retains the dtype but inside # this loop there might be some duplicate code that slows it # down, therefore collect these signals and run it later: - reindex = True + reindex_vars.append(name) elif all(d not in indexers for d in var.dims): # For anything else we can only keep variables if they # are not dependent on any coords that are being # interpolated along: variables[name] = var - if reindex: - reindex_indexers = { + if reindex_vars and ( + reindex_indexers := { k: v for k, (_, v) in validated_indexers.items() if v.dims == (k,) } + ): reindexed = alignment.reindex( - obj, +
obj[reindex_vars], indexers=reindex_indexers, method=method_non_numeric, exclude_vars=variables.keys(), diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index d602cb96a6a..86532a26f65 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -1055,3 +1055,41 @@ def test_interp1d_complex_out_of_bounds() -> None: expected = da.interp(time=3.5, kwargs=dict(fill_value=np.nan + np.nan * 1j)) actual = da.interp(time=3.5) assert_identical(actual, expected) + + +@requires_scipy +def test_interp_non_numeric_1d() -> None: + ds = xr.Dataset( + { + "numeric": ("time", 1 + np.arange(0, 4, 1)), + "non_numeric": ("time", np.array(["a", "b", "c", "d"])), + }, + coords={"time": (np.arange(0, 4, 1))}, + ) + actual = ds.interp(time=np.linspace(0, 3, 7)) + + expected = xr.Dataset( + { + "numeric": ("time", 1 + np.linspace(0, 3, 7)), + "non_numeric": ("time", np.array(["a", "b", "b", "c", "c", "d", "d"])), + }, + coords={"time": np.linspace(0, 3, 7)}, + ) + xr.testing.assert_identical(actual, expected) + + +@requires_scipy +def test_interp_non_numeric_nd() -> None: + # regression test for GH8099, GH9839 + ds = xr.Dataset({"x": ("a", np.arange(4))}, coords={"a": (np.arange(4) - 1.5)}) + t = xr.DataArray( + np.random.randn(6).reshape((2, 3)) * 0.5, + dims=["r", "s"], + coords={"r": np.arange(2) - 0.5, "s": np.arange(3) - 1}, + ) + ds["m"] = ds.x > 1 + + actual = ds.interp(a=t, method="linear") + # with numeric only + expected = ds[["x"]].interp(a=t, method="linear") + assert_identical(actual[["x"]], expected)