diff --git a/CHANGES.rst b/CHANGES.rst index b12de9efa..09549095d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changelog v0.48 (2024-02-19) ------------------ -Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`), Dante Castro (:user:`profesorpaiche`). +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`), Dante Castro (:user:`profesorpaiche`), Gabriel Rondeau-Genesse (:user:`RondeauG`). Announcements ^^^^^^^^^^^^^ @@ -25,6 +25,8 @@ New features and enhancements * Added option ``never_reached`` to ``degree_days_exceedance_date`` to assign a custom value when the sum threshold is never reached. (:issue:`1459`, :pull:`1647`). * Added option ``min_members`` to ensemble statistics to mask elements when the number of valid members is under a threshold. (:issue:`1459`, :pull:`1647`). * Distribution instances can now be passed to the ``dist`` argument of most statistical indices. (:pull:`1644`). +* Added a new ``xclim.indices.generic.select_rolling_resample_op`` function to allow for computing rolling statistics. (:issue:`1480`, :pull:`1643`). +* Add the possibility to use a group with a window in ``xc.sdba.processing.reordering``. (:pull:`1566`). Breaking changes ^^^^^^^^^^^^^^^^ diff --git a/tests/test_generic.py b/tests/test_generic.py index c471ccab9..7ec0771cd 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -31,6 +31,47 @@ def test_season(self, q_series): assert o[0] == 31 + 29 +class TestSelectRollingResampleOp: + def test_rollingmax(self, q_series): + q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap + o = generic.select_rolling_resample_op( + q, "max", window=14, window_center=False, window_op="mean" + ) + np.testing.assert_array_equal( + [ + np.mean(np.arange(353, 366 + 1)), + np.mean(np.arange(353 + 365, 366 + 365 + 1)), + np.mean(np.arange(353 + 365 * 2, 366 + 365 * 2 + 1)), + ], + o.values, + ) + assert o.attrs["units"] == "m3 s-1" + + def test_rollingmaxindexer(self, q_series): + q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap + o = generic.select_rolling_resample_op( + q, "min", window=14, window_center=False, window_op="max", season="DJF" + ) + np.testing.assert_array_equal( + [14, 367, 367 + 365], o.values + ) # 14th day for 1st year, then Jan 1st for the next two + assert o.attrs["units"] == "m3 s-1" + + def test_freq(self, q_series): + q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap + o = generic.select_rolling_resample_op( + q, "max", window=3, window_center=True, window_op="integral", freq="MS" + ) + np.testing.assert_array_equal( + [ + np.sum([30, 31, 32]) * 86400, + np.sum([30 + 29, 31 + 29, 32 + 29]) * 86400, + ], # m3 s-1 being summed by the frequency of the data + o.isel(time=slice(0, 2)).values, + ) + assert o.attrs["units"] == "m3" + + class TestThresholdCount: def test_simple(self, tas_series): ts = tas_series(np.arange(365)) diff --git a/tests/test_sdba/test_processing.py b/tests/test_sdba/test_processing.py index 294ef980c..0dca69583 100644 --- a/tests/test_sdba/test_processing.py +++ b/tests/test_sdba/test_processing.py @@ -5,6 +5,7 @@ import pytest import xarray as xr +from xclim.core.calendar import date_range from xclim.core.units import units from xclim.sdba.adjustment import EmpiricalQuantileMapping from xclim.sdba.base import Grouper @@ -185,6 +186,31 @@ def test_reordering(): assert out.attrs == y.attrs +def test_reordering_with_window(): + time = list( + date_range("2000-01-01", "2000-01-04", freq="D", calendar="noleap") + ) + list(date_range("2001-01-01", "2001-01-04", freq="D", calendar="noleap")) + + x = xr.DataArray( + np.arange(1, 9, 1), + dims=("time"), + coords={"time": time}, + ) + + y = xr.DataArray( + np.arange(8, 0, -1), + dims=("time"), + coords={"time": time}, + ) + + group = Grouper(group="time.dayofyear", window=3) + out = reordering(x, y, group=group) + + np.testing.assert_array_equal(out, [3.0, 3.0, 2.0, 2.0, 7.0, 7.0, 6.0, 6.0]) + out.attrs.pop("history") + assert out.attrs == y.attrs + + def test_to_additive(pr_series, hurs_series): # log pr = pr_series(np.array([0, 1e-5, 1, np.e**10])) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 1190e935a..8d92d08c1 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -104,6 +104,52 @@ def select_resample_op( return to_agg_units(out, da, op) +def select_rolling_resample_op( + da: xr.DataArray, + op: str, + window: int, + window_center: bool = True, + window_op: str = "mean", + freq: str = "YS", + out_units=None, + **indexer, +) -> xr.DataArray: + """Apply operation over each period that is part of the index selection, using a rolling window before the operation. + + Parameters + ---------- + da : xr.DataArray + Input data. + op : str {'min', 'max', 'mean', 'std', 'var', 'count', 'sum', 'integral', 'argmax', 'argmin'} or func + Reduce operation. Can either be a DataArray method or a function that can be applied to a DataArray. + window : int + Size of the rolling window (centered). + window_center : bool + If True, the window is centered on the date. If False, the window is right-aligned. + window_op : str {'min', 'max', 'mean', 'std', 'var', 'count', 'sum', 'integral'} + Operation to apply to the rolling window. Default: 'mean'. + freq : str + Resampling frequency defining the periods as defined in :ref:`timeseries.resampling`. Applied after the rolling window. + out_units : str, optional + Output units to assign. Only necessary if `op` is function not supported by :py:func:`xclim.core.units.to_agg_units`. + indexer : {dim: indexer, }, optional + Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values, + month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, all values are + considered. + + Returns + ------- + xr.DataArray + The array for which the operation has been applied over each period. + """ + rolled = getattr( + da.rolling(time=window, center=window_center), + window_op.replace("integral", "sum"), + )() + rolled = to_agg_units(rolled, da, window_op) + return select_resample_op(rolled, op=op, freq=freq, out_units=out_units, **indexer) + + def doymax(da: xr.DataArray) -> xr.DataArray: """Return the day of year of the maximum value.""" i = da.argmax(dim="time") diff --git a/xclim/sdba/_processing.py b/xclim/sdba/_processing.py index 41cc804fe..c6cf7f486 100644 --- a/xclim/sdba/_processing.py +++ b/xclim/sdba/_processing.py @@ -142,7 +142,7 @@ def _normalize( ) -@map_groups(reordered=[Grouper.DIM], main_only=True) +@map_groups(reordered=[Grouper.DIM], main_only=False) def _reordering(ds, *, dim): """Group-wise reordering. @@ -159,17 +159,47 @@ def _reordering(ds, *, dim): def _reordering_1d(data, ordr): return np.sort(data)[np.argsort(np.argsort(ordr))] - return ( - xr.apply_ufunc( - _reordering_1d, - ds.sim, - ds.ref, - input_core_dims=[[dim], [dim]], - output_core_dims=[[dim]], - vectorize=True, - dask="parallelized", - output_dtypes=[ds.sim.dtype], + def _reordering_2d(data, ordr): + data_r = data.ravel() + ordr_r = ordr.ravel() + reorder = np.sort(data_r)[np.argsort(np.argsort(ordr_r))] + return reorder.reshape(data.shape)[ + :, int(data.shape[1] / 2) + ] # pick the middle of the window + + if {"window", "time"} == set(dim): + return ( + xr.apply_ufunc( + _reordering_2d, + ds.sim, + ds.ref, + input_core_dims=[["time", "window"], ["time", "window"]], + output_core_dims=[["time"]], + vectorize=True, + dask="parallelized", + output_dtypes=[ds.sim.dtype], + ) + .rename("reordered") + .to_dataset() + ) + elif len(dim) == 1: + return ( + xr.apply_ufunc( + _reordering_1d, + ds.sim, + ds.ref, + input_core_dims=[dim, dim], + output_core_dims=[dim], + vectorize=True, + dask="parallelized", + output_dtypes=[ds.sim.dtype], + ) + .rename("reordered") + .to_dataset() + ) + else: + raise ValueError( + f"Reordering can only be done along one dimension." + f" If there is more than one, they should be `window` and `time`." + f" The dimensions are {dim}." ) - .rename("reordered") - .to_dataset() - )