Skip to content

Commit

Permalink
Merge branch 'master' into prepare-v0480
Browse files Browse the repository at this point in the history
  • Loading branch information
aulemahal authored Feb 19, 2024
2 parents 77a05f8 + 0c7820e commit 0335cfd
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 15 deletions.
4 changes: 3 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Changelog

v0.48 (2024-02-19)
------------------
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`), Dante Castro (:user:`profesorpaiche`).
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`), Dante Castro (:user:`profesorpaiche`), Gabriel Rondeau-Genesse (:user:`RondeauG`).

Announcements
^^^^^^^^^^^^^
Expand All @@ -25,6 +25,8 @@ New features and enhancements
* Added option ``never_reached`` to ``degree_days_exceedance_date`` to assign a custom value when the sum threshold is never reached. (:issue:`1459`, :pull:`1647`).
* Added option ``min_members`` to ensemble statistics to mask elements when the number of valid members is under a threshold. (:issue:`1459`, :pull:`1647`).
* Distribution instances can now be passed to the ``dist`` argument of most statistical indices. (:pull:`1644`).
* Added a new ``xclim.indices.generic.select_rolling_resample_op`` function to allow for computing rolling statistics. (:issue:`1480`, :pull:`1643`).
* Add the possibility to use a group with a window in ``xc.sdba.processing.reordering``. (:pull:`1566`).

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
41 changes: 41 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,47 @@ def test_season(self, q_series):
assert o[0] == 31 + 29


class TestSelectRollingResampleOp:
def test_rollingmax(self, q_series):
q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap
o = generic.select_rolling_resample_op(
q, "max", window=14, window_center=False, window_op="mean"
)
np.testing.assert_array_equal(
[
np.mean(np.arange(353, 366 + 1)),
np.mean(np.arange(353 + 365, 366 + 365 + 1)),
np.mean(np.arange(353 + 365 * 2, 366 + 365 * 2 + 1)),
],
o.values,
)
assert o.attrs["units"] == "m3 s-1"

def test_rollingmaxindexer(self, q_series):
q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap
o = generic.select_rolling_resample_op(
q, "min", window=14, window_center=False, window_op="max", season="DJF"
)
np.testing.assert_array_equal(
[14, 367, 367 + 365], o.values
) # 14th day for 1st year, then Jan 1st for the next two
assert o.attrs["units"] == "m3 s-1"

def test_freq(self, q_series):
q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap
o = generic.select_rolling_resample_op(
q, "max", window=3, window_center=True, window_op="integral", freq="MS"
)
np.testing.assert_array_equal(
[
np.sum([30, 31, 32]) * 86400,
np.sum([30 + 29, 31 + 29, 32 + 29]) * 86400,
], # m3 s-1 being summed by the frequency of the data
o.isel(time=slice(0, 2)).values,
)
assert o.attrs["units"] == "m3"


class TestThresholdCount:
def test_simple(self, tas_series):
ts = tas_series(np.arange(365))
Expand Down
26 changes: 26 additions & 0 deletions tests/test_sdba/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
import xarray as xr

from xclim.core.calendar import date_range
from xclim.core.units import units
from xclim.sdba.adjustment import EmpiricalQuantileMapping
from xclim.sdba.base import Grouper
Expand Down Expand Up @@ -185,6 +186,31 @@ def test_reordering():
assert out.attrs == y.attrs


def test_reordering_with_window():
time = list(
date_range("2000-01-01", "2000-01-04", freq="D", calendar="noleap")
) + list(date_range("2001-01-01", "2001-01-04", freq="D", calendar="noleap"))

x = xr.DataArray(
np.arange(1, 9, 1),
dims=("time"),
coords={"time": time},
)

y = xr.DataArray(
np.arange(8, 0, -1),
dims=("time"),
coords={"time": time},
)

group = Grouper(group="time.dayofyear", window=3)
out = reordering(x, y, group=group)

np.testing.assert_array_equal(out, [3.0, 3.0, 2.0, 2.0, 7.0, 7.0, 6.0, 6.0])
out.attrs.pop("history")
assert out.attrs == y.attrs


def test_to_additive(pr_series, hurs_series):
# log
pr = pr_series(np.array([0, 1e-5, 1, np.e**10]))
Expand Down
46 changes: 46 additions & 0 deletions xclim/indices/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,52 @@ def select_resample_op(
return to_agg_units(out, da, op)


def select_rolling_resample_op(
da: xr.DataArray,
op: str,
window: int,
window_center: bool = True,
window_op: str = "mean",
freq: str = "YS",
out_units=None,
**indexer,
) -> xr.DataArray:
"""Apply operation over each period that is part of the index selection, using a rolling window before the operation.
Parameters
----------
da : xr.DataArray
Input data.
op : str {'min', 'max', 'mean', 'std', 'var', 'count', 'sum', 'integral', 'argmax', 'argmin'} or func
Reduce operation. Can either be a DataArray method or a function that can be applied to a DataArray.
window : int
Size of the rolling window (centered).
window_center : bool
If True, the window is centered on the date. If False, the window is right-aligned.
window_op : str {'min', 'max', 'mean', 'std', 'var', 'count', 'sum', 'integral'}
Operation to apply to the rolling window. Default: 'mean'.
freq : str
Resampling frequency defining the periods as defined in :ref:`timeseries.resampling`. Applied after the rolling window.
out_units : str, optional
Output units to assign. Only necessary if `op` is function not supported by :py:func:`xclim.core.units.to_agg_units`.
indexer : {dim: indexer, }, optional
Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values,
month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, all values are
considered.
Returns
-------
xr.DataArray
The array for which the operation has been applied over each period.
"""
rolled = getattr(
da.rolling(time=window, center=window_center),
window_op.replace("integral", "sum"),
)()
rolled = to_agg_units(rolled, da, window_op)
return select_resample_op(rolled, op=op, freq=freq, out_units=out_units, **indexer)


def doymax(da: xr.DataArray) -> xr.DataArray:
"""Return the day of year of the maximum value."""
i = da.argmax(dim="time")
Expand Down
58 changes: 44 additions & 14 deletions xclim/sdba/_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _normalize(
)


@map_groups(reordered=[Grouper.DIM], main_only=True)
@map_groups(reordered=[Grouper.DIM], main_only=False)
def _reordering(ds, *, dim):
"""Group-wise reordering.
Expand All @@ -159,17 +159,47 @@ def _reordering(ds, *, dim):
def _reordering_1d(data, ordr):
return np.sort(data)[np.argsort(np.argsort(ordr))]

return (
xr.apply_ufunc(
_reordering_1d,
ds.sim,
ds.ref,
input_core_dims=[[dim], [dim]],
output_core_dims=[[dim]],
vectorize=True,
dask="parallelized",
output_dtypes=[ds.sim.dtype],
def _reordering_2d(data, ordr):
data_r = data.ravel()
ordr_r = ordr.ravel()
reorder = np.sort(data_r)[np.argsort(np.argsort(ordr_r))]
return reorder.reshape(data.shape)[
:, int(data.shape[1] / 2)
] # pick the middle of the window

if {"window", "time"} == set(dim):
return (
xr.apply_ufunc(
_reordering_2d,
ds.sim,
ds.ref,
input_core_dims=[["time", "window"], ["time", "window"]],
output_core_dims=[["time"]],
vectorize=True,
dask="parallelized",
output_dtypes=[ds.sim.dtype],
)
.rename("reordered")
.to_dataset()
)
elif len(dim) == 1:
return (
xr.apply_ufunc(
_reordering_1d,
ds.sim,
ds.ref,
input_core_dims=[dim, dim],
output_core_dims=[dim],
vectorize=True,
dask="parallelized",
output_dtypes=[ds.sim.dtype],
)
.rename("reordered")
.to_dataset()
)
else:
raise ValueError(
f"Reordering can only be done along one dimension."
f" If there is more than one, they should be `window` and `time`."
f" The dimensions are {dim}."
)
.rename("reordered")
.to_dataset()
)

0 comments on commit 0335cfd

Please sign in to comment.