Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reorder with window #1566

Merged
merged 13 commits into from
Feb 19, 2024
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ New features and enhancements
* Support ``indexer`` keyword in YAML indicator description. (:issue:`1522`, :pull:`1561`).
* New ``xclim.core.calendar.stack_periods`` and ``unstack_periods`` for performing ``rolling(time=...).construct(..., stride=...)`` but with non-uniform temporal periods like years or months. They replace ``xclim.sdba.processing.construct_moving_yearly_window`` and ``unpack_moving_yearly_window`` which are deprecated and will be removed in a future release.
* New ``as_dataset`` options for ``xclim.set_options``. When True, indicators will output Datasets instead of DataArrays. (:issue:`1257`, :pull:`1625`).
* Add the possibility to use a group with a window in ``xc.sdba.processing.reordering``. (:pull:`1566`).

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
26 changes: 26 additions & 0 deletions tests/test_sdba/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
import xarray as xr

from xclim.core.calendar import date_range
from xclim.core.units import units
from xclim.sdba.adjustment import EmpiricalQuantileMapping
from xclim.sdba.base import Grouper
Expand Down Expand Up @@ -185,6 +186,31 @@ def test_reordering():
assert out.attrs == y.attrs


def test_reordering_with_window():
time = list(
date_range("2000-01-01", "2000-01-04", freq="D", calendar="noleap")
) + list(date_range("2001-01-01", "2001-01-04", freq="D", calendar="noleap"))

x = xr.DataArray(
np.arange(1, 9, 1),
dims=("time"),
coords={"time": time},
)

y = xr.DataArray(
np.arange(8, 0, -1),
dims=("time"),
coords={"time": time},
)

group = Grouper(group="time.dayofyear", window=3)
out = reordering(x, y, group=group)

np.testing.assert_array_equal(out, [3.0, 3.0, 2.0, 2.0, 7.0, 7.0, 6.0, 6.0])
out.attrs.pop("history")
assert out.attrs == y.attrs


def test_to_additive(pr_series, hurs_series):
# log
pr = pr_series(np.array([0, 1e-5, 1, np.e**10]))
Expand Down
58 changes: 44 additions & 14 deletions xclim/sdba/_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _normalize(
)


@map_groups(reordered=[Grouper.DIM], main_only=True)
@map_groups(reordered=[Grouper.DIM], main_only=False)
def _reordering(ds, *, dim):
"""Group-wise reordering.

Expand All @@ -159,17 +159,47 @@ def _reordering(ds, *, dim):
def _reordering_1d(data, ordr):
return np.sort(data)[np.argsort(np.argsort(ordr))]

return (
xr.apply_ufunc(
_reordering_1d,
ds.sim,
ds.ref,
input_core_dims=[[dim], [dim]],
output_core_dims=[[dim]],
vectorize=True,
dask="parallelized",
output_dtypes=[ds.sim.dtype],
def _reordering_2d(data, ordr):
data_r = data.ravel()
ordr_r = ordr.ravel()
reorder = np.sort(data_r)[np.argsort(np.argsort(ordr_r))]
return reorder.reshape(data.shape)[
:, int(data.shape[1] / 2)
] # pick the middle of the window

if "window" in dim:
juliettelavoie marked this conversation as resolved.
Show resolved Hide resolved
return (
xr.apply_ufunc(
_reordering_2d,
ds.sim,
ds.ref,
input_core_dims=[["time", "window"], ["time", "window"]],
output_core_dims=[["time"]],
vectorize=True,
dask="parallelized",
output_dtypes=[ds.sim.dtype],
)
.rename("reordered")
.to_dataset()
)
elif len(dim) == 1:
return (
xr.apply_ufunc(
_reordering_1d,
ds.sim,
ds.ref,
input_core_dims=[dim, dim],
output_core_dims=[dim],
vectorize=True,
dask="parallelized",
output_dtypes=[ds.sim.dtype],
)
.rename("reordered")
.to_dataset()
)
else:
raise ValueError(
f"Reordering can only be done along one dimension."
f" If there is more than one, one of the dimension should be `window`."
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f" If there is more than one, one of the dimension should be `window`."
f" If there is more than one, they should be `window` and `time`."

f" The dimensions are {dim}."
)
.rename("reordered")
.to_dataset()
)
Loading