Better degree_days_exceedance_date and ensemble stats (#1647)
### What kind of change does this PR introduce?
Implements two solutions for #1459.

* `min_members` argument to the ensemble statistics functions. It simply masks
elements where there are fewer valid (non-NaN) members than this threshold.
Similar to `min_periods` of xarray's `rolling`, but with a different default value.
* `never_reached` argument to `degree_days_exceedance_date`. A value (int or
"MM-DD" day-of-year string) to assign when `sum_thresh` is not reached by the end
of the period, to differentiate that case from missing values. A usage sketch of
both arguments follows this list.
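
For context, a minimal usage sketch of both new arguments (not part of the diff;
file and variable names are placeholders):

```python
from xclim import atmos, ensembles

# Ensemble stats: mask any element where fewer than 3 members are valid (non-NaN).
ens = ensembles.create_ensemble(["sim1.nc", "sim2.nc", "sim3.nc", "sim4.nc"])
pcts = ensembles.ensemble_percentiles(ens, values=[10, 50, 90], min_members=3)
stats = ensembles.ensemble_mean_std_max_min(ens, min_members=3)

# Exceedance date: assign December 1st (doy 335) when the 1000 K-day sum is never
# reached, instead of NaN (which then only flags missing input data).
dded = atmos.degree_days_exceedance_date(
    tas=ens.tas.isel(realization=0),  # placeholder: any daily mean temperature series
    thresh="4 degC",
    op=">",
    sum_thresh="1000 K days",
    after_date="07-01",
    never_reached="12-01",
    freq="YS",
)
```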

### Does this PR introduce a breaking change?
No. Both default options preserve the previous behaviour (which means
`min_members` doesn't have the same default as its inspiration, `min_periods`).
Zeitsperre authored Feb 16, 2024
2 parents 628cf9a + a5e1d7e commit d1741bd
Showing 7 changed files with 106 additions and 10 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
@@ -22,6 +22,8 @@ New features and enhancements
* New ``xclim.core.calendar.stack_periods`` and ``unstack_periods`` for performing ``rolling(time=...).construct(..., stride=...)`` but with non-uniform temporal periods like years or months. They replace ``xclim.sdba.processing.construct_moving_yearly_window`` and ``unpack_moving_yearly_window`` which are deprecated and will be removed in a future release.
* New ``as_dataset`` options for ``xclim.set_options``. When True, indicators will output Datasets instead of DataArrays. (:issue:`1257`, :pull:`1625`).
* Added new option for UTCI calculation to cap low wind velocities to a minimum of 0.5 m/s following Bröde (2012) guidelines. (:issue:`1634`, :pull:`1635`).
* Added option ``never_reached`` to ``degree_days_exceedance_date`` to assign a custom value when the sum threshold is never reached. (:issue:`1459`, :pull:`1647`).
* Added option ``min_members`` to ensemble statistics to mask elements when the number of valid members is under a threshold. (:issue:`1459`, :pull:`1647`).

Breaking changes
^^^^^^^^^^^^^^^^
28 changes: 28 additions & 0 deletions tests/test_ensembles.py
@@ -290,6 +290,34 @@ def test_calc_mean_std_min_max(self, ensemble_dataset_objects, open_dataset):
out1.tg_mean_min[0, 5, 5], out2.tg_mean_min[0, 5, 5]
)

@pytest.mark.parametrize(
"aggfunc", [ensembles.ensemble_percentiles, ensembles.ensemble_mean_std_max_min]
)
def test_stats_min_members(self, ensemble_dataset_objects, open_dataset, aggfunc):
ds_all = [open_dataset(n) for n in ensemble_dataset_objects["nc_files_simple"]]
ens = ensembles.create_ensemble(ds_all).isel(lat=0, lon=0)
ens = ens.where(ens.realization > 0)
ens = xr.where((ens.realization == 1) & (ens.time.dt.year == 1950), np.NaN, ens)

def first(ds):
return ds[list(ds.data_vars.keys())[0]]

# Default, no masking
out = first(aggfunc(ens))
assert not out.isnull().any()

# A number
out = first(aggfunc(ens, min_members=3))
# Only 1950 is null
np.testing.assert_array_equal(
out.isnull(), [True] + [False] * (ens.time.size - 1)
)

# Special value
out = first(aggfunc(ens, min_members=None))
# All null
assert out.isnull().all()


@pytest.mark.slow
class TestEnsembleReduction:
21 changes: 21 additions & 0 deletions tests/test_temperature.py
@@ -1289,6 +1289,27 @@ def test_degree_days_exceedance_date(open_dataset):
np.testing.assert_array_equal(out, np.array([[np.nan, 280, 241, 244]]).T)


@pytest.mark.parametrize(
"never_reached,exp", [(None, np.NaN), (300, 300), ("12-01", 335)]
)
def test_degree_days_exceedance_date_never_reached(open_dataset, never_reached, exp):
tas = open_dataset("FWI/GFWED_sample_2017.nc").tas
tas.attrs.update(
cell_methods="time: mean within days", standard_name="air_temperature"
)
# Default -> NaN
out = atmos.degree_days_exceedance_date(
tas=tas,
thresh="4 degC",
op=">",
sum_thresh="1000 K days",
after_date="07-01",
never_reached=never_reached,
freq="YS",
).squeeze("time")
np.testing.assert_array_equal(out, np.array([exp, 242, 222, 223]))


class TestWarmSpellDurationIndex:
def test_warm_spell_duration_index(self, open_dataset):
tasmax = open_dataset("ERA5/daily_surface_cancities_1990-1993.nc").tasmax
7 changes: 7 additions & 0 deletions xclim/core/calendar.py
@@ -39,6 +39,7 @@
"datetime_to_decimal_year",
"days_in_year",
"days_since_to_doy",
"doy_from_string",
"doy_to_days_since",
"ensure_cftime_array",
"get_calendar",
@@ -88,6 +89,12 @@ def days_in_year(year: int, calendar: str = "default") -> int:
)


def doy_from_string(doy: DayOfYearStr, year: int, calendar: str) -> int:
"""Return the day-of-year corresponding to a "MM-DD" string for a given year and calendar."""
MM, DD = doy.split("-")
return datetime_classes[calendar](year, int(MM), int(DD)).timetuple().tm_yday


def date_range(
*args, calendar: str = "default", **kwargs
) -> pd.DatetimeIndex | CFTimeIndex:
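
Not part of the diff: a quick sketch of what the new `doy_from_string` helper
returns, assuming standard cftime calendar names as keys of `datetime_classes`:

```python
from xclim.core.calendar import doy_from_string

doy_from_string("12-01", 2017, "standard")  # 335, matching the test expectation above
doy_from_string("07-01", 2017, "360_day")   # 181, since every month has 30 days
```
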
10 changes: 5 additions & 5 deletions xclim/core/utils.py
@@ -139,9 +139,9 @@ def decorator(func):
def wrapper(*args, **kwargs):
msg = (
f"`{func.__name__}` is deprecated"
f"{' from version {}'.format(from_version) if from_version else ''} "
f"{f' from version {from_version}' if from_version else ''} "
"and will be removed in a future version of xclim"
f"{'. Use `{}` instead'.format(suggested) if suggested else ''}. "
f"{f'. Use `{suggested}` instead' if suggested else ''}. "
"Please update your scripts accordingly."
)
warnings.warn(
@@ -683,6 +683,9 @@ def infer_kind_from_parameter(param) -> InputKind:
if annot == {"Quantified"}:
return InputKind.QUANTIFIED

if "DayOfYearStr" in annot:
return InputKind.DAY_OF_YEAR

if annot.issubset({"int", "float"}):
return InputKind.NUMBER

@@ -692,9 +695,6 @@ def infer_kind_from_parameter(param) -> InputKind:
if annot == {"str"}:
return InputKind.STRING

if annot == {"DayOfYearStr"}:
return InputKind.DAY_OF_YEAR

if annot == {"DateStr"}:
return InputKind.DATE

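
Not part of the diff: a sketch of what reordering the checks enables — union
annotations that include `DayOfYearStr` (like the new `never_reached` parameter)
now infer as `InputKind.DAY_OF_YEAR` instead of falling through. The call below
assumes the parameter is inspected the same way xclim's indicator machinery does:

```python
import inspect

from xclim.core.utils import InputKind, infer_kind_from_parameter
from xclim.indices import degree_days_exceedance_date

param = inspect.signature(degree_days_exceedance_date).parameters["never_reached"]
# The annotation is "DayOfYearStr | int | None"; the membership test now catches it.
assert infer_kind_from_parameter(param) == InputKind.DAY_OF_YEAR
```
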
24 changes: 23 additions & 1 deletion xclim/ensembles/_base.py
@@ -127,7 +127,7 @@ def create_ensemble(


def ensemble_mean_std_max_min(
ens: xr.Dataset, weights: xr.DataArray | None = None
ens: xr.Dataset, min_members: int | None = 1, weights: xr.DataArray | None = None
) -> xr.Dataset:
"""Calculate ensemble statistics between a results from an ensemble of climate simulations.
@@ -138,6 +138,10 @@ def ensemble_mean_std_max_min(
----------
ens : xr.Dataset
Ensemble dataset (see xclim.ensembles.create_ensemble).
min_members : int, optional
The minimum number of valid ensemble members for a statistic to be valid.
Passing None is equivalent to setting min_members to the size of the realization dimension.
The default (1) essentially skips this check.
weights : xr.DataArray, optional
Weights to apply along the 'realization' dimension. This array cannot contain missing values.
@@ -158,6 +162,8 @@ def ensemble_mean_std_max_min(
# Calculate ensemble statistics:
ens_mean_std = ensemble_mean_std_max_min(ens)
"""
if min_members is None:
min_members = ens.realization.size
ds_out = xr.Dataset(attrs=ens.attrs)
for v in ens.data_vars:
if weights is None:
@@ -170,9 +176,14 @@ def ensemble_mean_std_max_min(
ds_out[f"{v}_max"] = ens[v].max(dim="realization")
ds_out[f"{v}_min"] = ens[v].min(dim="realization")

if min_members != 1:
enough = ens[v].notnull().sum("realization") >= min_members

# Re-add attributes
for stat in ["mean", "stdev", "max", "min"]:
vv = f"{v}_{stat}"
if min_members != 1:
ds_out[vv] = ds_out[vv].where(enough)
ds_out[vv].attrs = ens[v].attrs
if "description" in ds_out[vv].attrs.keys():
vv.split()
@@ -182,6 +193,7 @@ def ensemble_mean_std_max_min(
+ vv.split("_")[-1]
+ " of ensemble"
)

ds_out.attrs["history"] = update_history(
f"Computation of statistics on {ens.realization.size} ensemble members.", ds_out
)
@@ -192,6 +204,7 @@ def ensemble_percentiles(
ens: xr.Dataset | xr.DataArray,
values: Sequence[int] | None = None,
keep_chunk_size: bool | None = None,
min_members: int | None = 1,
weights: xr.DataArray | None = None,
split: bool = True,
) -> xr.DataArray | xr.Dataset:
@@ -211,6 +224,10 @@
so that the chunks keep the same size (approximately).
If False, no shrinking is performed, resulting in much larger chunks.
If not defined, the function decides which is best.
min_members : int, optional
The minimum number of valid ensemble members for a statistic to be valid.
Passing None is equivalent to setting min_members to the size of the realization dimension.
The default (1) essentially skips this check.
weights : xr.DataArray, optional
Weights to apply along the 'realization' dimension. This array cannot contain missing values.
When given, the function uses xarray's quantile method which is slower than xclim's NaN-optimized algorithm.
@@ -244,6 +261,8 @@
"""
if values is None:
values = [10, 50, 90]
if min_members is None:
min_members = ens.realization.size

if isinstance(ens, xr.Dataset):
out = xr.merge(
@@ -253,6 +272,7 @@
values,
keep_chunk_size=keep_chunk_size,
split=split,
min_members=min_members,
weights=weights,
)
for da in ens.data_vars.values()
@@ -313,6 +333,8 @@ def ensemble_percentiles(
.rename({"quantile": "percentiles"})
)

if min_members != 1:
out = out.where(ens.notnull().sum("realization") >= min_members)
out = out.assign_coords(
percentiles=xr.DataArray(list(values), dims=("percentiles",))
)
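
Not part of the diff: a minimal, synthetic illustration of the masking added
above — time steps with fewer valid members than `min_members` come out as NaN
in every statistic:

```python
import numpy as np
import xarray as xr

from xclim import ensembles

ens = xr.Dataset(
    {
        "tg_mean": (
            ("realization", "time"),
            [[1.0, 2.0, 3.0], [np.nan, 2.5, 3.5], [np.nan, np.nan, 4.0]],
        )
    },
    coords={"realization": [0, 1, 2], "time": [1950, 1951, 1952]},
)

# Valid member counts per time step are 1, 2 and 3.
stats = ensembles.ensemble_mean_std_max_min(ens, min_members=2)
print(stats.tg_mean_mean.values)  # [nan 2.25 3.5]: the first step is masked
```
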
24 changes: 20 additions & 4 deletions xclim/indices/_threshold.py
@@ -6,7 +6,7 @@
import numpy as np
import xarray

from xclim.core.calendar import get_calendar, select_time
from xclim.core.calendar import doy_from_string, get_calendar, select_time
from xclim.core.missing import at_least_n_valid
from xclim.core.units import (
convert_units_to,
@@ -2966,6 +2966,7 @@ def degree_days_exceedance_date(
sum_thresh: Quantified = "25 K days",
op: str = ">",
after_date: DayOfYearStr | None = None,
never_reached: DayOfYearStr | int | None = None,
freq: str = "YS",
) -> xarray.DataArray:
r"""Degree-days exceedance date.
@@ -2986,7 +2987,12 @@
equivalent to '<', they are computed as `thresh - tas`.
after_date: str, optional
Date at which to start the cumulative sum.
In "mm-dd" format, defaults to the start of the sampling period.
In "MM-DD" format, defaults to the start of the sampling period.
never_reached: int, str, optional
What to do when `sum_thresh` is never exceeded.
If an int, the value to assign as a day-of-year.
If a string, must be in "MM-DD" format, the day-of-year of that date is assigned.
Default (None) assigns "NaN".
freq : str
Resampling frequency. If `after_date` is given, `freq` should be annual.
@@ -3030,12 +3036,22 @@ def _exceedance_date(grp):
strt_idx.size == 0
): # The date is not within the group. Happens at boundaries.
return xarray.full_like(grp.isel(time=0), np.nan, float).drop_vars("time") # type: ignore
cumsum = grp.where(grp.time >= grp.time[strt_idx][0]).cumsum("time")

return rl.first_run_after_date(
grp.where(grp.time >= grp.time[strt_idx][0]).cumsum("time") > sum_thresh,
out = rl.first_run_after_date(
cumsum > sum_thresh,
window=1,
date=None,
)
if never_reached is None:
# This is slightly faster in numpy and generates fewer tasks in dask
return out
never_reached_val = (
doy_from_string(never_reached, grp.time.dt.year[0], grp.time.dt.calendar)
if isinstance(never_reached, str)
else never_reached
)
return xarray.where((cumsum <= sum_thresh).all("time"), never_reached_val, out)

out = c.clip(0).resample(time=freq).map(_exceedance_date)
out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tas))
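
Not part of the diff: a synthetic illustration of the fallback at the end of
`_exceedance_date` — where the cumulative sum never exceeds `sum_thresh`, the
NaN returned by `first_run_after_date` is replaced by the requested day-of-year:

```python
import numpy as np
import xarray as xr

sum_thresh = 10.0
# Cumulative degree-day sums for two sites over three time steps.
cumsum = xr.DataArray([[1.0, 3.0, 6.0], [2.0, 8.0, 15.0]], dims=("site", "time"))
# Day-of-year found by the run-length search (NaN where no run was found).
out = xr.DataArray([np.nan, 245.0], dims=("site",))
never_reached_val = 335  # e.g. doy_from_string("12-01", 2017, "standard")

result = xr.where((cumsum <= sum_thresh).all("time"), never_reached_val, out)
print(result.values)  # [335. 245.]: only the site that never reached the threshold is filled
```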
