diff --git a/CHANGES.rst b/CHANGES.rst
index 670bebdd4..0f4cb12cd 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -22,6 +22,8 @@ New features and enhancements
 * New ``xclim.core.calendar.stack_periods`` and ``unstack_periods`` for performing ``rolling(time=...).construct(..., stride=...)`` but with non-uniform temporal periods like years or months. They replace ``xclim.sdba.processing.construct_moving_yearly_window`` and ``unpack_moving_yearly_window`` which are deprecated and will be removed in a future release.
 * New ``as_dataset`` options for ``xclim.set_options``. When True, indicators will output Datasets instead of DataArrays. (:issue:`1257`, :pull:`1625`).
 * Added new option for UTCI calculation to cap low wind velocities to a minimum of 0.5 m/s following Bröde (2012) guidelines. (:issue:`1634`, :pull:`1635`).
+* Added option ``never_reached`` to ``degree_days_exceedance_date`` to assign a custom value when the sum threshold is never reached. (:issue:`1459`, :pull:`1647`).
+* Added option ``min_members`` to ensemble statistics to mask elements when the number of valid members is under a threshold. (:issue:`1459`, :pull:`1647`).

 Breaking changes
 ^^^^^^^^^^^^^^^^
diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py
index bc28380aa..67aac38aa 100644
--- a/tests/test_ensembles.py
+++ b/tests/test_ensembles.py
@@ -290,6 +290,34 @@ def test_calc_mean_std_min_max(self, ensemble_dataset_objects, open_dataset):
             out1.tg_mean_min[0, 5, 5], out2.tg_mean_min[0, 5, 5]
         )

+    @pytest.mark.parametrize(
+        "aggfunc", [ensembles.ensemble_percentiles, ensembles.ensemble_mean_std_max_min]
+    )
+    def test_stats_min_members(self, ensemble_dataset_objects, open_dataset, aggfunc):
+        ds_all = [open_dataset(n) for n in ensemble_dataset_objects["nc_files_simple"]]
+        ens = ensembles.create_ensemble(ds_all).isel(lat=0, lon=0)
+        ens = ens.where(ens.realization > 0)
+        ens = xr.where((ens.realization == 1) & (ens.time.dt.year == 1950), np.NaN, ens)
+
+        def first(ds):
+            return ds[list(ds.data_vars.keys())[0]]
+
+        # Default, no masking
+        out = first(aggfunc(ens))
+        assert not out.isnull().any()
+
+        # A number
+        out = first(aggfunc(ens, min_members=3))
+        # Only 1950 is null
+        np.testing.assert_array_equal(
+            out.isnull(), [True] + [False] * (ens.time.size - 1)
+        )
+
+        # Special value
+        out = first(aggfunc(ens, min_members=None))
+        # All null
+        assert out.isnull().all()
+

 @pytest.mark.slow
 class TestEnsembleReduction:
diff --git a/tests/test_temperature.py b/tests/test_temperature.py
index 41a71865c..5b069fd34 100644
--- a/tests/test_temperature.py
+++ b/tests/test_temperature.py
@@ -1289,6 +1289,27 @@ def test_degree_days_exceedance_date(open_dataset):
     np.testing.assert_array_equal(out, np.array([[np.nan, 280, 241, 244]]).T)


+@pytest.mark.parametrize(
+    "never_reached,exp", [(None, np.NaN), (300, 300), ("12-01", 335)]
+)
+def test_degree_days_exceedance_date_never_reached(open_dataset, never_reached, exp):
+    tas = open_dataset("FWI/GFWED_sample_2017.nc").tas
+    tas.attrs.update(
+        cell_methods="time: mean within days", standard_name="air_temperature"
+    )
+    # never_reached=None (default) -> NaN; an int is used as-is; "MM-DD" is converted to a day-of-year
+    out = atmos.degree_days_exceedance_date(
+        tas=tas,
+        thresh="4 degC",
+        op=">",
+        sum_thresh="1000 K days",
+        after_date="07-01",
+        never_reached=never_reached,
+        freq="YS",
+    ).squeeze("time")
+    np.testing.assert_array_equal(out, np.array([exp, 242, 222, 223]))
+
+
 class TestWarmSpellDurationIndex:
     def test_warm_spell_duration_index(self, open_dataset):
         tasmax = open_dataset("ERA5/daily_surface_cancities_1990-1993.nc").tasmax
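A minimal sketch of the ``min_members`` masking that the new tests above exercise, assuming an xclim build that includes this branch; the synthetic ``tg_mean`` dataset is illustrative only:

```python
import numpy as np
import xarray as xr
from xclim import ensembles

# Three members, four time steps; member 2 is missing at the first step.
ds = xr.Dataset(
    {"tg_mean": (("realization", "time"), np.arange(12, dtype=float).reshape(3, 4))}
)
ds.tg_mean[2, 0] = np.nan

# Default (min_members=1): nothing is masked, the NaN is simply skipped.
out = ensembles.ensemble_mean_std_max_min(ds)
assert not bool(out.tg_mean_mean.isnull().any())

# min_members=3: the first time step has only 2 valid members, so all
# statistics (tg_mean_mean, tg_mean_stdev, tg_mean_max, tg_mean_min) are masked there.
strict = ensembles.ensemble_mean_std_max_min(ds, min_members=3)
assert bool(strict.tg_mean_mean.isel(time=0).isnull())

# min_members=None is shorthand for "all members must be valid".
assert bool(
    ensembles.ensemble_mean_std_max_min(ds, min_members=None)
    .tg_mean_mean.isel(time=0)
    .isnull()
)
```

With ``ensemble_percentiles`` the same mask is applied to every requested percentile.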
diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py
index a2b34aa4f..3da213480 100644
--- a/xclim/core/calendar.py
+++ b/xclim/core/calendar.py
@@ -39,6 +39,7 @@
     "datetime_to_decimal_year",
     "days_in_year",
     "days_since_to_doy",
+    "doy_from_string",
     "doy_to_days_since",
     "ensure_cftime_array",
     "get_calendar",
@@ -88,6 +89,12 @@ def days_in_year(year: int, calendar: str = "default") -> int:
     )


+def doy_from_string(doy: DayOfYearStr, year: int, calendar: str) -> int:
+    """Return the day-of-year corresponding to a "MM-DD" string for a given year and calendar."""
+    MM, DD = doy.split("-")
+    return datetime_classes[calendar](year, int(MM), int(DD)).timetuple().tm_yday
+
+
 def date_range(
     *args, calendar: str = "default", **kwargs
 ) -> pd.DatetimeIndex | CFTimeIndex:
diff --git a/xclim/core/utils.py b/xclim/core/utils.py
index b994ca80f..60010b25c 100644
--- a/xclim/core/utils.py
+++ b/xclim/core/utils.py
@@ -139,9 +139,9 @@ def decorator(func):
         def wrapper(*args, **kwargs):
             msg = (
                 f"`{func.__name__}` is deprecated"
-                f"{' from version {}'.format(from_version) if from_version else ''} "
+                f"{f' from version {from_version}' if from_version else ''} "
                 "and will be removed in a future version of xclim"
-                f"{'. Use `{}` instead'.format(suggested) if suggested else ''}. "
+                f"{f'. Use `{suggested}` instead' if suggested else ''}. "
                 "Please update your scripts accordingly."
             )
             warnings.warn(
@@ -683,6 +683,9 @@ def infer_kind_from_parameter(param) -> InputKind:
     if annot == {"Quantified"}:
         return InputKind.QUANTIFIED

+    if "DayOfYearStr" in annot:
+        return InputKind.DAY_OF_YEAR
+
     if annot.issubset({"int", "float"}):
         return InputKind.NUMBER

@@ -692,9 +695,6 @@ def infer_kind_from_parameter(param) -> InputKind:
     if annot == {"str"}:
         return InputKind.STRING

-    if annot == {"DayOfYearStr"}:
-        return InputKind.DAY_OF_YEAR
-
     if annot == {"DateStr"}:
         return InputKind.DATE
diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py
index 45886d4de..a76fed8d1 100644
--- a/xclim/ensembles/_base.py
+++ b/xclim/ensembles/_base.py
@@ -127,7 +127,7 @@ def create_ensemble(


 def ensemble_mean_std_max_min(
-    ens: xr.Dataset, weights: xr.DataArray | None = None
+    ens: xr.Dataset, min_members: int | None = 1, weights: xr.DataArray | None = None
 ) -> xr.Dataset:
     """Calculate ensemble statistics between a results from an ensemble of climate simulations.

@@ -138,6 +138,10 @@ def ensemble_mean_std_max_min(
     Parameters
     ----------
     ens : xr.Dataset
         Ensemble dataset (see xclim.ensembles.create_ensemble).
+    min_members : int, optional
+        The minimum number of valid ensemble members for a statistic to be valid.
+        Passing None is equivalent to setting min_members to the size of the realization dimension.
+        The default (1) essentially skips this check.
     weights : xr.DataArray, optional
         Weights to apply along the 'realization' dimension. This array cannot contain missing values.
@@ -158,6 +162,8 @@ def ensemble_mean_std_max_min(
         # Calculate ensemble statistics:
         ens_mean_std = ensemble_mean_std_max_min(ens)
     """
+    if min_members is None:
+        min_members = ens.realization.size
     ds_out = xr.Dataset(attrs=ens.attrs)
     for v in ens.data_vars:
         if weights is None:
@@ -170,9 +176,14 @@ def ensemble_mean_std_max_min(
         ds_out[f"{v}_max"] = ens[v].max(dim="realization")
         ds_out[f"{v}_min"] = ens[v].min(dim="realization")

+        if min_members != 1:
+            enough = ens[v].notnull().sum("realization") >= min_members
+
         # Re-add attributes
         for stat in ["mean", "stdev", "max", "min"]:
             vv = f"{v}_{stat}"
+            if min_members != 1:
+                ds_out[vv] = ds_out[vv].where(enough)
             ds_out[vv].attrs = ens[v].attrs
             if "description" in ds_out[vv].attrs.keys():
                 vv.split()
@@ -182,6 +193,7 @@ def ensemble_mean_std_max_min(
                     + vv.split("_")[-1]
                     + " of ensemble"
                 )
+
     ds_out.attrs["history"] = update_history(
         f"Computation of statistics on {ens.realization.size} ensemble members.", ds_out
     )
@@ -192,6 +204,7 @@ def ensemble_percentiles(
     ens: xr.Dataset | xr.DataArray,
     values: Sequence[int] | None = None,
     keep_chunk_size: bool | None = None,
+    min_members: int | None = 1,
     weights: xr.DataArray | None = None,
     split: bool = True,
 ) -> xr.DataArray | xr.Dataset:
     """Calculate ensemble statistics between a results from an ensemble of climate simulations.
@@ -211,6 +224,10 @@
         so that the chunks keep the same size (approximately). If False, no shrinking is performed,
         resulting in much larger chunks. If not defined, the function decides which is best.
+    min_members : int, optional
+        The minimum number of valid ensemble members for a statistic to be valid.
+        Passing None is equivalent to setting min_members to the size of the realization dimension.
+        The default (1) essentially skips this check.
     weights : xr.DataArray, optional
         Weights to apply along the 'realization' dimension. This array cannot contain missing values.
         When given, the function uses xarray's quantile method which is slower than xclim's NaN-optimized algorithm.
@@ -244,6 +261,8 @@
     """
     if values is None:
         values = [10, 50, 90]
+    if min_members is None:
+        min_members = ens.realization.size

     if isinstance(ens, xr.Dataset):
         out = xr.merge(
@@ -253,6 +272,7 @@
                 values,
                 keep_chunk_size=keep_chunk_size,
                 split=split,
+                min_members=min_members,
                 weights=weights,
             )
             for da in ens.data_vars.values()
@@ -313,6 +333,8 @@
         .rename({"quantile": "percentiles"})
     )

+    if min_members != 1:
+        out = out.where(ens.notnull().sum("realization") >= min_members)
     out = out.assign_coords(
         percentiles=xr.DataArray(list(values), dims=("percentiles",))
     )
diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py
index 91fedebf6..5fff2322e 100644
--- a/xclim/indices/_threshold.py
+++ b/xclim/indices/_threshold.py
@@ -6,7 +6,7 @@
 import numpy as np
 import xarray

-from xclim.core.calendar import get_calendar, select_time
+from xclim.core.calendar import doy_from_string, get_calendar, select_time
 from xclim.core.missing import at_least_n_valid
 from xclim.core.units import (
     convert_units_to,
@@ -2966,6 +2966,7 @@ def degree_days_exceedance_date(
     sum_thresh: Quantified = "25 K days",
     op: str = ">",
     after_date: DayOfYearStr | None = None,
+    never_reached: DayOfYearStr | int | None = None,
     freq: str = "YS",
 ) -> xarray.DataArray:
     r"""Degree-days exceedance date.
@@ -2986,7 +2987,12 @@
         equivalent to '<', they are computed as `thresh - tas`.
     after_date: str, optional
         Date at which to start the cumulative sum.
- In "mm-dd" format, defaults to the start of the sampling period. + In "MM-DD" format, defaults to the start of the sampling period. + never_reached: int, str, optional + What to do when `sum_thresh` is never exceeded. + If an int, the value to assign as a day-of-year. + If a string, must be in "MM-DD" format, the day-of-year of that date is assigned. + Default (None) assigns "NaN". freq : str Resampling frequency. If `after_date` is given, `freq` should be annual. @@ -3030,12 +3036,22 @@ def _exceedance_date(grp): strt_idx.size == 0 ): # The date is not within the group. Happens at boundaries. return xarray.full_like(grp.isel(time=0), np.nan, float).drop_vars("time") # type: ignore + cumsum = grp.where(grp.time >= grp.time[strt_idx][0]).cumsum("time") - return rl.first_run_after_date( - grp.where(grp.time >= grp.time[strt_idx][0]).cumsum("time") > sum_thresh, + out = rl.first_run_after_date( + cumsum > sum_thresh, window=1, date=None, ) + if never_reached is None: + # This is slightly faster in numpy and generates fewer tasks in dask + return out + never_reached_val = ( + doy_from_string(never_reached, grp.time.dt.year[0], grp.time.dt.calendar) + if isinstance(never_reached, str) + else never_reached + ) + return xarray.where((cumsum <= sum_thresh).all("time"), never_reached_val, out) out = c.clip(0).resample(time=freq).map(_exceedance_date) out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tas))