diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 116283737..4e119f8cf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,11 @@ Breaking changes ^^^^^^^^^^^^^^^^ * As updated in ``cf_xarray>=0.9.3``, dimensionless quantities now use the "1" units attribute as specified by the CF conventions, previously an empty string was returned. (:pull:`1814`). +Breaking changes +^^^^^^^^^^^^^^^^ +* The definitions of the ``frost_free_season_start`` and ``frost_free_season_end`` have been slightly changed to be coherent with the ``frost_free_season_length`` and `xclim`'s notion of `season` in general. Indicator and indices signature have changed. (:pull:`1845`). +* Season length indicators have been modified to return ``0`` for all cases where a proper season was not found, but the data is valid. Previously, a ``nan`` was given if neither a start or an end were found, even if the data was valid, and a ``0`` was given if an end was found but no start. (:pull:`1845`). + Bug fixes ^^^^^^^^^ * Fixed the indexer bug in the ``xclim.indices.standardized_index_fit_params`` when multiple or non-array indexers are specified and fitted parameters are reloaded from netCDF. (:issue:`1842`, :pull:`1843`). @@ -26,6 +31,7 @@ Internal changes ^^^^^^^^^^^^^^^^ * Changed the French translation of "wet days" from "jours mouillés" to "jours pluvieux". (:issue:`1825`, :pull:`1826`). * In order to adapt to changes in `pytest`, the doctest fixtures have been split from the main testing suite and doctests are now run using ``$ python -c 'from xclim.testing.utils import run_doctests; run_doctests()'``. (:pull:`1632`). +* Added ``xclim.indices.generic.season`` to make season start, end, and length indices. Added a ``stat`` argument to ``xclim.indices.run_length.season`` to avoid returning a dataset. (:pull:`1845`). CI changes ^^^^^^^^^^ diff --git a/tests/test_indices.py b/tests/test_indices.py index 9ed3e4d03..a5981e6cc 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -1160,7 +1160,7 @@ class TestGrowingSeasonLength: [ ("1950-01-01", "1951-01-01", 0), # No growing season ("2000-01-01", "2000-12-31", 365), # All year growing season - ("2000-07-10", "2001-01-01", np.nan), # End happens before start + ("2000-07-10", "2001-01-01", 0), # End happens before start ("2000-06-15", "2001-01-01", 199), # No end ("2000-06-15", "2000-07-15", 31), # Normal case ], diff --git a/tests/test_run_length.py b/tests/test_run_length.py index d0e10ae7e..b0dc1501a 100644 --- a/tests/test_run_length.py +++ b/tests/test_run_length.py @@ -470,7 +470,7 @@ class TestRunsWithDates: [ ("07-01", 210, 70), ("07-01", 190, 50), - ("04-01", 150, np.nan), # date falls early + ("04-01", 150, 0), # date falls early ("11-01", 150, 165), # date ends late (None, 150, 10), # no date, real length ], @@ -492,7 +492,7 @@ def test_season_length(self, tas_series, date, end, expected, use_dask, ufunc): runs, window=1, dim="time", - date=date, + mid_date=date, ) np.testing.assert_array_equal(np.mean(out.load()), expected) @@ -625,7 +625,7 @@ def test_run_with_dates_different_calendars(self, calendar, expected): out = ( (tas > 0) .resample(time="YS-MAR") - .map(rl.season_length, date="03-02", window=2) + .map(rl.season_length, mid_date="03-02", window=2) ) np.testing.assert_array_equal(out.values[1:], [250, 250]) @@ -643,9 +643,7 @@ def test_run_with_dates_different_calendars(self, calendar, expected): ) np.testing.assert_array_equal(out.values[1:], np.array(expected) + 1) - @pytest.mark.parametrize( - "func", [rl.first_run_after_date, rl.season_length, rl.run_end_after_date] - ) + @pytest.mark.parametrize("func", [rl.first_run_after_date, rl.run_end_after_date]) def test_too_many_dates(self, func, tas_series): tas = tas_series(np.zeros(730), start="2000-01-01") with pytest.raises(ValueError, match="More than 1 instance of date"): diff --git a/tests/test_temperature.py b/tests/test_temperature.py index 3331d0d79..e91cee728 100644 --- a/tests/test_temperature.py +++ b/tests/test_temperature.py @@ -368,7 +368,7 @@ def test_real_data(self, atmosds): class TestFrostSeasonLength: def test_simple(self, tasmin_series): - a = np.zeros(730) + K2C + 15 + a = np.zeros(731) + K2C + 15 a[300:400] = K2C - 5 a[404:407] = K2C - 5 tasmin = tasmin_series(a, start="2000-01-01") @@ -380,7 +380,7 @@ def test_simple(self, tasmin_series): np.testing.assert_array_equal(out, [np.nan, 100, np.nan]) out = atmos.frost_season_length(tasmin=tasmin, mid_date="07-01", freq="YS") - np.testing.assert_array_equal(out, [np.nan, np.nan]) + np.testing.assert_array_equal(out, [0, 181]) class TestColdSpellDays: diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 2d6341440..dc33bb60f 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -26,6 +26,7 @@ cumulative_difference, domain_count, first_day_threshold_reached, + season, spell_length_statistics, threshold_count, ) @@ -381,19 +382,8 @@ def snd_season_end( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snd.where(snd > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snd) - cond = snd >= thresh - - resampled = ( - cond.resample(time=freq) - .map(rl.season, window=window, dim="time", coord="dayofyear") - .end - ) - resampled = resampled.assign_attrs( - units="", is_dayofyear=np.int32(1), calendar=get_calendar(snd) - ) - snd_se = resampled.where(~valid) + out = season(snd, thresh, window=window, op=">=", stat="end", freq=freq) + snd_se = out.where(~valid) return snd_se @@ -430,19 +420,8 @@ def snw_season_end( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snw.where(snw > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snw) - cond = snw >= thresh - - resampled = ( - cond.resample(time=freq) - .map(rl.season, window=window, dim="time", coord="dayofyear") - .end - ) - resampled.attrs.update( - units="", is_dayofyear=np.int32(1), calendar=get_calendar(snw) - ) - snw_se = resampled.where(~valid) + out = season(snw, thresh, window=window, op=">=", stat="end", freq=freq) + snw_se = out.where(~valid) return snw_se @@ -479,24 +458,8 @@ def snd_season_start( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snd.where(snd > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snd) - cond = snd >= thresh - - resampled = ( - cond.resample(time=freq) - .map( - rl.season, - window=window, - dim="time", - coord="dayofyear", - ) - .start - ) - resampled.attrs.update( - units="", is_dayofyear=np.int32(1), calendar=get_calendar(snd) - ) - snd_ss = resampled.where(~valid) + out = season(snd, thresh, window=window, op=">=", stat="start", freq=freq) + snd_ss = out.where(~valid) return snd_ss @@ -534,24 +497,8 @@ def snw_season_start( """ valid = at_least_n_valid(snw.where(snw > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snw) - cond = snw >= thresh - - resampled = ( - cond.resample(time=freq) - .map( - rl.season, - window=window, - dim="time", - coord="dayofyear", - ) - .start - ) - resampled.attrs.update( - units="", is_dayofyear=np.int32(1), calendar=get_calendar(snw) - ) - snw_ss = resampled.where(~valid) + out = season(snw, thresh, window=window, op=">=", stat="start", freq=freq) + snw_ss = out.where(~valid) return snw_ss @@ -588,16 +535,8 @@ def snd_season_length( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snd.where(snd > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snd) - cond = snd >= thresh - - snd_sl = ( - cond.resample(time=freq) - .map(rl.season, window=window, dim="time", coord="dayofyear") - .length - ) - snd_sl = to_agg_units(snd_sl.where(~valid), snd, "count") + out = season(snd, thresh, window=window, op=">=", stat="length", freq=freq) + snd_sl = out.where(~valid) return snd_sl @@ -634,16 +573,8 @@ def snw_season_length( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snw.where(snw > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snw) - cond = snw >= thresh - - snw_sl = ( - cond.resample(time=freq) - .map(rl.season, window=window, dim="time", coord="dayofyear") - .length - ) - snw_sl = to_agg_units(snw_sl.where(~valid), snw, "count") + out = season(snw, thresh, window=window, op=">=", stat="length", freq=freq) + snw_sl = out.where(~valid) return snw_sl @@ -978,14 +909,21 @@ def growing_degree_days( def growing_season_start( tas: xarray.DataArray, thresh: Quantified = "5.0 degC", + mid_date: DayOfYearStr | None = "07-01", window: int = 5, freq: str = "YS", op: Literal[">", ">=", "gt", "ge"] = ">=", ) -> xarray.DataArray: r"""Start of the growing season. - Day of the year of the start of a sequence of days with mean daily temperatures consistently above or equal to a - given threshold (default: 5℃). + The growing season starts with the first sequence of a minimum length of consecutive days above the threshold + and ends with the first sequence of the same minimum length of consecutive days under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, a start can't happen later and an end can't happen earlier. + + Warnings + -------- + The default `freq` and `mid_date` parameters are valid for the northern hemisphere. Parameters ---------- @@ -993,6 +931,9 @@ def growing_season_start( Mean daily temperature. thresh : Quantified Threshold temperature on which to base evaluation. + mid_date : str, optional + Date of the year before which the season must start. Should have the format '%m-%d'. + ``None`` removes that constraint. window : int Minimum number of days with temperature above threshold needed for evaluation. freq : str @@ -1003,42 +944,35 @@ def growing_season_start( Returns ------- xarray.DataArray, [dimensionless] - Day of the year when temperature is superior to a threshold over a given number of days for the first time. - If there is no such day or if a growing season is not detected, returns np.nan. - - Notes - ----- - Let :math:`x_i` be the daily mean temperature at day of the year :math:`i` for values of :math:`i` going from 1 - to 365 or 366. The start date of the start of growing season is given by the smallest index :math:`i`: - - .. math:: - - \prod_{j=i}^{i+w} [x_j >= thresh] - - where :math:`w` is the number of days the temperature threshold should be met or exceeded, - and :math:`[P]` is 1 if :math:`P` is true, and 0 if false. + Start of the growing season. """ - thresh = convert_units_to(thresh, tas) - cond = compare(tas, op, thresh, constrain=(">=", ">")) - - out = cond.resample(time=freq).map(rl.first_run, window=window, coord="dayofyear") - out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tas)) - return out + return season( + tas, + thresh=thresh, + mid_date=mid_date, + window=window, + freq=freq, + op=op, + constrain=(">", ">="), + stat="start", + ) @declare_units(tas="[temperature]", thresh="[temperature]") def growing_season_end( tas: xarray.DataArray, thresh: Quantified = "5.0 degC", - mid_date: DayOfYearStr = "07-01", + mid_date: DayOfYearStr | None = "07-01", window: int = 5, freq: str = "YS", - op: Literal["<", "<=", "lt", "le"] = "<", + op: Literal[">", ">=", "lt", "le"] = ">", ) -> xarray.DataArray: r"""End of the growing season. - Day of the year of the start of a sequence of `N` (default: 5) days with mean temperatures consistently below a - given threshold (default: 5℃), occurring after a given calendar date (default: July 1). + The growing season starts with the first sequence of a minimum length of consecutive days above the threshold + and ends with the first sequence of the same minimum length of consecutive days under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, a start can't happen later and an end can't happen earlier. Warnings -------- @@ -1050,21 +984,21 @@ def growing_season_end( Mean daily temperature. thresh : Quantified Threshold temperature on which to base evaluation. - mid_date : str + mid_date : str, optional Date of the year after which to look for the end of the season. Should have the format '%m-%d'. + ``None`` removes that constraint. window : int Minimum number of days with temperature below threshold needed for evaluation. freq : str Resampling frequency. - op : {"<", "<=", "lt", "le"} - Comparison operation. Default: "<". + op : {">", ">=", "gt", "ge"} + Comparison operation. Default: ">". Note that this comparison is what defines the season. + The end of the season happens when the condition is NOT met for `window` consecutive days. Returns ------- xarray.DataArray, [dimensionless] - Day of the year when temperature is inferior to a threshold over a given number of days for the first time. - If there is no such day or if a growing season is not detected, returns np.nan. - If the growing season does not end within the time period, returns the last day of the period. + End of the growing season. Notes ----- @@ -1078,20 +1012,16 @@ def growing_season_end( where :math:`w` is the number of days where temperature should be inferior to a given threshold after a given date, and :math:`[P]` is 1 if :math:`P` is true, and 0 if false. """ - thresh = convert_units_to(thresh, tas) - - # Note: The following operation is inverted here so that there is less confusion for users. - cond = ~compare(tas, op, thresh, constrain=("<=", "<")) - - out = cond.resample(time=freq).map( - rl.run_end_after_date, + return season( + tas, + thresh=thresh, + mid_date=mid_date, window=window, - date=mid_date, - dim="time", - coord="dayofyear", + freq=freq, + op=op, + constrain=(">", ">="), + stat="end", ) - out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tas)) - return out @declare_units(tas="[temperature]", thresh="[temperature]") @@ -1099,16 +1029,18 @@ def growing_season_length( tas: xarray.DataArray, thresh: Quantified = "5.0 degC", window: int = 6, - mid_date: DayOfYearStr = "07-01", + mid_date: DayOfYearStr | None = "07-01", freq: str = "YS", op: str = ">=", ) -> xarray.DataArray: r"""Growing season length. - The number of days between the first occurrence of at least `N` (default: 6) consecutive days with mean daily - temperature over a threshold (default: 5℃) and the first occurrence of at least `N` consecutive days with mean - daily temperature below the same threshold after a certain date, usually July 1st (06-01) in the northern emispher - and January 1st (01-01) in the southern hemisphere. + The growing season starts with the first sequence of a minimum length of consecutive days above the threshold + and ends with the first sequence of the same minimum length of consecutive days under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, a start can't happen later and an end can't happen earlier. + If the season starts but never ends, the length is computed up to the end of the resampling period. + If no season start is found, but the data is valid, a length of 0 is returned. Warnings -------- @@ -1122,8 +1054,9 @@ def growing_season_length( Threshold temperature on which to base evaluation. window : int Minimum number of days with temperature above threshold to mark the beginning and end of growing season. - mid_date : str - Date of the year after which to look for the end of the season. Should have the format '%m-%d'. + mid_date : str, optional + Date of the year before which the season must start and after which it can end. Should have the format '%m-%d'. + ``None`` removes that constraint. freq : str Resampling frequency. op : {">", ">=", "gt", "ge"} @@ -1167,16 +1100,16 @@ def growing_season_length( :cite:cts:`project_team_eca&d_algorithm_2013` """ - thresh = convert_units_to(thresh, tas) - cond = compare(tas, op, thresh, constrain=(">=", ">")) - - out = cond.resample(time=freq).map( - rl.season_length, + return season( + tas, + thresh=thresh, + mid_date=mid_date, window=window, - date=mid_date, - dim="time", + freq=freq, + op=op, + constrain=(">", ">="), + stat="length", ) - return to_agg_units(out, tas, "count") @declare_units(tasmin="[temperature]", thresh="[temperature]") @@ -1207,7 +1140,7 @@ def frost_season_length( Minimum number of days with temperature below threshold to mark the beginning and end of frost season. mid_date : str, optional Date the must be included in the season. It is the earliest the end of the season can be. - If None, there is no limit. + ``None`` removes that constraint. thresh : Quantified Threshold temperature on which to base evaluation. freq : str @@ -1248,16 +1181,16 @@ def frost_season_length( >>> fsl_sh = frost_season_length(tasmin, freq="YS") """ - thresh = convert_units_to(thresh, tasmin) - cond = compare(tasmin, op, thresh, constrain=("<=", "<")) - - out = cond.resample(time=freq).map( - rl.season_length, + return season( + tasmin, + thresh=thresh, window=window, - date=mid_date, - dim="time", + op=op, + stat="length", + freq=freq, + mid_date=mid_date, + constrain=("<", "<="), ) - return to_agg_units(out, tasmin, "count") @declare_units(tasmin="[temperature]", thresh="[temperature]") @@ -1265,13 +1198,16 @@ def frost_free_season_start( tasmin: xarray.DataArray, thresh: Quantified = "0.0 degC", window: int = 5, + mid_date: DayOfYearStr | None = "07-01", + op: str = ">=", freq: str = "YS", ) -> xarray.DataArray: r"""Start of the frost free season. - Day of the year of the start of a sequence of days with minimum temperatures consistently above or equal to a - threshold (default: 0℃), after a period of `N` days (default: 5) with minimum temperatures consistently - above the same threshold. + The frost free season starts when a sequence of `window` consecutive days are above the threshold + and ends when a sequence of consecutive days of the same length are under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, the start must occur before and the end after for the season to be valid. Parameters ---------- @@ -1280,53 +1216,59 @@ def frost_free_season_start( thresh : Quantified Threshold temperature on which to base evaluation. window : int - Minimum number of days with temperature above threshold needed for evaluation. + Minimum number of days with temperature above/under threshold to start/end the season. + mid_date : DayOfYearStr, optional + A date that must be included in the season. + ``None`` removes that constraint. + op : {'>', '>=', 'ge', 'gt'} + How to compare tasmin and the threshold. freq : str Resampling frequency. Returns ------- xarray.DataArray, [dimensionless] - Day of the year when minimum temperature is superior to a threshold - over a given number of days for the first time. - If there is no such day or if a frost free season is not detected, returns np.nan. + Day of the year when the frost free season starts. Notes ----- Let :math:`x_i` be the daily mean temperature at day of the year :math:`i` for values of :math:`i` going from 1 - to 365 or 366. The start date of the start of growing season is given by the smallest index :math:`i`: + to 365 or 366. The start date of the season is given by the smallest index :math:`i`: .. math:: \prod_{j=i}^{i+w} [x_j >= thresh] where :math:`w` is the number of days the temperature threshold should be met or exceeded, - and :math:`[P]` is 1 if :math:`P` is true, and 0 if false. + and `i` must be earlier than `mid_date`. """ - thresh = convert_units_to(thresh, tasmin) - over = tasmin >= thresh - out = over.resample(time=freq).map(rl.first_run, window=window, coord="dayofyear") - out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tasmin)) - return out + return season( + tasmin, + thresh=thresh, + window=window, + op=op, + stat="start", + freq=freq, + mid_date=mid_date, + constrain=(">", ">="), + ) @declare_units(tasmin="[temperature]", thresh="[temperature]") def frost_free_season_end( tasmin: xarray.DataArray, thresh: Quantified = "0.0 degC", - mid_date: DayOfYearStr = "07-01", window: int = 5, + mid_date: DayOfYearStr | None = "07-01", + op: str = ">=", freq: str = "YS", ) -> xarray.DataArray: r"""End of the frost free season. - Day of the year of the start of a sequence of days with minimum temperatures consistently below a threshold - (default: 0℃), after a period of `N` days (default: 5) with minimum temperatures consistently above the same - threshold. - - Warnings - -------- - The default `freq` and `mid_date` parameters are valid for the northern hemisphere. + The frost free season starts when a sequence of `window` consecutive days are above the threshold + and ends when a sequence of consecutive days of the same length are under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, the start must occur before and the end after for the season to be valid. Parameters ---------- @@ -1334,89 +1276,105 @@ def frost_free_season_end( Minimum daily temperature. thresh : Quantified Threshold temperature on which to base evaluation. - mid_date : str - Date of the year after which to look for the end of the season. Should have the format '%m-%d'. window : int - Minimum number of days with temperature below threshold needed for evaluation. + Minimum number of days with temperature above/under threshold to start/end the season. + mid_date : DayOfYearStr, optional + A date what must be included in the season. ``None`` removes that constraint. + op : {'>', '>=', 'ge', 'gt'} + How to compare tasmin and the threshold. freq : str Resampling frequency. Returns ------- xarray.DataArray, [dimensionless] - Day of the year when minimum temperature is inferior to a threshold over a given number of days for the first time. - If there is no such day or if a frost free season is not detected, returns np.nan. - If the frost free season does not end within the time period, returns the last day of the period. - """ - thresh = convert_units_to(thresh, tasmin) - cond = tasmin >= thresh + Day of the year when the frost free season starts. - out = cond.resample(time=freq).map( - rl.run_end_after_date, + Notes + ----- + Let :math:`x_i` be the daily mean temperature at day of the year :math:`i` for values of :math:`i` going from 1 + to 365 or 366. The start date is given by the smallest index :math:`i`: + + .. math:: + + \prod_{k=i}^{i+w} [x_k >= thresh] + + while the end date is given bt the largest index :math:`j`: + + .. math:: + + \prod_{k=j}^{j+w} [x_k < thresh] + + where :math:`w` is the number of days the temperature threshold should be exceeded/subceeded. + An end is only valid if a start is also found and the end must happen later than `mid_date` + while the start must happen earlier. + """ + return season( + tasmin, + thresh=thresh, window=window, - date=mid_date, - dim="time", - coord="dayofyear", + op=op, + stat="end", + freq=freq, + mid_date=mid_date, + constrain=(">", ">="), ) - out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tasmin)) - return out @declare_units(tasmin="[temperature]", thresh="[temperature]") def frost_free_season_length( tasmin: xarray.DataArray, + thresh: Quantified = "0.0 degC", window: int = 5, mid_date: DayOfYearStr | None = "07-01", - thresh: Quantified = "0.0 degC", - freq: str = "YS", op: str = ">=", + freq: str = "YS", ) -> xarray.DataArray: - r"""Frost free season length. + r"""Length of the frost free season. - The number of days between the first occurrence of at least `N` (default: 5) consecutive days with minimum daily - temperature above a threshold (default: 0℃) and the first occurrence of at least `N` consecutive days with - minimum daily temperature below the same threshold. - A mid-date can be given to limit the earliest day the end of season can take. - - Warnings - -------- - The default `freq` and `mid_date` parameters are valid for the northern hemisphere. + The frost free season starts when a sequence of `window` consecutive days are above the threshold + and ends when a sequence of consecutive days of the same length are under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, the start must occur before and the end after for the season to be valid. Parameters ---------- tasmin : xarray.DataArray Minimum daily temperature. - window : int - Minimum number of days with temperature above threshold to mark the beginning and end of frost free season. - mid_date : str, optional - Date the must be included in the season. It is the earliest the end of the season can be. - If None, there is no limit. thresh : Quantified Threshold temperature on which to base evaluation. + window : int + Minimum number of days with temperature above/under threshold to start/end the season. + mid_date : DayOfYearStr, optional + A date what must be included in the season. ``None`` removes that constraint. + op : {'>', '>=', 'ge', 'gt'} + How to compare tasmin and the threshold. freq : str Resampling frequency. - op : {">", ">=", "gt", "ge"} - Comparison operation. Default: ">=". Returns ------- xarray.DataArray, [time] - Frost free season length. + Length of the frost free season. Notes ----- - Let :math:`TN_{ij}` be the minimum temperature at day :math:`i` of period :math:`j`. Then counted is - the number of days between the first occurrence of at least N consecutive days with: + Let :math:`x_i` be the daily mean temperature at day of the year :math:`i` for values of :math:`i` going from 1 + to 365 or 366. The start date is given by the smallest index :math:`i`: .. math:: - TN_{ij} >= 0 ℃ + \prod_{k=i}^{i+w} [x_k >= thresh] - and the first subsequent occurrence of at least N consecutive days with: + while the end date is given bt the largest index :math:`j`: .. math:: - TN_{ij} < 0 ℃ + \prod_{k=j}^{j+w} [x_k < thresh] + + where :math:`w` is the number of days the temperature threshold should be exceeded/subceeded. + An end is only valid if a start is also found and the end must happen later than `mid_date` + while the start must happen earlier. Examples -------- @@ -1431,16 +1389,16 @@ def frost_free_season_length( >>> ffsl_sh = frost_free_season_length(tasmin, freq="YS-JUL") """ - thresh = convert_units_to(thresh, tasmin) - cond = compare(tasmin, op, thresh, constrain=(">=", ">")) - - out = cond.resample(time=freq).map( - rl.season_length, + return season( + tasmin, + thresh=thresh, window=window, - date=mid_date, - dim="time", + op=op, + stat="length", + freq=freq, + mid_date=mid_date, + constrain=(">", ">="), ) - return to_agg_units(out, tasmin, "count") @declare_units(tasmin="[temperature]", thresh="[temperature]") @@ -1452,17 +1410,17 @@ def frost_free_spell_max_length( op: str = ">=", resample_before_rl: bool = True, ) -> xarray.DataArray: - r"""Longest cold spell. + r"""Longest frost free spell. - Longest spell of low temperatures over a given period. - Longest series of at least {window} consecutive days with temperature at or below {thresh}. + Longest spell of warm temperatures over a given period. + Longest series of at least {window} consecutive days with temperature at or above the threshold. Parameters ---------- tasmin : xarray.DataArray Minimum daily temperature. thresh : Quantified - The temperature threshold needed to trigger a cold spell. + The temperature threshold needed to trigger a frost free spell. window : int Minimum number of days with temperatures above thresholds to qualify as a frost free day. freq : str diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 39831bc1f..282a4f40c 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -51,8 +51,10 @@ "get_zones", "interday_diurnal_temperature_range", "last_occurrence", + "season", "select_resample_op", "spell_length", + "spell_length_statistics", "statistics", "temperature_sum", "threshold_count", @@ -475,6 +477,89 @@ def spell_length_statistics( return to_agg_units(out, data, "count") +@declare_relative_units(thresh="") +def season( + data: xarray.DataArray, + thresh: Quantified, + window: int, + op: str, + stat: str, + freq: str, + mid_date: DayOfYearStr | None = None, + constrain: Sequence[str] | None = None, +) -> xarray.DataArray: + r"""Season. + + A season starts when a variable respects some condition for a consecutive run of `N` days. It stops + when the condition is inverted for `N` days. Runs where the condition is not met for fewer than `N` days + are thus allowed. Additionally a middle date can serve as a maximal start date and minimum end date. + + Parameters + ---------- + data : xarray.DataArray + Variable. + thresh : Quantified + Threshold on which to base evaluation. + window : int + Minimum number of days that the condition must be met / not met for the start / end of the season. + op : str + Comparison operation. + stat : {'start', 'end', 'length'} + Which season facet to return. + freq : str + Resampling frequency. + mid_date : DayOfYearStr, optional + An optional middle date. The start must happen before and the end after for the season to be valid. + constrain : Sequence of strings, optional + A list of acceptable comparison operators. Optional, but indicators wrapping this function should inject it. + + Returns + ------- + xarray.DataArray, [dimensionless] or [time] + Depends on 'stat'. If 'start' or 'end', this is the day of year of the season's start or end. + If 'length', this is the length of the season. + + Examples + -------- + >>> season(tas, thresh="0 °C", window=5, op=">", stat="start", freq="YS") + + Returns the start of the "frost-free" season. The season starts with 5 consecutive days with mean temperature + above 0°C and ends with as many days under or equal to 0°C. And end does not need to be found for a start to be valid. + + >>> season( + ... pr, + ... thresh="2 mm/d", + ... window=7, + ... op="<=", + ... mid_date="08-01", + ... stat="length", + ... freq="YS", + ... ) + + Returns the length of the "dry" season. The season starts with 7 consecutive days with precipitation under or equal to + 2 mm/d and ends with as many days above 2 mm/d. If no start is found before the first of august, the season is invalid. + If a start is found but no end, the end is set to the last day of the period (December 31st if the dataset is complete). + + See Also + -------- + xclim.indices.run_length.season_start + xclim.indices.run_length.season_length + xclim.indices.run_length.season_end + """ + thresh = convert_units_to(thresh, data, context="infer") + cond = compare(data, op, thresh, constrain=constrain) + FUNC = {"start": rl.season_start, "end": rl.season_end, "length": rl.season_length} + map_kwargs = dict(window=window, mid_date=mid_date) + if stat in ["start", "end"]: + map_kwargs["coord"] = "dayofyear" + out = cond.resample(time=freq).map(FUNC[stat], **map_kwargs) + if stat == "length": + return to_agg_units(out, data, "count") + # else, a date + out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(data)) + return out + + # CF-INDEX-META Indices diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 090e52872..f1bf6b0cb 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -755,18 +755,125 @@ def extract_events( return runs +def season_start( + da: xr.DataArray, + window: int, + mid_date: DayOfYearStr | None = None, + dim: str = "time", + coord: str | bool | None = False, +) -> xr.DataArray: + """Start of a season. + + See :py:func:`season`. + + Parameters + ---------- + da : xr.DataArray + Input N-dimensional DataArray (boolean). + window : int + Minimum duration of consecutive values to start and end the season. + mid_date : DayOfYearStr, optional + The date (in MM-DD format) that a season must include to be considered valid. + dim : str + Dimension along which to calculate the season (default: 'time'). + coord : Optional[str] + If not False, the function returns values along `dim` instead of indexes. + If `dim` has a datetime dtype, `coord` can also be a str of the name of the + DateTimeAccessor object to use (ex: 'dayofyear'). + + Returns + ------- + xr.DataArray + Start of the season, units depend on `coord`. + + See Also + -------- + season + season_end + season_length + """ + return first_run_before_date(da, window=window, date=mid_date, dim=dim, coord=coord) + + +def season_end( + da: xr.DataArray, + window: int, + mid_date: DayOfYearStr | None = None, + dim: str = "time", + coord: str | bool | None = False, + _beg: xr.DataArray | None = None, +) -> xr.DataArray: + """End of a season. + + See :py:func:`season`. Similar to :py:func:`first_run_after_date` but here a season + must have a start for an end to be valid. Also, if no end is found but a start was found + the end is set to the last element of the series. + + + Parameters + ---------- + da : xr.DataArray + Input N-dimensional DataArray (boolean). + window : int + Minimum duration of consecutive values to start and end the season. + mid_date : DayOfYearStr, optional + The date (in MM-DD format) that a run must include to be considered valid. + dim : str + Dimension along which to calculate consecutive run (default: 'time'). + coord : str, optional + If not False, the function returns values along `dim` instead of indexes. + If `dim` has a datetime dtype, `coord` can also be a str of the name of the + DateTimeAccessor object to use (ex: 'dayofyear'). + + Returns + ------- + xr.DataArray + End of the season, units depend on `coord`. + If there is a start is found but no end, the end is set to the last element. + + See Also + -------- + season + season_start + season_length + """ + # Fast path for `season` and `season_length` + if _beg is not None: + beg = _beg + else: + beg = season_start(da, window=window, dim=dim, mid_date=mid_date, coord=False) + # Invert the condition and mask all values after beginning + # we fillna(0) as so to differentiate series with no runs and all-nan series + not_da = (~da).where(da[dim].copy(data=np.arange(da[dim].size)) >= beg.fillna(0)) + end = first_run_after_date( + not_da, window=window, dim=dim, date=mid_date, coord=False + ) + if _beg is None: + # Where end is null and beg is not null (valid data, no end detected), put the last index + # Don't do this in the fast path, so that the length can use this information + end = xr.where(end.isnull() & beg.notnull(), da[dim].size - 1, end) + end = end.where(beg.notnull()) + if coord: + crd = da[dim] + if isinstance(coord, str): + crd = getattr(crd.dt, coord) + end = lazy_indexing(crd, end) + return end + + def season( da: xr.DataArray, window: int, - date: DayOfYearStr | None = None, + mid_date: DayOfYearStr | None = None, dim: str = "time", coord: str | bool | None = False, ) -> xr.Dataset: """Calculate the bounds of a season along a dimension. A "season" is a run of True values that may include breaks under a given length (`window`). - The start is computed as the first run of `window` True values, then end as the first subsequent run - of `window` False values. If a date is passed, it must be included in the season. + The start is computed as the first run of `window` True values, and the end as the first subsequent run + of `window` False values. The end element is the first element after the season. + If a date is given, it must be included in the season, i.e. the start cannot occur later and the end cannot occur earlier. Parameters ---------- @@ -774,7 +881,7 @@ def season( Input N-dimensional DataArray (boolean). window : int Minimum duration of consecutive values to start and end the season. - date : DayOfYearStr, optional + mid_date : DayOfYearStr, optional The date (in MM-DD format) that a run must include to be considered valid. dim : str Dimension along which to calculate consecutive run (default: 'time'). @@ -786,7 +893,10 @@ def season( Returns ------- xr.Dataset - "dim" is reduced to "season_bnds" with 2 elements : season start and season end, both indices of da[dim]. + Dataset variables: + start : start of the season (index or units depending on ``coord``) + end : end of the season (index or units depending on ``coord``) + length : length of the season (in number of elements along ``dim``) Notes ----- @@ -797,52 +907,39 @@ def season( Example : Length of the "warm season", where T > 25°C, with date = 1st August. Let's say the temperature is over 25 for all June, but July and august have very cold temperatures. Instead of returning 30 days (June), the function will return 61 days (July + June). - """ - beg = first_run(da, window=window, dim=dim) - # Invert the condition and mask all values after beginning - # we fillna(0) as so to differentiate series with no runs and all-nan series - not_da = (~da).where(da[dim].copy(data=np.arange(da[dim].size)) >= beg.fillna(0)) - - # Mask also values after "date" - mid_idx = index_of_date(da[dim], date, max_idxs=1, default=0) - if mid_idx.size == 0: - # The date is not within the group. Happens at boundaries. - base = da.isel({dim: 0}) # To have the proper shape - beg = xr.full_like(base, np.nan, float).drop_vars(dim) - end = xr.full_like(base, np.nan, float).drop_vars(dim) - length = xr.full_like(base, np.nan, float).drop_vars(dim) - else: - if date is not None: - # If the beginning was after the mid date, both bounds are NaT. - valid_start = beg < mid_idx.squeeze() - else: - valid_start = True - - not_da = not_da.where(da[dim] >= da[dim][mid_idx][0]) - end = first_run( - not_da, - window=window, - dim=dim, - ) - # If there was a beginning but no end, season goes to the end of the array - no_end = beg.notnull() & end.isnull() - - # Length - length = end - beg - # No end: length is actually until the end of the array, so it is missing 1 - length = xr.where(no_end, da[dim].size - beg, length) - # Where the beginning was before the mid-date, invalid. - length = length.where(valid_start) - # Where there were data points, but no season : put 0 length - length = xr.where(beg.isnull() & end.notnull(), 0, length) + The season's length is always the difference between the end and the start. Except if no + season end was found before the end of the data. In that case the end is set to last element and + the length is set to the data size minus the start index. Thus, for the specific case, :math:`length = end - start + 1`, + because the end falls on the last element of the season instead of the subsequent one. - # No end: end defaults to the last element (this differs from length, but heh) - end = xr.where(no_end, da[dim].size - 1, end) - - # Where the beginning was before the mid-date - beg = beg.where(valid_start) - end = end.where(valid_start) + See Also + -------- + season_start + season_end + season_length + """ + beg = season_start(da, window=window, dim=dim, mid_date=mid_date, coord=False) + # Use fast path in season_end : no recomputing of start, no masking of end where beg.isnull() and don't set end if none found + end = season_end( + da, window=window, dim=dim, mid_date=mid_date, _beg=beg, coord=False + ) + # Three cases : + # start no start + # end e - s 0 + # no end size - s 0 + # da is boolean, so we have no way of knowing if the absence of season is due to missing values or to an actual absence. + length = xr.where( + beg.isnull(), + 0, + # Where there is no end, from the start to the boundary + xr.where(end.isnull(), da[dim].size - beg, end - beg), + ) + # Now masks ends + # Still give an end if we didn't find any : put the last element + # This breaks the length = end - beg, but yields a truer length + end = xr.where(end.isnull() & beg.notnull(), da[dim].size - 1, end) + end = end.where(beg.notnull()) if coord: crd = da[dim] @@ -857,7 +954,6 @@ def season( coordstr = "index" out = xr.Dataset({"start": beg, "end": end, "length": length}) - out.start.attrs.update( long_name="Start of the season.", description=f"First {coordstr} of a run of at least {window} steps respecting the condition.", @@ -877,14 +973,10 @@ def season( def season_length( da: xr.DataArray, window: int, - date: DayOfYearStr | None = None, + mid_date: DayOfYearStr | None = None, dim: str = "time", ) -> xr.DataArray: - """Return the length of the longest semi-consecutive run of True values (optionally including a given date). - - A "season" is a run of True values that may include breaks under a given length (`window`). - The start is computed as the first run of `window` True values, then end as the first subsequent run - of `window` False values. If a date is passed, it must be included in the season. + """Length of a season. Parameters ---------- @@ -892,7 +984,7 @@ def season_length( Input N-dimensional DataArray (boolean). window : int Minimum duration of consecutive values to start and end the season. - date : DayOfYearStr, optional + mid_date : DayOfYearStr, optional The date (in MM-DD format) that a run must include to be considered valid. dim : str Dimension along which to calculate consecutive run (default: 'time'). @@ -900,20 +992,15 @@ def season_length( Returns ------- xr.DataArray, [int] - Length of the longest run of True values along a given dimension (inclusive of a given date) - without breaks longer than a given length. - - Notes - ----- - The run can include holes of False or NaN values, so long as they do not exceed the window size. + Length of the season, in number of elements along dimension `time`. - If a date is given, the season start and end are forced to be on each side of this date. This means that - even if the "real" season has been over for a long time, this is the date used in the length calculation. - Example : Length of the "warm season", where T > 25°C, with date = 1st August. Let's say the temperature is over - 25 for all June, but July and august have very cold temperatures. Instead of returning 30 days (June), the function - will return 61 days (July + June). + See Also + -------- + season + season_start + season_end """ - seas = season(da, window, date, dim, coord=False) + seas = season(da, window, mid_date, dim, coord=False) return seas.length @@ -1053,6 +1140,54 @@ def last_run_before_date( return last_run(run, window=window, dim=dim, coord=coord) +def first_run_before_date( + da: xr.DataArray, + window: int, + date: DayOfYearStr | None = "07-01", + dim: str = "time", + coord: bool | str | None = "dayofyear", +) -> xr.DataArray: + """Return the index of the first item of the first run before a given date. + + Parameters + ---------- + da : xr.DataArray + Input N-dimensional DataArray (boolean). + window : int + Minimum duration of consecutive run to accumulate values. + date : DayOfYearStr + The date before which to look for the run. + dim : str + Dimension along which to calculate consecutive run (default: 'time'). + coord : bool or str, optional + If not False, the function returns values along `dim` instead of indexes. + If `dim` has a datetime dtype, `coord` can also be a str of the name of the + DateTimeAccessor object to use (e.g. 'dayofyear'). + + Returns + ------- + xr.DataArray + Index (or coordinate if `coord` is not False) of first item in the first valid run. + Returns np.nan if there are no valid runs. + """ + if date is not None: + mid_idx = index_of_date(da[dim], date, max_idxs=1, default=0) + if ( + mid_idx.size == 0 + ): # The date is not within the group. Happens at boundaries. + return xr.full_like(da.isel({dim: 0}), np.nan, float).drop_vars(dim) + # Mask anything after the mid_date + window - 1 + # Thus, the latest run possible can begin on the day just before mid_idx + da = da.where(da[dim] < da[dim][mid_idx + window - 1][0]) + + return first_run( + da, + window=window, + dim=dim, + coord=coord, + ) + + @njit def _rle_1d(ia): y = ia[1:] != ia[:-1] # pairwise unequal (string safe)