diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
index 26d8f0b9b..2e325ce3b 100644
--- a/docs/notebooks/example.ipynb
+++ b/docs/notebooks/example.ipynb
@@ -238,7 +238,7 @@
     "Almost all `xclim` indicators convert daily data to lower time frequencies, such as seasonal or annual values. This is done using `xarray.DataArray.resample` method. Resampling creates a grouped object over which you apply a reduction operation (e.g. mean, min, max). The list of available frequency is given in the link below, but the most often used are:\n",
     "\n",
     "- `YS`: annual starting in January\n",
-    "- `AS-JUL`: annual starting in July\n",
+    "- `YS-JUL`: annual starting in July\n",
     "- `MS`: monthly\n",
     "- `QS-DEC`: seasonal starting in December\n",
     "\n",
diff --git a/docs/notebooks/xclim_training/XCLIM_calculate_index-Exemple.ipynb b/docs/notebooks/xclim_training/XCLIM_calculate_index-Exemple.ipynb
index a5e055d0b..f1f699e5a 100644
--- a/docs/notebooks/xclim_training/XCLIM_calculate_index-Exemple.ipynb
+++ b/docs/notebooks/xclim_training/XCLIM_calculate_index-Exemple.ipynb
@@ -248,7 +248,7 @@
     "All `xclim` indicators convert daily data to lower time frequencies, such as monthly or annual values. This is done using `xarray.DataArray.resample` method. Resampling creates a grouped object over which you apply a reduction operation (e.g. mean, min, max). The list of available frequency is given in the link below, but the most often used are: \n",
     "\n",
     "- YS: annual starting in January\n",
-    "- AS-JUL: annual starting in July\n",
+    "- YS-JUL: annual starting in July\n",
     "- MS: monthly\n",
     "- QS-DEC: seasonal starting in December\n",
     "- 7D: 7 day (weekly)\n",
diff --git a/pyproject.toml b/pyproject.toml
index c2f107faa..331754a72 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,8 +44,8 @@ dependencies = [
   "lmoments3>=1.0.5",
   "numba",
   "numpy>=1.16",
-  "pandas>=0.23,<2.0; python_version == '3.8'",
-  "pandas>=0.23; python_version >= '3.9'",
+  "pandas>=1.0,<2.0; python_version == '3.8'",
+  "pandas>=1.0; python_version >= '3.9'",
   "pint>=0.10",
   "pyyaml",
   "scikit-learn>=0.21.3",
diff --git a/tests/test_atmos.py b/tests/test_atmos.py
index 01ebcb3ea..214e0dbfc 100644
--- a/tests/test_atmos.py
+++ b/tests/test_atmos.py
@@ -272,7 +272,7 @@ def test_wind_power_potential_from_3h_series():
     from xclim.testing.helpers import test_timeseries

     w = test_timeseries(
-        np.ones(96) * 15, variable="sfcWind", start="7/1/2000", units="m s-1", freq="3H"
+        np.ones(96) * 15, variable="sfcWind", start="7/1/2000", units="m s-1", freq="3h"
     )

     out = atmos.wind_power_potential(wind_speed=w)
diff --git a/tests/test_calendar.py b/tests/test_calendar.py
index 018808958..26aaf8b1d 100644
--- a/tests/test_calendar.py
+++ b/tests/test_calendar.py
@@ -57,7 +57,7 @@ def da(index):
     )


-@pytest.mark.parametrize("freq", ["6480H", "302431T", "23144781S"])
+@pytest.mark.parametrize("freq", ["6480h", "302431min", "23144781s"])
 def test_time_bnds(freq, datetime_index, cftime_index):
     da_datetime = da(datetime_index).resample(time=freq)
     da_cftime = da(cftime_index).resample(time=freq)
@@ -146,7 +146,7 @@ def test_percentile_doy_invalid():
     tas = xr.DataArray(
         [0, 1],
         dims=("time",),
-        coords={"time": pd.date_range("2000-01-01", periods=2, freq="H")},
+        coords={"time": pd.date_range("2000-01-01", periods=2, freq="h")},
     )
     with pytest.raises(ValueError):
         percentile_doy(tas)
@@ -155,10 +155,10 @@ def test_percentile_doy_invalid():
 @pytest.mark.parametrize(
     "freqA,op,freqB,exp",
     [
-        ("D", ">", "H", True),
+        ("D", ">", "h", True),
         ("2YS", "<=",
"QS-DEC", False), ("4W", "==", "3W", False), - ("24H", "==", "D", True), + ("24h", "==", "D", True), ], ) def test_compare_offsets(freqA, op, freqB, exp): @@ -275,8 +275,8 @@ def test_get_calendar_errors(obj): ("standard", "noleap", True, "D"), ("noleap", "default", True, "D"), ("noleap", "all_leap", False, "D"), - ("proleptic_gregorian", "noleap", False, "4H"), - ("default", "noleap", True, "4H"), + ("proleptic_gregorian", "noleap", False, "4h"), + ("default", "noleap", True, "4h"), ], ) def test_convert_calendar(source, target, target_as_str, freq): @@ -311,7 +311,7 @@ def test_convert_calendar(source, target, target_as_str, freq): [ ("standard", "360_day", "D"), ("360_day", "default", "D"), - ("proleptic_gregorian", "360_day", "4H"), + ("proleptic_gregorian", "360_day", "4h"), ], ) @pytest.mark.parametrize("align_on", ["date", "year"]) @@ -356,7 +356,7 @@ def test_convert_calendar_360_days_random(): dims=("time",), coords={ "time": date_range( - "2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="default" + "2004-01-01", "2004-12-31T23:59:59", freq="12h", calendar="default" ) }, ) @@ -365,7 +365,7 @@ def test_convert_calendar_360_days_random(): dims=("time",), coords={ "time": date_range( - "2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day" + "2004-01-01", "2004-12-30T23:59:59", freq="12h", calendar="360_day" ) }, ) @@ -394,7 +394,7 @@ def test_convert_calendar_360_days_random(): "source,target,freq", [ ("standard", "noleap", "D"), - ("noleap", "default", "4H"), + ("noleap", "default", "4h"), ("noleap", "all_leap", "M"), ("360_day", "noleap", "D"), ("noleap", "360_day", "D"), @@ -415,7 +415,7 @@ def test_convert_calendar_missing(source, target, freq): np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} ) out = convert_calendar(da_src, target, missing=np.nan, align_on="date") - assert xr.infer_freq(out.time) == freq + assert fix_freq(xr.infer_freq(out.time), warn=False) == freq if source == "360_day": assert out.time[-1].dt.day == 31 @@ -624,7 +624,7 @@ def test_doy_to_days_since(): [ ("4AS-JUL", 4, "A", True, "JUL"), ("M", 1, "M", False, None), - ("YS", 1, "A", True, "JAN"), + ("YS", 1, "Y", True, "JAN"), ("3A", 3, "A", False, "DEC"), ("D", 1, "D", True, None), ("3W", 21, "D", True, None), @@ -710,11 +710,14 @@ def test_convert_doy(): "freq,exp,to,warn", [ ("Y", "YE-DEC", "new", "end-anchored"), - ("YS", "YS-JAN", "new", "Pandas changed"), + ("AS", "YS-JAN", "new", "Pandas changed"), ("T", "min", "new", "T should now be written min"), ("56S", "56s", "new", "S should now be written s"), ("MS", "MS", "new", None), - ("AS-MAR", "YS-MAR", "old", None), + ("Y", "A-DEC", "inter", None), + ("AS", "AS-JAN", "inter", None), + ("T", "min", "inter", "T should now be written min"), + ("AS-MAR", "AS-MAR", "old", None), ("min", "T", "old", None), ("4Q-DEC", "4Q-DEC", "old", None), ], diff --git a/tests/test_checks.py b/tests/test_checks.py index 7f54dd66f..e5b36acc3 100644 --- a/tests/test_checks.py +++ b/tests/test_checks.py @@ -149,25 +149,25 @@ def test_check_hourly(self, date_range, random): } n = 100 - time = date_range("2000-01-01", freq="H", periods=n) + time = date_range("2000-01-01", freq="h", periods=n) da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs) - datachecks.check_freq(da, "H") + datachecks.check_freq(da, "h") - time = date_range("2000-01-01", freq="3H", periods=n) + time = date_range("2000-01-01", freq="3h", periods=n) da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs) with pytest.raises(ValidationError): 
- datachecks.check_freq(da, "H") + datachecks.check_freq(da, "h") with pytest.raises(ValidationError): - datachecks.check_freq(da, ["H", "D"]) + datachecks.check_freq(da, ["h", "D"]) - datachecks.check_freq(da, "H", strict=False) - datachecks.check_freq(da, ["H", "D"], strict=False) - datachecks.check_freq(da, "3H") - datachecks.check_freq(da, ["H", "3H"]) + datachecks.check_freq(da, "h", strict=False) + datachecks.check_freq(da, ["h", "D"], strict=False) + datachecks.check_freq(da, "3h") + datachecks.check_freq(da, ["h", "3h"]) with pytest.raises(ValidationError, match="Unable to infer the frequency of"): - datachecks.check_freq(da.where(da.time.dt.dayofyear != 5, drop=True), "3H") + datachecks.check_freq(da.where(da.time.dt.dayofyear != 5, drop=True), "3h") def test_common_time(self, tas_series, date_range, random): tas_attrs = { @@ -176,7 +176,7 @@ def test_common_time(self, tas_series, date_range, random): } n = 100 - time = date_range("2000-01-01", freq="H", periods=n) + time = date_range("2000-01-01", freq="h", periods=n) da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs) # No freq @@ -187,7 +187,7 @@ def test_common_time(self, tas_series, date_range, random): datachecks.check_common_time([db, da]) # Not same freq - time = date_range("2000-01-01", freq="6H", periods=n) + time = date_range("2000-01-01", freq="6h", periods=n) db = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs) with pytest.raises(ValidationError, match="Inputs have different frequencies"): datachecks.check_common_time([db, da]) @@ -197,6 +197,6 @@ def test_common_time(self, tas_series, date_range, random): db["time"] = db.time + pd.Timedelta(30, "min") with pytest.raises( ValidationError, - match=r"All inputs have the same frequency \(H\), but they are not anchored on the same minutes", + match=r"All inputs have the same frequency \(h\), but they are not anchored on the same minutes", ): datachecks.check_common_time([db, da]) diff --git a/tests/test_generic_indicators.py b/tests/test_generic_indicators.py index f8718e659..6f5c0083b 100644 --- a/tests/test_generic_indicators.py +++ b/tests/test_generic_indicators.py @@ -104,7 +104,7 @@ def test_missing(self, ndq_series): np.testing.assert_array_equal(out.sel(time="1902").isnull(), True) def test_3hourly(self, pr_hr_series, random): - pr = pr_hr_series(random.random(366 * 24)).resample(time="3H").mean() + pr = pr_hr_series(random.random(366 * 24)).resample(time="3h").mean() out = generic.stats(pr, freq="MS", op="var") assert out.units == "kg^2 m-4 s-2" assert out.long_name == "Variance of variable" diff --git a/tests/test_helpers.py b/tests/test_helpers.py index fc808570d..2cb66ee39 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -88,7 +88,7 @@ def test_day_lengths(method): def test_cosine_of_solar_zenith_angle(): - time = xr.date_range("1900-01-01T00:30", "1900-01-03", freq="H") + time = xr.date_range("1900-01-01T00:30", "1900-01-03", freq="h") time = xr.DataArray(time, dims=("time",), coords={"time": time}, name="time") lat = xr.DataArray( [0, 45, 70], dims=("site",), name="lat", attrs={"units": "degree_north"} diff --git a/tests/test_indices.py b/tests/test_indices.py index 77989b7b8..03b3191e0 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -974,7 +974,7 @@ def test_southhemisphere(self, tas_series): tas = tas_series(np.zeros(2 * 365), start="2000/1/1") warm_period = tas.sel(time=slice("2000-11-01", "2001-03-01")) tas = tas.where(~tas.time.isin(warm_period.time), 280) - gsl = 
xci.growing_season_length(tas, mid_date="01-01", freq="AS-Jul") + gsl = xci.growing_season_length(tas, mid_date="01-01", freq="AS-JUL") np.testing.assert_array_equal(gsl.sel(time="2000-07-01"), 121) diff --git a/tests/test_missing.py b/tests/test_missing.py index f30c8ec84..f4b39091b 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -17,9 +17,9 @@ class TestMissingBase: def test_3hourly_input(self, random): """Creating array with 21 days of 3h""" n = 21 * 8 - time = xr.cftime_range(start="2002-01-01", periods=n, freq="3H") + time = xr.cftime_range(start="2002-01-01", periods=n, freq="3h") ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) - mb = missing.MissingBase(ts, freq="MS", src_timestep="3H") + mb = missing.MissingBase(ts, freq="MS", src_timestep="3h") # Make sure count is 31 * 8, because we're requesting a MS freq. assert mb.count == 31 * 8 @@ -35,7 +35,7 @@ def test_monthly_input(self, random): n = 5 time = xr.cftime_range(start="2002-06-01", periods=n, freq="MS") ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) - mb = missing.MissingBase(ts, freq="AS", src_timestep="M", season="JJA") + mb = missing.MissingBase(ts, freq="YS", src_timestep="M", season="JJA") assert mb.count == 3 def test_seasonal_input(self, random): @@ -139,14 +139,14 @@ def test_no_freq(self, tasmin_series): t = list(range(31)) t.pop(5) ts2 = ts.isel(time=t) - miss = missing.missing_any(ts2, freq=None, src_timestep="H") + miss = missing.missing_any(ts2, freq=None, src_timestep="h") np.testing.assert_array_equal(miss, True) # With indexer miss = missing.missing_any(ts, freq=None, month=[7]) np.testing.assert_array_equal(miss, False) - miss = missing.missing_any(ts2, freq=None, month=[7], src_timestep="H") + miss = missing.missing_any(ts2, freq=None, month=[7], src_timestep="h") np.testing.assert_array_equal(miss, True) def test_hydro(self, open_dataset): @@ -264,7 +264,7 @@ def pr(self, pr_hr_series): def test_any(self, pr_hr_series): pr = self.pr(pr_hr_series) - out = missing.missing_any(pr, "D", src_timestep="H") + out = missing.missing_any(pr, "D", src_timestep="h") np.testing.assert_array_equal( out, [True] + 8 * [False] + [True], @@ -272,7 +272,7 @@ def test_any(self, pr_hr_series): def test_pct(self, pr_hr_series): pr = self.pr(pr_hr_series) - out = missing.missing_pct(pr, "D", src_timestep="H", tolerance=0.1) + out = missing.missing_pct(pr, "D", src_timestep="h", tolerance=0.1) np.testing.assert_array_equal( out, 9 * [False] + [True], @@ -280,7 +280,7 @@ def test_pct(self, pr_hr_series): def test_at_least_n_valid(self, pr_hr_series): pr = self.pr(pr_hr_series) - out = missing.at_least_n_valid(pr, "D", src_timestep="H", n=20) + out = missing.at_least_n_valid(pr, "D", src_timestep="h", n=20) np.testing.assert_array_equal( out, 9 * [False] + [True], diff --git a/tests/test_precip.py b/tests/test_precip.py index 17a1bacc0..a6f6af2c8 100644 --- a/tests/test_precip.py +++ b/tests/test_precip.py @@ -575,7 +575,7 @@ def test_days_over_precip_thresh__seasonal_indexer(open_dataset): per = pr.quantile(0.8, "time", keep_attrs=True) # WHEN out = atmos.days_over_precip_thresh( - pr, per, freq="AS", date_bounds=("01-10", "12-31") + pr, per, freq="YS", date_bounds=("01-10", "12-31") ) # THEN np.testing.assert_almost_equal(out[0], np.array([81.0, 66.0, 66.0, 75.0])) diff --git a/tests/test_temperature.py b/tests/test_temperature.py index 918df2b41..3cbb97551 100644 --- a/tests/test_temperature.py +++ b/tests/test_temperature.py @@ -1144,7 +1144,7 @@ def 
test_tx90p__seasonal_indexer(self, tasmax_series):
         # create cold spell in june
         tas[175:180] = 1
         # WHEN
-        out = atmos.tx90p(tas, t90, freq="AS", season="JJA")
+        out = atmos.tx90p(tas, t90, freq="YS", season="JJA")
         # THEN
         assert out[0] == 87  # non regression test
@@ -1489,7 +1489,7 @@ def test_simple(self, tas_series):

         tg = tas_series(a + K2C, start="1/1/2000")

-        out = atmos.cold_spell_frequency(tg, freq="AS")
+        out = atmos.cold_spell_frequency(tg, freq="YS")

         np.testing.assert_array_equal(out, 1)

@@ -1500,7 +1500,7 @@ def test_simple(self, tas_series):

         tg = tas_series(a + K2C, start="1/1/2000")

-        out = atmos.cold_spell_max_length(tg, freq="AS")
+        out = atmos.cold_spell_max_length(tg, freq="YS")

         np.testing.assert_array_equal(out, 5)

@@ -1511,5 +1511,5 @@ def test_simple(self, tas_series):

         tg = tas_series(a + K2C, start="1/1/2000")

-        out = atmos.cold_spell_total_length(tg, freq="AS")
+        out = atmos.cold_spell_total_length(tg, freq="YS")

         np.testing.assert_array_equal(out, 8)
diff --git a/xclim/core/bootstrapping.py b/xclim/core/bootstrapping.py
index f32cf8318..3bc2df1ec 100644
--- a/xclim/core/bootstrapping.py
+++ b/xclim/core/bootstrapping.py
@@ -13,7 +13,7 @@

 import xclim.core.utils

-from .calendar import convert_calendar, parse_offset, percentile_doy
+from .calendar import construct_offset, convert_calendar, parse_offset, percentile_doy

 BOOTSTRAP_DIM = "_bootstrap"

@@ -202,12 +202,7 @@ def bootstrap_func(compute_index_func: Callable, **kwargs) -> xarray.DataArray:

 def _get_bootstrap_freq(freq):
     _, base, start_anchor, anchor = parse_offset(freq)  # noqa
-    bfreq = "A"
-    if start_anchor:
-        bfreq += "S"
-    if base in ["A", "Q"] and anchor is not None:
-        bfreq = f"{bfreq}-{anchor}"
-    return bfreq
+    return construct_offset(1, "Y", start_anchor, anchor)


 def _get_year_label(year_dt) -> str:
diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py
index fa3e00dbc..06d032ec3 100644
--- a/xclim/core/calendar.py
+++ b/xclim/core/calendar.py
@@ -24,7 +24,7 @@

 from .formatting import update_xclim_history

-FREQ_PATT = re.compile(r"(?P<mult>\d*)(?P<base>[A-Za-z]+)(-(?P<anchor>[A-Z]{3}))?")
+FREQ_PATT = re.compile(r"-?(?P<mult>\d*)(?P<base>[A-Za-z]+)(-(?P<anchor>[A-Z]{3}))?")

 __all__ = [
     "DayOfYearStr",
@@ -273,7 +273,8 @@ def convert_doy(
         Name of the temporal dimension.
     """
     source_cal = source_cal or source.attrs.get("calendar", get_calendar(source[dim]))
-    is_calyear = xr.infer_freq(source[dim]) in ("AS-JAN", "A-DEC")
+    # FIXME: Fix choices when we pin pandas >= 2.2
+    is_calyear = xr.infer_freq(source[dim]) in ("AS-JAN", "YS-JAN", "A-DEC", "YE-DEC")

     if is_calyear:  # Fast path
         year_of_the_doy = source[dim].dt.year
@@ -765,6 +766,10 @@ def compare_offsets(freqA: str, op: str, freqB: str) -> bool:  # noqa
     """
     from ..indices.generic import get_op  # pylint: disable=import-outside-toplevel

+    # FIXME: Remove when we pin pandas >= 2.2
+    freqA = fix_freq(freqA)
+    freqB = fix_freq(freqB)
+
     # Get multiplier and base frequency
     t_a, b_a, _, _ = parse_offset(freqA)
     t_b, b_b, _, _ = parse_offset(freqB)
@@ -801,6 +806,21 @@ def compare_offsets(freqA: str, op: str, freqB: str) -> bool:  # noqa
 ]


+def _get_freq_version(version: str | None = None) -> str:
+    if version is None:
+        if Version("2023.11.0") <= Version(xr.__version__) and Version(
+            "2.2"
+        ) <= Version(pd.__version__):
+            version = "new"
+        elif Version("2023.11.0") <= Version(xr.__version__) or Version(
+            "2.2"
+        ) <= Version(pd.__version__):
+            version = "inter"
+        else:
+            version = "old"
+    return version
+
+
 def parse_offset(freq: str) -> Sequence[str]:
     """Parse an offset string.
@@ -847,9 +867,7 @@

     if base in "AYQ" and anchor is None:
         anchor = "JAN" if start else "DEC"
-    if base == "A":
-        base = "Y"
-    elif base == "W":
+    if base == "W":
         mult = 7 * mult
         base = "D"
         anchor = None
@@ -882,7 +900,7 @@ def construct_offset(
         If True and base in [Y, A, Q, M], adds the "S" flag.
         If False and base in [Y, A, Q, M] and the "new" version is needed, adds the "E" flag.
     anchor: str, optional
-        The month anchor of the offset. Defaults to JAN for bases AS, Y and QS and to DEC for bases A, AE, Q and QE.
+        The month anchor of the offset. Defaults to JAN for bases AS, YS and QS and to DEC for bases A, Y, YE, Q and QE.
     version : {'new', 'old'}, optional
         Which version to return. If None (default), the new version is returned
         if xarray >= 2023.11.0 or pandas >= 2.2.
@@ -896,13 +914,8 @@ def construct_offset(
     -----
     This provides the mirror opposite functionality of :py:func:`parse_offset`.
     """
-    if (
-        version is None
-        and (
-            Version("2023.11.0") <= Version(xr.__version__)
-            or Version("2.2") <= Version(pd.__version__)
-        )
-    ) or version == "new":
+    version = _get_freq_version(version)
+    if version == "new":
         defend = "E"
     else:
         defend = ""
@@ -912,6 +925,11 @@ def construct_offset(
         start = ""
     if anchor is None and base in "AQY":
         anchor = "JAN" if start_anchored else "DEC"
+    if version in ["inter", "old"] and base == "Y":
+        if anchor == "JAN":
+            anchor = ""
+        else:
+            base = "A"
     return (
         f"{mult if mult > 1 else ''}{base}{start}{'-' if anchor else ''}{anchor or ''}"
     )
@@ -928,32 +946,55 @@ def construct_offset(


 # TODO: This can be removed when we pin pandas >= 2.2
-def fix_freq(freq: str, version: str | None = None):
+def fix_freq(freq: str, version: str | None = None, warn: bool = True):
     """
     Convert the given freq code to the requested version, defaulting to the one appropriate for the installed packages.

-    Warn if the old version was given but the new one is needed.
+    Warn if the old version was given but a new one is needed.
+
+    Parameters
+    ----------
+    freq : str
+        A frequency code as defined by pandas but with the same restrictions for cftime as xarray.
+        Any code supported by pandas > 1.5 is accepted.
+    version : {'new', 'inter', 'old'}, optional
+        If None (default), the version is guessed from the version of the installed packages.
+        "new" means the default syntax supported by pandas >= 2.2 and xarray >= 2023.11.0.
+        "old" means the syntax of pandas < 2.2 and xarray < 2023.11.0.
+        "inter" is for the special case where pandas < 2.2 but xarray >= 2023.11.0.
+        This is the same as "new" for subdaily frequencies, and the same as "old" for coarser frequencies.
+    warn : bool
+        Whether to emit a warning or not if an old frequency was passed where a new one is needed.
+
+    Returns
+    -------
+    str : The frequency code in the requested syntax version.
     """
+    if freq is None:
+        return freq
     mult, base, start, anchor = parse_offset(freq)
-    warn = ""
-    if version == "new" or (
-        version is None
-        and (
-            Version("2023.11.0") <= Version(xr.__version__)
-            or Version("2.2") <= Version(pd.__version__)
-        )
-    ):
+    msgs = []
+    version = _get_freq_version(version)
+    if version == "new":
+        if base == "A":
+            base = "Y"
+            msgs.append("annual frequency is now only written with 'Y'")
+        if base in "YQM" and (not start and "E" not in freq):
+            msgs.append(
+                f"end-anchored {base} periods should now be written explicitly as {base}E"
+            )
+    if version in ["new", "inter"]:
         # we want new version
         if base in FREQ_OLD_NEW:
             prev_base = base
             base = FREQ_OLD_NEW[base]
-            warn = f": {prev_base} should now be written {base}"
-        elif base in "YQM" and (not start and "E" not in freq):
-            warn = f": end-anchored {base} periods should now explicitly say it with {base}E"
-    correct = construct_offset(mult, base, start, anchor, version="new")
-    if correct != freq:
+            msgs.append(f"{prev_base} should now be written {base}")
+
+    correct = construct_offset(mult, base, start, anchor, version=version)
+    if warn and msgs:
         warnings.warn(
-            f"Pandas changed the default frequenies syntax {warn} ({freq} -> {correct})",
+            f"Pandas changed the default frequencies syntax {', '.join(msgs)} ({freq} -> {correct})",
             FutureWarning,
         )
     else:
@@ -992,6 +1033,9 @@ def is_offset_divisor(divisor: str, offset: str):
     >>> is_offset_divisor("D", "M")
     True
     """
+    divisor = fix_freq(divisor)
+    offset = fix_freq(offset)
+
     if compare_offsets(divisor, ">", offset):
         return False
     # Reconstruct offsets anchored at the start of the period
@@ -1003,7 +1047,7 @@ def is_offset_divisor(divisor: str, offset: str):
     offBs = pd.tseries.frequencies.to_offset(construct_offset(mB, bB, True, aB))
     tB = pd.date_range("1970-01-01T00:00:00", freq=offBs, periods=13)

-    if bA in "WDHTLUN" or bB in "WDHTLUN":
+    if bA not in "YAQM" or bB not in "YAQM":
         # Simple length comparison is sufficient for submonthly freqs
         # In case one of bA or bB is > W, we test many to be sure.
         tA = pd.date_range("1970-01-01T00:00:00", freq=offAs, periods=13)
@@ -1152,13 +1196,13 @@ def time_bnds(  # noqa: C901
     time : DataArray, Dataset, CFTimeIndex, DatetimeIndex, DataArrayResample or DatasetResample
         Object which contains a time index as a proxy representation for a period index.
     freq : str, optional
-        String specifying the frequency/offset such as 'MS', '2D', or '3T'
+        String specifying the frequency/offset such as 'MS', '2D', or '3min'.
         If not given, it is inferred from the time index, which means that index must
         have at least three elements.
     precision : str, optional
         A timedelta representation that :py:class:`pandas.Timedelta` understands.
         The time bounds will be correct up to that precision. If not given,
-        1 ms ("1U") is used for CFtime indexes and 1 ns ("1N") for numpy datetime64 indexes.
+        1 ms is used for CFtime indexes and 1 ns for numpy datetime64 indexes.

     Returns
     -------
@@ -1170,11 +1214,11 @@ def time_bnds(  # noqa: C901
     Notes
     -----
     xclim assumes that indexes for greater-than-day frequencies are "floored" down to a daily resolution.
-    For example, the coordinate "2000-01-31 00:00:00" with a "M" frequency is assumed to mean a period
+    For example, the coordinate "2000-01-31 00:00:00" with a "ME" frequency is assumed to mean a period
     going from "2000-01-01 00:00:00" to "2000-01-31 23:59:59.999999".
Similarly, it assumes that daily and finer frequencies yield indexes pointing to the period's start. - So "2000-01-31 00:00:00" with a "3H" frequency, means a period going from "2000-01-31 00:00:00" to + So "2000-01-31 00:00:00" with a "3h" frequency, means a period going from "2000-01-31 00:00:00" to "2000-01-31 02:59:59.999999". """ if isinstance(time, (xr.DataArray, xr.Dataset)): @@ -1200,32 +1244,33 @@ def time_bnds( # noqa: C901 elif hasattr(freq, "freqstr"): # When freq is a Offset freq = freq.freqstr + elif isinstance(freq, str): + freq = fix_freq(freq) freq_base, freq_is_start = parse_offset(freq)[1:3] # Normalizing without using `.normalize` because cftime doesn't have it - floor = {"hour": 0, "minute": 0, "second": 0, "microsecond": 0, "nanosecond": 0} - if freq_base in "HTSLUN": # This is verbose, is there a better way? - floor.pop("hour") - if freq_base in "TSLUN": - floor.pop("minute") - if freq_base in "SLUN": - floor.pop("second") - if freq_base in "UN": - floor.pop("microsecond") - if freq_base in "N": - floor.pop("nanosecond") + norm_targets = [ + ("hour", "h"), + ("minute", "min"), + ("second", "s"), + ("microsecond", "us"), + ("nanosecond", "ns"), + ] + floor = { + name: 0 for name, code in norm_targets if compare_offsets(freq_base, ">", code) + } if isinstance(time, xr.CFTimeIndex): period = xr.coding.cftime_offsets.to_offset(freq) is_on_offset = period.onOffset - eps = pd.Timedelta(precision or "1U").to_pytimedelta() + eps = pd.Timedelta(precision or "1us").to_pytimedelta() day = pd.Timedelta("1D").to_pytimedelta() - floor.pop("nanosecond") # unsuported by cftime + floor.pop("nanosecond", None) # unsuported by cftime else: period = pd.tseries.frequencies.to_offset(freq) is_on_offset = period.is_on_offset - eps = pd.Timedelta(precision or "1N") + eps = pd.Timedelta(precision or "1ns") day = pd.Timedelta("1D") def shift_time(t): @@ -1506,7 +1551,7 @@ def date_range_like(source: xr.DataArray, calendar: str) -> xr.DataArray: Exception when the source is in 360_day and the end of the range is the 30th of a 31-days month, then the 31st is appended to the range. """ - freq = xr.infer_freq(source) + freq = fix_freq(xr.infer_freq(source), warn=False) if freq is None: raise ValueError( "`date_range_like` was unable to generate a range as the source frequency was not inferrable." diff --git a/xclim/core/datachecks.py b/xclim/core/datachecks.py index f1dba1e5e..05e8e5f70 100644 --- a/xclim/core/datachecks.py +++ b/xclim/core/datachecks.py @@ -10,7 +10,7 @@ import xarray as xr -from .calendar import compare_offsets, parse_offset +from .calendar import compare_offsets, fix_freq, parse_offset from .options import datacheck from .utils import ValidationError @@ -24,12 +24,12 @@ def check_freq(var: xr.DataArray, freq: str | Sequence[str], strict: bool = True var : xr.DataArray Input array. freq : str or sequence of str - The expected temporal frequencies, using Pandas frequency terminology ({'A', 'M', 'D', 'H', 'T', 'S', 'L', 'U'}) + The expected temporal frequencies, using Pandas frequency terminology ({'Y', 'M', 'D', 'h', 'min', 's', 'ms', 'us'}) and multiples thereof. To test strictly for 'W', pass '7D' with `strict=True`. This ignores the start flag and the anchor (ex: 'AS-JUL' will validate against 'Y'). strict : bool - Whether multiples of the frequencies are considered invalid or not. With `strict` set to False, a '3H' series - will not raise an error if freq is set to 'H'. + Whether multiples of the frequencies are considered invalid or not. 
With `strict` set to False, a '3h' series + will not raise an error if freq is set to 'h'. Raises ------ @@ -39,8 +39,8 @@ def check_freq(var: xr.DataArray, freq: str | Sequence[str], strict: bool = True """ if isinstance(freq, str): freq = [freq] - exp_base = [parse_offset(frq)[1] for frq in freq] - v_freq = xr.infer_freq(var.time) + exp_base = [parse_offset(fix_freq(frq))[1] for frq in freq] + v_freq = fix_freq(xr.infer_freq(var.time), warn=False) if v_freq is None: raise ValidationError( "Unable to infer the frequency of the time series. " @@ -83,7 +83,7 @@ def check_common_time(inputs: Sequence[xr.DataArray]): Input arrays. """ # Check all have the same freq - freqs = [xr.infer_freq(da.time) for da in inputs] + freqs = [fix_freq(xr.infer_freq(da.time), warn=False) for da in inputs] if None in freqs: raise ValidationError( "Unable to infer the frequency of the time series. " @@ -98,7 +98,8 @@ def check_common_time(inputs: Sequence[xr.DataArray]): # Check if anchor is the same freq = freqs[0] base = parse_offset(freq)[1] - fmt = {"H": ":%M", "D": "%H:%M"} + # FIXME: Remove H when we pin pandas >= 2.2 + fmt = {"H": ":%M", "h": ":%M", "D": "%H:%M"} if base in fmt: outs = {da.indexes["time"][0].strftime(fmt[base]) for da in inputs} if len(outs) > 1: diff --git a/xclim/core/dataflags.py b/xclim/core/dataflags.py index 86f0606a0..6c7078daa 100644 --- a/xclim/core/dataflags.py +++ b/xclim/core/dataflags.py @@ -16,7 +16,7 @@ from ..indices.generic import binary_ops from ..indices.run_length import suspicious_run -from .calendar import climatological_mean_doy, within_bnds_doy +from .calendar import climatological_mean_doy, fix_freq, within_bnds_doy from .formatting import update_xclim_history from .units import convert_units_to, declare_units, infer_context, str2pint from .utils import ( @@ -703,7 +703,7 @@ def _missing_vars(function, dataset: xarray.Dataset, var_provided: str): # Aggregation if freq is not None: - out = out.resample(time=freq).any() + out = out.resample(time=fix_freq(freq)).any() if dims is not None: out = out.any(dims) diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py index af0d359ef..f9d9bdb6b 100644 --- a/xclim/core/formatting.py +++ b/xclim/core/formatting.py @@ -113,7 +113,7 @@ def format_field(self, value, format_spec): The base values may be given using unix shell-like patterns: >>> fmt = AttrFormatter( - ... {"AS-*": ["annuel", "annuelle"], "MS": ["mensuel", "mensuelle"]}, + ... {"YS-*": ["annuel", "annuelle"], "MS": ["mensuel", "mensuelle"]}, ... ["m", "f"], ... ) >>> fmt.format( @@ -163,6 +163,7 @@ def _match_value(self, value): # Arguments to "freq" "D": ["daily", "days"], "YS": ["annual", "years"], + "YS-*": ["annual", "years"], "AS-*": ["annual", "years"], "MS": ["monthly", "months"], "QS-*": ["seasonal", "seasons"], @@ -571,7 +572,7 @@ def _gen_parameters_section(parameters: dict, allowed_periods: list[str] = None) desc_str = param.description if param.kind == InputKind.FREQ_STR: desc_str += ( - " See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset" + " See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects" "-aliases for available options." ) if allowed_periods is not None: diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index d24a0c213..dbba1f928 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -120,7 +120,7 @@ from .. import indices from . 
import datachecks -from .calendar import parse_offset, select_time +from .calendar import fix_freq, parse_offset, select_time from .cfchecks import cfcheck_from_name from .formatting import ( AttrFormatter, @@ -906,6 +906,8 @@ def _parse_variables_from_call(self, args, kwds) -> tuple[OrderedDict, dict]: # If a non-optional variable OR None, store the arg if param.kind == InputKind.VARIABLE or data is not None: das[name] = data + elif param.kind == InputKind.FREQ_STR and params[name] is not None: + params[name] = fix_freq(params[name]) else: params[name] = param.value @@ -1478,7 +1480,7 @@ class ResamplingIndicator(CheckMissingIndicator): Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. allowed_periods : Sequence[str], optional A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be - computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the + computed annually only will have `allowed_periods=["Y"]`. `None` means "any period" or that the indicator doesn't take a `freq` argument. """ @@ -1559,7 +1561,7 @@ class Daily(ResamplingIndicator): class Hourly(ResamplingIndicator): """Class for hourly inputs and resampling computes.""" - src_freq = "H" + src_freq = fix_freq("h") base_registry["Indicator"] = Indicator diff --git a/xclim/core/missing.py b/xclim/core/missing.py index 1eb95eebc..b4bd223a2 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -28,7 +28,9 @@ import xarray as xr from .calendar import ( + compare_offsets, date_range, + fix_freq, get_calendar, is_offset_divisor, parse_offset, @@ -51,7 +53,8 @@ "register_missing_method", ] -_np_timedelta64 = {"D": "timedelta64[D]", "H": "timedelta64[h]"} +# FIXME: Remove H when we pin xarray >= 2023.11.0 +_np_timedelta64 = {"D": "timedelta64[D]", "h": "timedelta64[h]", "H": "timedelta64[h]"} class MissingBase: @@ -65,7 +68,7 @@ class MissingBase: def __init__(self, da, freq, src_timestep, **indexer): if src_timestep is None: - src_timestep = xr.infer_freq(da.time) + src_timestep = fix_freq(xr.infer_freq(da.time), warn=False) if src_timestep is None: raise ValueError( "`src_timestep` must be given as it cannot be inferred." @@ -129,6 +132,7 @@ def prepare(self, da, freq, src_timestep, **indexer): flagged. """ # This function can probably be made simpler once CFPeriodIndex is implemented. + freq = fix_freq(freq) if freq is not None else None null = self.is_null(da, freq, **indexer) p_freq, _ = self.split_freq(freq) @@ -215,7 +219,7 @@ class MissingAny(MissingBase): Input array. freq: str Resampling frequency. - src_timestep: {"D", "H", "M"} + src_timestep: {"D", "h", "M"} Expected input frequency. indexer: {dim: indexer, }, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter @@ -291,11 +295,11 @@ def __init__(self, da, freq, src_timestep, **indexer): @classmethod def execute(cls, da, freq, src_timestep, options, indexer): """Create the instance and call it in one operation.""" - if freq[0] not in ["Y", "A", "Q", "M"]: + if compare_offsets(freq, "<", "MS"): raise ValueError( "MissingWMO can only be used with Monthly or longer frequencies." ) - obj = cls(da, "M", src_timestep, **indexer) + obj = cls(da, "MS", src_timestep, **indexer) miss = obj(**options) # Replace missing months by NaNs mda = miss.where(miss == 0) @@ -334,7 +338,7 @@ class MissingPct(MissingBase): Resampling frequency. 
tolerance : float Fraction of missing values that are tolerated [0,1]. - src_timestep : {"D", "H"} + src_timestep : {"D", "h"} Expected input frequency. indexer : {dim: indexer, }, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values, @@ -371,7 +375,7 @@ class AtLeastNValid(MissingBase): Resampling frequency. n : int Minimum of valid values required. - src_timestep : {"D", "H"} + src_timestep : {"D", "h"} Expected input frequency. indexer : {dim: indexer, }, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter @@ -386,7 +390,7 @@ class AtLeastNValid(MissingBase): def __init__(self, da, freq, src_timestep, **indexer): # No need to compute count, so no check required on `src_timestep`. - self.null = self.is_null(da, freq, **indexer) + self.null = self.is_null(da, fix_freq(freq), **indexer) self.count = None # Not needed def is_missing(self, null, count, n: int = 20): @@ -442,29 +446,29 @@ def execute(cls, da, freq, src_timestep, options, indexer): def missing_any(da, freq, src_timestep=None, **indexer): # noqa: D103 - src_timestep = src_timestep or xr.infer_freq(da.time) + src_timestep = src_timestep or fix_freq(xr.infer_freq(da.time), warn=False) return MissingAny(da, freq, src_timestep, **indexer)() def missing_wmo(da, freq, nm=11, nc=5, src_timestep=None, **indexer): # noqa: D103 - src_timestep = src_timestep or xr.infer_freq(da.time) + src_timestep = src_timestep or fix_freq(xr.infer_freq(da.time), warn=False) return MissingWMO.execute( da, freq, src_timestep, options=dict(nm=nm, nc=nc), indexer=indexer ) def missing_pct(da, freq, tolerance, src_timestep=None, **indexer): # noqa: D103 - src_timestep = src_timestep or xr.infer_freq(da.time) + src_timestep = src_timestep or fix_freq(xr.infer_freq(da.time), warn=False) return MissingPct(da, freq, src_timestep, **indexer)(tolerance=tolerance) def at_least_n_valid(da, freq, n=1, src_timestep=None, **indexer): # noqa: D103 - src_timestep = src_timestep or xr.infer_freq(da.time) + src_timestep = src_timestep or fix_freq(xr.infer_freq(da.time), warn=False) return AtLeastNValid(da, freq, src_timestep, **indexer)(n=n) def missing_from_context(da, freq, src_timestep=None, **indexer): # noqa: D103 - src_timestep = src_timestep or xr.infer_freq(da.time) + src_timestep = src_timestep or fix_freq(xr.infer_freq(da.time), warn=False) return FromContext.execute(da, freq, src_timestep, options={}, indexer=indexer) diff --git a/xclim/core/units.py b/xclim/core/units.py index 8f1b31ff7..918d0d28d 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -26,7 +26,7 @@ from boltons.funcutils import wraps from yaml import safe_load -from .calendar import date_range, get_calendar, parse_offset +from .calendar import date_range, fix_freq, get_calendar, parse_offset from .options import datacheck from .utils import InputKind, Quantified, ValidationError, infer_kind_from_parameter @@ -457,6 +457,7 @@ def cf_conversion(standard_name: str, conversion: str, direction: str) -> str | FREQ_UNITS = { "N": "ns", + "U": "us", "L": "ms", "S": "s", "T": "min", @@ -502,11 +503,11 @@ def infer_sampling_units( dimmed = getattr(da, dim) freq = xr.infer_freq(dimmed) if freq is None: - freq = deffreq + freq = fix_freq(deffreq) multi, base, _, _ = parse_offset(freq) try: - out = multi, FREQ_UNITS[base] + out = multi, base if base in FREQ_UNITS.values() else FREQ_UNITS[base] except KeyError as err: raise ValueError( f"Sampling frequency {freq} has no 
corresponding units." @@ -636,7 +637,7 @@ def _rate_and_amount_converter( ) from err if freq is not None: multi, base, start_anchor, _ = parse_offset(freq) - if base in ["M", "Q", "A"]: + if base in "YAQM": start = time.indexes[dim][0] if not start_anchor: # Anchor is on the end of the period, substract 1 period. @@ -653,13 +654,14 @@ def _rate_and_amount_converter( attrs=da[dim].attrs, ) else: - m, u = multi, FREQ_UNITS[base] + m = multi + u = base if base in FREQ_UNITS.values() else FREQ_UNITS[base] # Freq is month, season or year, which are not constant units, or simply freq is not inferrable. if u is None: # Get sampling period lengths in nanoseconds # In the case with no freq, last period as the same length as the one before. - # In the case with freq in M, Q, A, this has been dealt with above in `time` + # In the case with freq in M, Q, Y, this has been dealt with above in `time` # and `label` has been updated accordingly. dt = ( time.diff(dim, label=label) diff --git a/xclim/data/anuclim.yml b/xclim/data/anuclim.yml index bdbde1308..a691521ee 100644 --- a/xclim/data/anuclim.yml +++ b/xclim/data/anuclim.yml @@ -15,7 +15,7 @@ references: ANUCLIM https://fennerschool.anu.edu.au/files/anuclim61.pdf (ch. 6) base: ResamplingIndicator indicators: P10_MeanTempWarmestQuarter: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: tg_mean_warmcold_quarter cf_attrs: @@ -25,7 +25,7 @@ indicators: parameters: op: warmest P11_MeanTempColdestQuarter: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: tg_mean_warmcold_quarter cf_attrs: @@ -35,7 +35,7 @@ indicators: parameters: op: coldest P12_AnnualPrecip: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: prcptot cf_attrs: @@ -45,7 +45,7 @@ indicators: units: mm context: hydro P13_PrecipWettestPeriod: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: prcptot_wetdry_period cf_attrs: @@ -56,7 +56,7 @@ indicators: op: wettest context: hydro P14_PrecipDriestPeriod: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: prcptot_wetdry_period cf_attrs: @@ -67,7 +67,7 @@ indicators: op: driest context: hydro P15_PrecipSeasonality: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: precip_seasonality cf_attrs: @@ -76,7 +76,7 @@ indicators: "The standard deviation of the precipitation estimates expressed as a percentage of the mean of those estimates." 
P16_PrecipWettestQuarter: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: prcptot_wetdry_quarter cf_attrs: @@ -86,7 +86,7 @@ indicators: parameters: op: wettest P17_PrecipDriestQuarter: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: prcptot_wetdry_quarter cf_attrs: @@ -97,7 +97,7 @@ indicators: op: driest P18_PrecipWarmestQuarter: src_freq: ['D', '7D', 'M'] - allowed_periods: [A] + allowed_periods: [Y] compute: prcptot_warmcold_quarter cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -107,7 +107,7 @@ indicators: op: warmest P19_PrecipColdestQuarter: src_freq: ['D', '7D', 'M'] - allowed_periods: [A] + allowed_periods: [Y] compute: prcptot_warmcold_quarter cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -116,7 +116,7 @@ indicators: parameters: op: coldest P1_AnnMeanTemp: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: tg_mean cf_attrs: @@ -125,7 +125,7 @@ indicators: long_name: Annual Mean Temperature standard_name: air_temperature P2_MeanDiurnalRange: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: daily_temperature_range cf_attrs: @@ -133,14 +133,14 @@ indicators: long_name: Mean Diurnal Range cell_methods: "time: range" P3_Isothermality: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: isothermality cf_attrs: cell_methods: "time: range" description: "The mean diurnal range (P2) divided by the Annual Temperature Range (P7)." P4_TempSeasonality: - allowed_periods: [A] + allowed_periods: [Y] src_freq: ['D', '7D', 'M'] compute: temperature_seasonality cf_attrs: @@ -150,7 +150,7 @@ indicators: For this calculation, the mean in degrees Kelvin is used. This avoids the possibility of having to divide by zero, but it does mean that the values are usually quite small." 
  P5_MaxTempWarmestPeriod:
-    allowed_periods: [A]
+    allowed_periods: [Y]
     src_freq: ['D', '7D', 'M']
     compute: tx_max
     cf_attrs:
@@ -160,7 +160,7 @@ indicators:
       units: K
       cell_methods: "time: maximum"
   P6_MinTempColdestPeriod:
-    allowed_periods: [A]
+    allowed_periods: [Y]
     src_freq: ['D', '7D', 'M']
     compute: tn_min
     cf_attrs:
@@ -170,7 +170,7 @@ indicators:
       units: K
       cell_methods: "time: minimum"
   P7_TempAnnualRange:
-    allowed_periods: [A]
+    allowed_periods: [Y]
     src_freq: ['D', '7D', 'M']
     compute: extreme_temperature_range
     input:
@@ -184,7 +184,7 @@ indicators:
       freq:
         default: YS
   P8_MeanTempWettestQuarter:
-    allowed_periods: [A]
+    allowed_periods: [Y]
     src_freq: ['D', '7D', 'M']
     compute: tg_mean_wetdry_quarter
     cf_attrs:
@@ -194,7 +194,7 @@ indicators:
       parameters:
         op: wettest
   P9_MeanTempDriestQuarter:
-    allowed_periods: [A]
+    allowed_periods: [Y]
     src_freq: ['D', '7D', 'M']
     compute: tg_mean_wetdry_quarter
     cf_attrs:
diff --git a/xclim/data/fr.json b/xclim/data/fr.json
index 2db49841e..a1cc95aab 100644
--- a/xclim/data/fr.json
+++ b/xclim/data/fr.json
@@ -21,6 +21,13 @@
     "annuelles",
     "années"
   ],
+  "YS-*": [
+    "annuel",
+    "annuelle",
+    "annuels",
+    "annuelles",
+    "années"
+  ],
   "AS-*": [
     "annuel",
     "annuelle",
diff --git a/xclim/data/schema.yml b/xclim/data/schema.yml
index cc3a8f74d..44511ae23 100644
--- a/xclim/data/schema.yml
+++ b/xclim/data/schema.yml
@@ -9,7 +9,7 @@ variables: map(include('variable'), key=regex(r'^[\w]+$'), required=False)
 ---
 indicator:
   abstract: str(required=False)
-  allowed_periods: list(enum('A', 'Q', 'M', 'W'), required=False)
+  allowed_periods: list(enum('Y', 'Q', 'M', 'W'), required=False)
   src_freq: list(str(), required=False)
   base: str(required=False)
   compute: str(required=False)
diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py
index 6dd626cad..da9f74b45 100644
--- a/xclim/ensembles/_base.py
+++ b/xclim/ensembles/_base.py
@@ -11,7 +11,12 @@
 import numpy as np
 import xarray as xr

-from xclim.core.calendar import common_calendar, convert_calendar, get_calendar
+from xclim.core.calendar import (
+    common_calendar,
+    convert_calendar,
+    fix_freq,
+    get_calendar,
+)
 from xclim.core.formatting import update_history
 from xclim.core.utils import calc_perc

@@ -105,7 +110,7 @@ def create_ensemble(
         ds = _ens_align_datasets(
             datasets,
             multifile,
-            resample_freq,
+            fix_freq(resample_freq) if resample_freq else None,
             calendar=calendar,
             cal_kwargs=cal_kwargs or {},
             **xr_kwargs,
diff --git a/xclim/indices/_agro.py b/xclim/indices/_agro.py
index af70b63a4..b611d7847 100644
--- a/xclim/indices/_agro.py
+++ b/xclim/indices/_agro.py
@@ -648,8 +648,8 @@ def dryness_index(
     :cite:cts:`tonietto_multicriteria_2004,riou_determinisme_1994`
     """
-    if parse_offset(freq) != (1, "A", True, "JAN"):
-        raise ValueError(f"Freq not allowed: {freq}. Must be `YS` or `AS-JAN`")
+    if parse_offset(freq) != (1, "Y", True, "JAN"):
+        raise ValueError(f"Freq not allowed: {freq}. Must be `YS` or `AS-JAN`")

     # Resample all variables to monthly totals in mm units.
     evspsblpot = (
@@ -922,7 +922,7 @@ def rain_season(
     method_dry_end: str = "per_day",
     date_min_end: DayOfYearStr = "09-01",
     date_max_end: DayOfYearStr = "12-31",
-    freq="AS-JAN",
+    freq="YS",
 ):
     """Find the length of the rain season and the day of year of its start and its end.
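Reviewer note, not part of the patch: the freq-compatibility round trip introduced in xclim/core/calendar.py can be sanity-checked against the updated test parameters. A minimal sketch, with expected values taken straight from the new `test_fix_freq` and `parse_offset` test tables above; the explicit `version` arguments pin the behaviour instead of relying on the installed pandas/xarray:

    import warnings

    from xclim.core.calendar import construct_offset, fix_freq, parse_offset

    with warnings.catch_warnings():
        # fix_freq emits a FutureWarning when it modernizes an old alias.
        warnings.simplefilter("ignore", FutureWarning)

        # Old aliases are rewritten to the pandas >= 2.2 spellings.
        assert fix_freq("AS", version="new") == "YS-JAN"
        assert fix_freq("T", version="new") == "min"

        # On the mixed "inter" stack (new xarray, old pandas), sub-daily codes
        # adopt the new spelling while coarser ones keep the old one.
        assert fix_freq("AS", version="inter") == "AS-JAN"
        assert fix_freq("T", version="inter") == "min"

    # parse_offset and construct_offset stay mirror images of each other.
    mult, base, start, anchor = parse_offset("YS")  # -> (1, "Y", True, "JAN")
    assert construct_offset(mult, base, start, anchor, version="new") == "YS-JAN"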
diff --git a/xclim/indices/_anuclim.py b/xclim/indices/_anuclim.py index 7608131a7..ddf21c550 100644 --- a/xclim/indices/_anuclim.py +++ b/xclim/indices/_anuclim.py @@ -584,7 +584,7 @@ def _to_quarter( else: raise NotImplementedError( - f'Unknown input time frequency "{freq}": must be one of "D", "W" or "M".' + f'Unknown input time frequency "{freq}": must be one of "D", "W" or "MS".' ) if tas is not None: diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index e4cba0018..fbb31a3b9 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -1062,7 +1062,7 @@ def growing_season_length( For the Northern Hemisphere: - >>> gsl_nh = growing_season_length(tas, mid_date="07-01", freq="AS") + >>> gsl_nh = growing_season_length(tas, mid_date="07-01", freq="YS") If working in the Southern Hemisphere, one can use: diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index f552eb5b3..a8d49e9c8 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -17,6 +17,7 @@ from xclim.core.calendar import ( convert_calendar, doy_to_days_since, + fix_freq, get_calendar, select_time, ) @@ -132,7 +133,7 @@ def default_freq(**indexer) -> str: else: raise ValueError(f"Unknown group `{group}`.") freq = "AS-" + _MONTH_ABBREVIATIONS[month] - return freq + return fix_freq(freq, warn=False) def get_op(op: str, constrain: Sequence[str] | None = None) -> Callable: diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 2fe52b7e9..0a25496ef 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -8,7 +8,7 @@ import numpy as np import xarray as xr -from xclim.core.calendar import resample_doy, select_time +from xclim.core.calendar import fix_freq, resample_doy, select_time from xclim.core.formatting import prefix_attrs, unprefix_attrs, update_history from xclim.core.units import convert_units_to from xclim.core.utils import Quantified, uses_dask @@ -624,7 +624,7 @@ def preprocess_standardized_index( """ # We could allow a more general frequency in this function and move # the constraint {"D", "MS"} in specific indices such as SPI / SPEI. 
- final_freq = freq or xr.infer_freq(da.time) + final_freq = fix_freq(freq or xr.infer_freq(da.time)) try: group = {"D": "time.dayofyear", "MS": "time.month"}[final_freq] except KeyError(): @@ -634,7 +634,7 @@ def preprocess_standardized_index( ) if freq is not None: - da = da.resample(time=freq).mean(keep_attrs=True) + da = da.resample(time=fix_freq(freq)).mean(keep_attrs=True) if uses_dask(da) and len(da.chunks[da.get_axis_num("time")]) > 1: warnings.warn( @@ -716,6 +716,7 @@ def standardized_index_fit_params( with xr.set_options(keep_attrs=True): da = da + convert_units_to(offset, da, context="hydro") + freq = fix_freq(freq) if freq is not None else None da, group = preprocess_standardized_index(da, freq, window, **indexer) params = da.groupby(group).map(fit, dist=dist, method=method) cal_range = ( diff --git a/xclim/sdba/processing.py b/xclim/sdba/processing.py index 7fef219bf..2c6e52d19 100644 --- a/xclim/sdba/processing.py +++ b/xclim/sdba/processing.py @@ -467,11 +467,18 @@ def _get_number_of_elements_by_year(time): mult, freq, _, _ = parse_offset(xr.infer_freq(time)) days_in_year = max_doy[cal] - elements_in_year = {"Q": 4, "M": 12, "D": days_in_year, "H": days_in_year * 24} + # FIXME: Remove "H" when we pin xarray >= 2023.11.0 + elements_in_year = { + "Q": 4, + "M": 12, + "D": days_in_year, + "H": days_in_year * 24, + "h": days_in_year * 24, + } N_in_year = elements_in_year.get(freq, 1) / mult if N_in_year % 1 != 0: raise ValueError( - f"Sampling frequency of the data must be Q, M, D or H and evenly divide a year (got {mult}{freq})." + f"Sampling frequency of the data must be Q, M, D or h and evenly divide a year (got {mult}{freq})." ) return int(N_in_year) diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index fbe42d7a0..8a9ada248 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -24,7 +24,6 @@ MULTIPLICATIVE = "*" ADDITIVE = "+" -loffsets = {"MS": "14d", "M": "15d", "YS": "181d", "Y": "182d", "QS": "45d", "Q": "46d"} def _ecdf_1d(x, value):
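Reviewer note, not part of the patch: the upstream change this whole series tracks is the pandas 2.2 renaming of offset aliases ("H" -> "h", "T" -> "min", "S" -> "s", "AS" -> "YS", "A" -> "YE", "M" -> "ME"). A minimal standalone reproduction, assuming pandas >= 2.2 is installed (older pandas accepts all the spellings used here silently):

    import warnings

    import pandas as pd

    idx = pd.date_range("2000-01-01", periods=48, freq="h")  # "H" is the deprecated spelling
    ser = pd.Series(range(48), index=idx)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        ser.resample("AS").sum()  # old annual-start alias
    # On pandas >= 2.2 this emits a FutureWarning pointing to "YS".
    print([str(w.message) for w in caught])

    ser.resample("YS").sum()  # preferred spelling, valid on every supported stack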
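Finally, a usage sketch of the validation path touched in xclim/core/datachecks.py, mirroring the updated test_checks expectations (the data array is a hypothetical stand-in):

    import numpy as np
    import pandas as pd
    import xarray as xr

    from xclim.core import datachecks
    from xclim.core.utils import ValidationError

    time = pd.date_range("2000-01-01", freq="3h", periods=100)
    da = xr.DataArray(np.random.random(100), coords={"time": time}, dims="time")

    datachecks.check_freq(da, "3h")  # exact frequency: passes
    datachecks.check_freq(da, ["h", "D"], strict=False)  # multiples tolerated: passes
    try:
        datachecks.check_freq(da, "h")  # strict by default: a 3h series is not hourly
    except ValidationError as err:
        print(err)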