From 4a3ad2269b9d6b33ec028acd0b3cdb358663878c Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 19 Jan 2024 11:55:42 +0100 Subject: [PATCH 01/17] unify freq strings (independent of pd version) --- xarray/coding/cftime_offsets.py | 75 +++++++++++++++++++++-------- xarray/core/groupby.py | 4 +- xarray/tests/test_cftime_offsets.py | 17 +++---- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_groupby.py | 6 +-- xarray/tests/test_missing.py | 2 +- xarray/tests/test_plot.py | 2 +- 7 files changed, 69 insertions(+), 39 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 100f3b249d2..77094f4f2c7 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -746,7 +746,7 @@ def _emit_freq_deprecation_warning(deprecated_freq): emit_user_level_warning(message, FutureWarning) -def to_offset(freq): +def to_offset(freq, warn=True): """Convert a frequency string to the appropriate subclass of BaseCFTimeOffset.""" if isinstance(freq, BaseCFTimeOffset): @@ -758,7 +758,7 @@ def to_offset(freq): raise ValueError("Invalid frequency string provided") freq = freq_data["freq"] - if freq in _DEPRECATED_FREQUENICES: + if warn and freq in _DEPRECATED_FREQUENICES: _emit_freq_deprecation_warning(freq) multiples = freq_data["multiple"] multiples = 1 if multiples is None else int(multiples) @@ -1226,7 +1226,8 @@ def date_range( start=start, end=end, periods=periods, - freq=freq, + # TODO remove translation once requiring pandas >= 2.2 + freq=_new_to_legacy_freq(freq), tz=tz, normalize=normalize, name=name, @@ -1254,6 +1255,54 @@ def date_range( ) +def _new_to_legacy_freq(freq): + # xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd + # frequencies, but older versions of pandas do not support these as + # frequency strings. Until xarray's minimum pandas version is 2.2 or above, + # we add logic to continue using the deprecated "M" and "Q" frequency + # strings in these circumstances. + + # TODO: remove once requiring pandas >= 2.2 + + if Version(pd.__version__) < Version("2.2"): + freq_as_offset = to_offset(freq) + if isinstance(freq_as_offset, MonthEnd) and "ME" in freq: + freq = freq.replace("ME", "M") + elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq: + freq = freq.replace("QE", "Q") + elif isinstance(freq_as_offset, YearBegin) and "YS" in freq: + freq = freq.replace("YS", "AS") + elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq: + # Check for and replace "Y-" instead of just "Y" to prevent + # corrupting anchored offsets that contain "Y" in the month + # abbreviation, e.g. "Y-MAY" -> "A-MAY". + freq = freq.replace("Y-", "A-") + + return freq + + +def _legacy_to_new_freq(freq): + # to avoid internal deprecation warnings when freq is determined using pandas < 2.2 + + # TODO: remove once requiring pandas >= 2.2 + + if Version(pd.__version__) < Version("2.2"): + freq_as_offset = to_offset(freq, warn=False) + if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq: + freq = freq.replace("M", "ME") + elif isinstance(freq_as_offset, QuarterEnd) and "QE" not in freq: + freq = freq.replace("Q", "QE") + elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq: + freq = freq.replace("AS", "YS") + elif isinstance(freq_as_offset, YearEnd) and "Y-" not in freq: + # Check for and replace "Y-" instead of just "Y" to prevent + # corrupting anchored offsets that contain "Y" in the month + # abbreviation, e.g. "Y-MAY" -> "A-MAY". + freq = freq.replace("A-", "Y-") + + return freq + + def date_range_like(source, calendar, use_cftime=None): """Generate a datetime array with the same frequency, start and end as another one, but in a different calendar. @@ -1298,24 +1347,8 @@ def date_range_like(source, calendar, use_cftime=None): "`date_range_like` was unable to generate a range as the source frequency was not inferable." ) - # xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd - # frequencies, but older versions of pandas do not support these as - # frequency strings. Until xarray's minimum pandas version is 2.2 or above, - # we add logic to continue using the deprecated "M" and "Q" frequency - # strings in these circumstances. - if Version(pd.__version__) < Version("2.2"): - freq_as_offset = to_offset(freq) - if isinstance(freq_as_offset, MonthEnd) and "ME" in freq: - freq = freq.replace("ME", "M") - elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq: - freq = freq.replace("QE", "Q") - elif isinstance(freq_as_offset, YearBegin) and "YS" in freq: - freq = freq.replace("YS", "AS") - elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq: - # Check for and replace "Y-" instead of just "Y" to prevent - # corrupting anchored offsets that contain "Y" in the month - # abbreviation, e.g. "Y-MAY" -> "A-MAY". - freq = freq.replace("Y-", "A-") + # TODO remove once requiring pandas >= 2.2 + freq = _legacy_to_new_freq(freq) use_cftime = _should_cftime_be_used(source, calendar, use_cftime) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index ebb488d42c9..fcc4dcb68fb 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -17,6 +17,7 @@ import numpy as np import pandas as pd +from xarray.coding.cftime_offsets import _new_to_legacy_freq from xarray.core import dtypes, duck_array_ops, nputils, ops from xarray.core._aggregations import ( DataArrayGroupByAggregations, @@ -532,7 +533,8 @@ def __post_init__(self) -> None: ) else: index_grouper = pd.Grouper( - freq=grouper.freq, + # TODO remove once requiring pandas >= 2.2 + freq=_new_to_legacy_freq(grouper.freq), closed=grouper.closed, label=grouper.label, origin=grouper.origin, diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 0ffcb5e8ab9..2aaae9ce11c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1313,7 +1313,7 @@ def test_calendar_year_length( assert len(result) == expected_number_of_days -@pytest.mark.parametrize("freq", ["Y", "M", "D"]) +@pytest.mark.parametrize("freq", ["Y", "ME", "D"]) def test_dayofweek_after_cftime_range(freq: str) -> None: result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek @@ -1392,29 +1392,24 @@ def test_date_range_errors() -> None: ) def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd): expected_xarray_freq = freq + expected_pandas_freq = freq # pandas changed what is returned for infer_freq in version 2.2. The # development version of xarray follows this, but we need to adapt this test # to still handle older versions of pandas. if Version(pd.__version__) < Version("2.2"): if "ME" in freq: - freq = freq.replace("ME", "M") - expected_pandas_freq = freq + expected_pandas_freq = expected_pandas_freq.replace("ME", "M") elif "QE" in freq: - freq = freq.replace("QE", "Q") - expected_pandas_freq = freq + expected_pandas_freq = expected_pandas_freq.replace("QE", "Q") elif "YS" in freq: - freq = freq.replace("YS", "AS") - expected_pandas_freq = freq + expected_pandas_freq = expected_pandas_freq.replace("YS", "AS") elif "Y-" in freq: - freq = freq.replace("Y-", "A-") - expected_pandas_freq = freq + expected_pandas_freq = expected_pandas_freq.replace("Y-", "A-") elif "h" in freq: expected_pandas_freq = freq.replace("h", "H") else: raise ValueError(f"Test not implemented for freq {freq!r}") - else: - expected_pandas_freq = freq source = date_range(start, periods=12, freq=freq, calendar=cal_src) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 664d108b89c..bd3e79d0ebe 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6945,7 +6945,7 @@ def test_differentiate_datetime(dask) -> None: @pytest.mark.parametrize("dask", [True, False]) def test_differentiate_cftime(dask) -> None: rs = np.random.RandomState(42) - coord = xr.cftime_range("2000", periods=8, freq="2M") + coord = xr.cftime_range("2000", periods=8, freq="2ME") da = xr.DataArray( rs.randn(8, 6), diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index e45d8ed0bef..72b64f3caae 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1754,19 +1754,19 @@ def test_resample_doctest(self, use_cftime: bool) -> None: time=( "time", xr.date_range( - "2001-01-01", freq="M", periods=6, use_cftime=use_cftime + "2001-01-01", freq="ME", periods=6, use_cftime=use_cftime ), ), labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ), ) - actual = da.resample(time="3M").count() + actual = da.resample(time="3ME").count() expected = DataArray( [1, 3, 1], dims="time", coords={ "time": xr.date_range( - "2001-01-01", freq="3M", periods=3, use_cftime=use_cftime + "2001-01-01", freq="3ME", periods=3, use_cftime=use_cftime ) }, ) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 45a649605f3..5dba4d91025 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -606,7 +606,7 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar): @requires_cftime @pytest.mark.parametrize( - ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"]) + ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1ME", "1Y"]) ) def test_get_clean_interp_index_dt(cf_da, calendar, freq): """In the gregorian case, the index should be proportional to normal datetimes.""" diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 697db9c5e80..22aad173918 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -2955,7 +2955,7 @@ def setUp(self) -> None: """ # case for 1d array data = np.random.rand(4, 12) - time = xr.cftime_range(start="2017", periods=12, freq="1M", calendar="noleap") + time = xr.cftime_range(start="2017", periods=12, freq="1ME", calendar="noleap") darray = DataArray(data, dims=["x", "time"]) darray.coords["time"] = time From ed214d7b993022e7ebcd59e3aea8ff978c13471b Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 22 Jan 2024 10:18:42 +0100 Subject: [PATCH 02/17] Update xarray/tests/test_cftime_offsets.py Co-authored-by: Spencer Clark --- xarray/tests/test_cftime_offsets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index b391caecb08..c23e52d0186 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1316,7 +1316,8 @@ def test_calendar_year_length( @pytest.mark.parametrize("freq", ["Y", "ME", "D"]) def test_dayofweek_after_cftime_range(freq: str) -> None: result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek - expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek + # NOTE: uses pd.date_range under the hood + expected = date_range("2000-02-01", periods=3, freq=freq).dayofweek np.testing.assert_array_equal(result, expected) From 8173e1bfb0ae8c35ff3d23042a18b0618c846c6f Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 22 Jan 2024 18:34:33 +0100 Subject: [PATCH 03/17] update code and tests --- xarray/coding/cftime_offsets.py | 44 ++++++---- xarray/coding/frequencies.py | 6 +- xarray/core/pdcompat.py | 1 + xarray/tests/__init__.py | 1 + xarray/tests/test_accessor_dt.py | 4 +- xarray/tests/test_calendar_ops.py | 6 +- xarray/tests/test_cftime_offsets.py | 98 ++++++++++++++++++++--- xarray/tests/test_cftimeindex.py | 4 +- xarray/tests/test_cftimeindex_resample.py | 38 +++++---- xarray/tests/test_dataset.py | 3 +- xarray/tests/test_groupby.py | 14 ++-- xarray/tests/test_interp.py | 2 +- xarray/tests/test_units.py | 9 +-- 13 files changed, 165 insertions(+), 65 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index fdc61f4c798..ecf69fa4e00 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1268,7 +1268,7 @@ def _new_to_legacy_freq(freq): # TODO: remove once requiring pandas >= 2.2 - if Version(pd.__version__) < Version("2.2"): + if freq and Version(pd.__version__) < Version("2.2"): freq_as_offset = to_offset(freq) if isinstance(freq_as_offset, MonthEnd) and "ME" in freq: freq = freq.replace("ME", "M") @@ -1276,11 +1276,19 @@ def _new_to_legacy_freq(freq): freq = freq.replace("QE", "Q") elif isinstance(freq_as_offset, YearBegin) and "YS" in freq: freq = freq.replace("YS", "AS") - elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq: - # Check for and replace "Y-" instead of just "Y" to prevent - # corrupting anchored offsets that contain "Y" in the month - # abbreviation, e.g. "Y-MAY" -> "A-MAY". - freq = freq.replace("Y-", "A-") + elif isinstance(freq_as_offset, YearEnd): + # testing for "Y" is required as this was valid in xarray 2023.11 - 2024.01 + if "Y-" in freq: + # Check for and replace "Y-" instead of just "Y" to prevent + # corrupting anchored offsets that contain "Y" in the month + # abbreviation, e.g. "Y-MAY" -> "A-MAY". + freq = freq.replace("Y-", "A-") + elif "YE-" in freq: + freq = freq.replace("YE-", "A-") + elif "A-" not in freq and freq.endswith("Y"): + freq = freq.replace("Y", "A") + elif freq.endswith("YE"): + freq = freq.replace("YE", "A") return freq @@ -1290,7 +1298,7 @@ def _legacy_to_new_freq(freq): # TODO: remove once requiring pandas >= 2.2 - if Version(pd.__version__) < Version("2.2"): + if freq and Version(pd.__version__) < Version("2.2"): freq_as_offset = to_offset(freq, warn=False) if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq: freq = freq.replace("M", "ME") @@ -1298,14 +1306,20 @@ def _legacy_to_new_freq(freq): freq = freq.replace("Q", "QE") elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq: freq = freq.replace("AS", "YS") - elif isinstance(freq_as_offset, YearEnd) and "Y-" not in freq: - # Check for and replace "Y-" instead of just "Y" to prevent - # corrupting anchored offsets that contain "Y" in the month - # abbreviation, e.g. "Y-MAY" -> "A-MAY". - freq = freq.replace("A-", "Y-") - elif isinstance(freq_as_offset, YearEnd) and "YE" in freq: - freq = freq.replace("YE", "A") - + elif isinstance(freq_as_offset, YearEnd): + if "A-" in freq: + # Check for and replace "A-" instead of just "A" to prevent + # corrupting anchored offsets that contain "Y" in the month + # abbreviation, e.g. "A-MAY" -> "YE-MAY". + freq = freq.replace("A-", "YE-") + elif "Y-" in freq: + freq = freq.replace("Y-", "YE-") + elif freq.endswith("A"): + # the "A-MAY" case is already handled above + freq = freq.replace("A", "YE") + elif "YE" not in freq and freq.endswith("Y"): + # the "Y-MAY" case is already handled above + freq = freq.replace("Y", "YE") return freq diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py index c401fb95f1c..ae7eb9f6465 100644 --- a/xarray/coding/frequencies.py +++ b/xarray/coding/frequencies.py @@ -44,7 +44,7 @@ import numpy as np import pandas as pd -from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS +from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS, _legacy_to_new_freq from xarray.coding.cftimeindex import CFTimeIndex from xarray.core.common import _contains_datetime_like_objects @@ -98,7 +98,7 @@ def infer_freq(index): inferer = _CFTimeFrequencyInferer(index) return inferer.get_freq() - return pd.infer_freq(index) + return _legacy_to_new_freq(pd.infer_freq(index)) class _CFTimeFrequencyInferer: # (pd.tseries.frequencies._FrequencyInferer): @@ -183,7 +183,7 @@ def _get_annual_rule(self): if len(np.unique(self.index.month)) > 1: return None - return {"cs": "YS", "ce": "Y"}.get(month_anchor_check(self.index)) + return {"cs": "YS", "ce": "YE"}.get(month_anchor_check(self.index)) def _get_quartely_rule(self): if len(self.month_deltas) > 1: diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index c2db154d614..c09dd82b074 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -83,6 +83,7 @@ def _convert_base_to_offset(base, freq, index): from xarray.coding.cftimeindex import CFTimeIndex if isinstance(index, pd.DatetimeIndex): + freq = cftime_offsets._new_to_legacy_freq(freq) freq = pd.tseries.frequencies.to_offset(freq) if isinstance(freq, pd.offsets.Tick): return pd.Timedelta(base * freq.nanos // freq.n) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 207caba48f0..807df4acbf6 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -109,6 +109,7 @@ def _importorskip( has_pint, requires_pint = _importorskip("pint") has_numexpr, requires_numexpr = _importorskip("numexpr") has_flox, requires_flox = _importorskip("flox") +has_pandas_gt_2_2, __ = _importorskip("pandas", "2.2") # some special cases diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 387929d3fe9..6bf3663c174 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -248,7 +248,9 @@ def test_dask_accessor_method(self, method, parameters) -> None: assert_equal(actual.compute(), expected.compute()) def test_seasons(self) -> None: - dates = pd.date_range(start="2000/01/01", freq="M", periods=12) + dates = xr.date_range( + start="2000/01/01", freq="ME", periods=12, use_cftime=False + ) dates = dates.append(pd.Index([np.datetime64("NaT")])) dates = xr.DataArray(dates) seasons = xr.DataArray( diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index ab0ee8d0f71..c28990dd14b 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -89,17 +89,17 @@ def test_convert_calendar_360_days(source, target, freq, align_on): if align_on == "date": np.testing.assert_array_equal( - conv.time.resample(time="M").last().dt.day, + conv.time.resample(time="ME").last().dt.day, [30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30], ) elif target == "360_day": np.testing.assert_array_equal( - conv.time.resample(time="M").last().dt.day, + conv.time.resample(time="ME").last().dt.day, [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29], ) else: np.testing.assert_array_equal( - conv.time.resample(time="M").last().dt.day, + conv.time.resample(time="ME").last().dt.day, [30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31], ) if source == "360_day" and align_on == "year": diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index c23e52d0186..651dc985cb0 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -26,6 +26,8 @@ YearBegin, YearEnd, _days_in_month, + _legacy_to_new_freq, + _new_to_legacy_freq, cftime_range, date_range, date_range_like, @@ -35,7 +37,12 @@ ) from xarray.coding.frequencies import infer_freq from xarray.core.dataarray import DataArray -from xarray.tests import _CFTIME_CALENDARS, has_cftime, requires_cftime +from xarray.tests import ( + _CFTIME_CALENDARS, + has_cftime, + has_pandas_gt_2_2, + requires_cftime, +) cftime = pytest.importorskip("cftime") @@ -1235,13 +1242,13 @@ def test_cftime_range_name(): @pytest.mark.parametrize( ("start", "end", "periods", "freq", "inclusive"), [ - (None, None, 5, "Y", None), - ("2000", None, None, "Y", None), - (None, "2000", None, "Y", None), + (None, None, 5, "YE", None), + ("2000", None, None, "YE", None), + (None, "2000", None, "YE", None), ("2000", "2001", None, None, None), (None, None, None, None, None), - ("2000", "2001", None, "Y", "up"), - ("2000", "2001", 5, "Y", None), + ("2000", "2001", None, "YE", "up"), + ("2000", "2001", 5, "YE", None), ], ) def test_invalid_cftime_range_inputs( @@ -1313,17 +1320,20 @@ def test_calendar_year_length( assert len(result) == expected_number_of_days -@pytest.mark.parametrize("freq", ["Y", "ME", "D"]) +@pytest.mark.parametrize("freq", ["YE", "ME", "D"]) def test_dayofweek_after_cftime_range(freq: str) -> None: result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek - # NOTE: uses pd.date_range under the hood - expected = date_range("2000-02-01", periods=3, freq=freq).dayofweek + # TODO: remove once requiring pandas 2.2+ + freq = _new_to_legacy_freq(freq) + expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek np.testing.assert_array_equal(result, expected) -@pytest.mark.parametrize("freq", ["Y", "M", "D"]) +@pytest.mark.parametrize("freq", ["YE", "ME", "D"]) def test_dayofyear_after_cftime_range(freq: str) -> None: result = cftime_range("2000-02-01", periods=3, freq=freq).dayofyear + # TODO: remove once requiring pandas 2.2+ + freq = _new_to_legacy_freq(freq) expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofyear np.testing.assert_array_equal(result, expected) @@ -1537,3 +1547,71 @@ def test_to_offset_deprecation_warning(freq): # Test for deprecations outlined in GitHub issue #8394 with pytest.warns(FutureWarning, match="is deprecated"): to_offset(freq) + + +@pytest.mark.skipif(has_pandas_gt_2_2, reason="only relevant for pandas lt 2.2") +@pytest.mark.parametrize( + "freq, expected", + ( + ["Y", "YE"], + ["A", "YE"], + ["Q", "QE"], + ["M", "ME"], + ["AS", "YS"], + ["YE", "YE"], + ["QE", "QE"], + ["ME", "ME"], + ["YS", "YS"], + ), +) +@pytest.mark.parametrize("n", ("", "2")) +def test_legacy_to_new_freq(freq, expected, n): + freq = f"{n}{freq}" + result = _legacy_to_new_freq(freq) + + expected = f"{n}{expected}" + + assert result == expected + + +@pytest.mark.skipif(has_pandas_gt_2_2, reason="only relevant for pandas lt 2.2") +@pytest.mark.parametrize("year_alias", ("YE", "Y", "A")) +@pytest.mark.parametrize("n", ("", "2")) +def test_legacy_to_new_freq_anchored(year_alias, n): + for month in _MONTH_ABBREVIATIONS.values(): + freq = f"{n}{year_alias}-{month}" + result = _legacy_to_new_freq(freq) + + expected = f"{n}YE-{month}" + + assert result == expected + + +@pytest.mark.skipif(has_pandas_gt_2_2, reason="only relevant for pandas lt 2.2") +@pytest.mark.filterwarnings("ignore:'[AY]' is deprecated") +@pytest.mark.parametrize( + "freq, expected", + (["A", "A"], ["YE", "A"], ["Y", "A"], ["QE", "Q"], ["ME", "M"], ["YS", "AS"]), +) +@pytest.mark.parametrize("n", ("", "2")) +def test_new_to_legacy_freq(freq, expected, n): + freq = f"{n}{freq}" + result = _new_to_legacy_freq(freq) + + expected = f"{n}{expected}" + + assert result == expected + + +@pytest.mark.skipif(has_pandas_gt_2_2, reason="only relevant for pandas lt 2.2") +@pytest.mark.filterwarnings("ignore:'[AY]-.{3}' is deprecated") +@pytest.mark.parametrize("year_alias", ("A", "Y", "YE")) +@pytest.mark.parametrize("n", ("", "2")) +def test_new_to_legacy_freq_anchored(year_alias, n): + for month in _MONTH_ABBREVIATIONS.values(): + freq = f"{n}{year_alias}-{month}" + result = _new_to_legacy_freq(freq) + + expected = f"{n}A-{month}" + + assert result == expected diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 062756e614b..f6eb15fa373 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -795,7 +795,7 @@ def test_cftimeindex_shift_float_us() -> None: @requires_cftime -@pytest.mark.parametrize("freq", ["YS", "Y", "QS", "QE", "MS", "ME"]) +@pytest.mark.parametrize("freq", ["YS", "YE", "QS", "QE", "MS", "ME"]) def test_cftimeindex_shift_float_fails_for_non_tick_freqs(freq) -> None: a = xr.cftime_range("2000", periods=3, freq="D") with pytest.raises(TypeError, match="unsupported operand type"): @@ -1353,7 +1353,7 @@ def test_infer_freq_invalid_inputs(): "freq", [ "300YS-JAN", - "Y-DEC", + "YE-DEC", "YS-JUL", "2YS-FEB", "QE-NOV", diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 9bdab8a6d7c..58b1f67f0c5 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -9,6 +9,7 @@ from packaging.version import Version import xarray as xr +from xarray.coding.cftime_offsets import _new_to_legacy_freq from xarray.core.pdcompat import _convert_base_to_offset from xarray.core.resample_cftime import CFTimeGrouper @@ -35,21 +36,21 @@ ("3MS", "MS"), ("3MS", "6MS"), ("3MS", "85D"), - ("7M", "3M"), - ("7M", "14M"), - ("7M", "2QS-APR"), + ("7ME", "3ME"), + ("7ME", "14ME"), + ("7ME", "2QS-APR"), ("43QS-AUG", "21QS-AUG"), ("43QS-AUG", "86QS-AUG"), - ("43QS-AUG", "11A-JUN"), - ("11Q-JUN", "5Q-JUN"), - ("11Q-JUN", "22Q-JUN"), - ("11Q-JUN", "51MS"), - ("3AS-MAR", "AS-MAR"), - ("3AS-MAR", "6AS-MAR"), - ("3AS-MAR", "14Q-FEB"), - ("7A-MAY", "3A-MAY"), - ("7A-MAY", "14A-MAY"), - ("7A-MAY", "85M"), + ("43QS-AUG", "11YE-JUN"), + ("11QE-JUN", "5QE-JUN"), + ("11QE-JUN", "22QE-JUN"), + ("11QE-JUN", "51MS"), + ("3YS-MAR", "YS-MAR"), + ("3YS-MAR", "6YS-MAR"), + ("3YS-MAR", "14QE-FEB"), + ("7YE-MAY", "3YE-MAY"), + ("7YE-MAY", "14YE-MAY"), + ("7YE-MAY", "85ME"), ] @@ -136,9 +137,12 @@ def test_resample(freqs, closed, label, base, offset) -> None: start = "2000-01-01T12:07:01" loffset = "12h" origin = "start" - index_kwargs = dict(start=start, periods=5, freq=initial_freq) - datetime_index = pd.date_range(**index_kwargs) - cftime_index = xr.cftime_range(**index_kwargs) + index_kwargs = dict(start=start, periods=5) + # TODO: move `freq` back to `index_kwargs` + datetime_index = pd.date_range( + **index_kwargs, freq=_new_to_legacy_freq(initial_freq) + ) + cftime_index = xr.cftime_range(**index_kwargs, freq=initial_freq) da_datetimeindex = da(datetime_index) da_cftimeindex = da(cftime_index) @@ -167,7 +171,7 @@ def test_resample(freqs, closed, label, base, offset) -> None: ("MS", "left"), ("QE", "right"), ("QS", "left"), - ("Y", "right"), + ("YE", "right"), ("YS", "left"), ], ) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a0a1ed93364..53b2e991301 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4071,7 +4071,8 @@ def test_virtual_variable_same_name(self) -> None: assert_identical(actual, expected) def test_time_season(self) -> None: - ds = Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) + time = xr.date_range("2000-01-01", periods=12, freq="ME", use_cftime=False) + ds = Dataset({"t": time}) seas = ["DJF"] * 2 + ["MAM"] * 3 + ["JJA"] * 3 + ["SON"] * 3 + ["DJF"] assert_array_equal(seas, ds["t.season"]) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 72b64f3caae..6b1d850e42f 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -516,7 +516,7 @@ def test_da_groupby_assign_coords() -> None: coords={ "z": ["a", "b", "c", "a", "b", "c"], "x": [1, 1, 1, 2, 2, 3, 4, 5, 3, 4], - "t": pd.date_range("2001-01-01", freq="M", periods=24), + "t": xr.date_range("2001-01-01", freq="ME", periods=24, use_cftime=False), "month": ("t", list(range(1, 13)) * 2), }, ) @@ -2027,7 +2027,7 @@ def test_upsample_interpolate(self): def test_upsample_interpolate_bug_2197(self): dates = pd.date_range("2007-02-01", "2007-03-01", freq="D") da = xr.DataArray(np.arange(len(dates)), [("time", dates)]) - result = da.resample(time="M").interpolate("linear") + result = da.resample(time="ME").interpolate("linear") expected_times = np.array( [np.datetime64("2007-02-28"), np.datetime64("2007-03-31")] ) @@ -2322,7 +2322,7 @@ def test_resample_ds_da_are_the_same(self): } ) assert_allclose( - ds.resample(time="M").mean()["foo"], ds.foo.resample(time="M").mean() + ds.resample(time="ME").mean()["foo"], ds.foo.resample(time="ME").mean() ) def test_ds_resample_apply_func_args(self): @@ -2397,21 +2397,21 @@ def test_resample_cumsum(method: str, expected_array: list[float]) -> None: ds = xr.Dataset( {"foo": ("time", [1, 2, 3, 1, 2, np.nan])}, coords={ - "time": pd.date_range("01-01-2001", freq="M", periods=6), + "time": xr.date_range("01-01-2001", freq="ME", periods=6, use_cftime=False), }, ) - actual = getattr(ds.resample(time="3M"), method)(dim="time") + actual = getattr(ds.resample(time="3ME"), method)(dim="time") expected = xr.Dataset( {"foo": (("time",), expected_array)}, coords={ - "time": pd.date_range("01-01-2001", freq="M", periods=6), + "time": xr.date_range("01-01-2001", freq="ME", periods=6, use_cftime=False), }, ) # TODO: Remove drop_vars when GH6528 is fixed # when Dataset.cumsum propagates indexes, and the group variable? assert_identical(expected.drop_vars(["time"]), actual) - actual = getattr(ds.foo.resample(time="3M"), method)(dim="time") + actual = getattr(ds.foo.resample(time="3ME"), method)(dim="time") expected.coords["time"] = ds.time assert_identical(expected.drop_vars(["time"]).foo, actual) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index de0020b4d00..a7644ac9d2b 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -747,7 +747,7 @@ def test_datetime_interp_noerror() -> None: @requires_cftime @requires_scipy def test_3641() -> None: - times = xr.cftime_range("0001", periods=3, freq="500Y") + times = xr.cftime_range("0001", periods=3, freq="500YE") da = xr.DataArray(range(3), dims=["time"], coords=[times]) da.interp(time=["0002-05-01"]) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 21915a9a17c..ce7eddce3a1 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -4,7 +4,6 @@ import operator import numpy as np -import pandas as pd import pytest import xarray as xr @@ -3871,11 +3870,11 @@ def test_computation_objects(self, func, variant, dtype): def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m - time = pd.date_range("10-09-2010", periods=len(array), freq="Y") + time = xr.date_range("10-09-2010", periods=len(array), freq="YE") data_array = xr.DataArray(data=array, coords={"time": time}, dims="time") units = extract_units(data_array) - func = method("resample", time="6M") + func = method("resample", time="6ME") expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() @@ -5374,7 +5373,7 @@ def test_resample(self, variant, dtype): array1 = np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit1 array2 = np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit2 - t = pd.date_range("10-09-2010", periods=array1.shape[0], freq="Y") + t = xr.date_range("10-09-2010", periods=array1.shape[0], freq="YE") y = np.arange(5) * dim_unit z = np.arange(8) * dim_unit @@ -5386,7 +5385,7 @@ def test_resample(self, variant, dtype): ) units = extract_units(ds) - func = method("resample", time="6M") + func = method("resample", time="6ME") expected = attach_units(func(strip_units(ds)).mean(), units) actual = func(ds).mean() From 5df78c3cfc0d630d02e1d30b4dca4cde7328b2c7 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 23 Jan 2024 18:41:39 +0100 Subject: [PATCH 04/17] make mypy happy --- xarray/tests/test_cftimeindex_resample.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 58b1f67f0c5..023936e4393 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -137,12 +137,11 @@ def test_resample(freqs, closed, label, base, offset) -> None: start = "2000-01-01T12:07:01" loffset = "12h" origin = "start" - index_kwargs = dict(start=start, periods=5) - # TODO: move `freq` back to `index_kwargs` + datetime_index = pd.date_range( - **index_kwargs, freq=_new_to_legacy_freq(initial_freq) + start=start, periods=5, freq=_new_to_legacy_freq(initial_freq) ) - cftime_index = xr.cftime_range(**index_kwargs, freq=initial_freq) + cftime_index = xr.cftime_range(start=start, periods=5, freq=initial_freq) da_datetimeindex = da(datetime_index) da_cftimeindex = da(cftime_index) From b497cf559a7fc3622f9ee0cbbeef3b66553ee5ad Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 23 Jan 2024 19:24:20 +0100 Subject: [PATCH 05/17] add 'YE' to _ANNUAL_OFFSET_TYPES --- xarray/tests/test_cftime_offsets.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 651dc985cb0..509a7c21b85 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -240,7 +240,13 @@ def test_to_offset_sub_annual(freq, expected): assert to_offset(freq) == expected -_ANNUAL_OFFSET_TYPES = {"A": YearEnd, "AS": YearBegin, "Y": YearEnd, "YS": YearBegin} +_ANNUAL_OFFSET_TYPES = { + "A": YearEnd, + "AS": YearBegin, + "Y": YearEnd, + "YS": YearBegin, + "YE": YearEnd, +} @pytest.mark.parametrize( From a900d0fe6b9f904d9c3746551bc88c022f528dbc Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 23 Jan 2024 19:24:49 +0100 Subject: [PATCH 06/17] un x-fail test --- xarray/tests/test_cftime_offsets.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 509a7c21b85..33c1c0161f4 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1394,9 +1394,6 @@ def test_date_range_errors() -> None: @requires_cftime -@pytest.mark.xfail( - reason="https://github.com/pydata/xarray/pull/8636#issuecomment-1902775153" -) @pytest.mark.parametrize( "start,freq,cal_src,cal_tgt,use_cftime,exp0,exp_pd", [ From c1271156f13a4dca1c85ac4e607f3563f3854729 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 1 Feb 2024 10:19:21 +0100 Subject: [PATCH 07/17] adapt more freq strings --- xarray/tests/test_cftime_offsets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 352158c7c48..7b035e0ea03 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1688,7 +1688,9 @@ def test_new_to_legacy_freq_anchored(year_alias, n): @pytest.mark.filterwarnings("ignore:Converting a CFTimeIndex with:") @pytest.mark.parametrize("start", ("2000", "2001")) @pytest.mark.parametrize("end", ("2000", "2001")) -@pytest.mark.parametrize("freq", ("MS", "-1MS", "YS", "-1YS", "M", "-1M", "Y", "-1Y")) +@pytest.mark.parametrize( + "freq", ("MS", "-1MS", "YS", "-1YS", "ME", "-1ME", "YE", "-1YE") +) def test_cftime_range_same_as_pandas(start, end, freq): result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True) result = result.to_datetimeindex() From ad82d70f835897eb90b797db12d9ef6f0c0f7411 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 2 Feb 2024 17:35:46 +0100 Subject: [PATCH 08/17] simplify test --- xarray/tests/test_cftime_offsets.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 7b035e0ea03..b56ae8438c3 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd import pytest -from packaging.version import Version from xarray import CFTimeIndex from xarray.coding.cftime_offsets import ( @@ -1473,25 +1472,7 @@ def test_date_range_errors() -> None: ], ) def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd): - expected_xarray_freq = freq - expected_pandas_freq = freq - - # pandas changed what is returned for infer_freq in version 2.2. The - # development version of xarray follows this, but we need to adapt this test - # to still handle older versions of pandas. - if Version(pd.__version__) < Version("2.2"): - if "ME" in freq: - expected_pandas_freq = expected_pandas_freq.replace("ME", "M") - elif "QE" in freq: - expected_pandas_freq = expected_pandas_freq.replace("QE", "Q") - elif "YS" in freq: - expected_pandas_freq = expected_pandas_freq.replace("YS", "AS") - elif "Y-" in freq: - expected_pandas_freq = expected_pandas_freq.replace("YE-", "A-") - elif "h" in freq: - expected_pandas_freq = freq.replace("h", "H") - else: - raise ValueError(f"Test not implemented for freq {freq!r}") + expected_freq = freq source = date_range(start, periods=12, freq=freq, calendar=cal_src) @@ -1499,10 +1480,7 @@ def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd assert len(out) == 12 - if exp_pd: - assert infer_freq(out) == expected_pandas_freq - else: - assert infer_freq(out) == expected_xarray_freq + assert infer_freq(out) == expected_freq assert out[0].isoformat().startswith(exp0) From afada3cdf36f5968fc5d0e5460a11b339e97cdbd Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 2 Feb 2024 18:20:11 +0100 Subject: [PATCH 09/17] also translate 'h', 'min', 's' --- xarray/coding/cftime_offsets.py | 8 ++++++++ xarray/tests/test_cftime_offsets.py | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 3845925ff20..1e88d7d2854 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1264,6 +1264,8 @@ def _new_to_legacy_freq(freq): # we add logic to continue using the deprecated "M" and "Q" frequency # strings in these circumstances. + # "h" -> "H" conversion not required + # TODO: remove once requiring pandas >= 2.2 if freq and Version(pd.__version__) < Version("2.2"): @@ -1318,6 +1320,12 @@ def _legacy_to_new_freq(freq): elif "YE" not in freq and freq.endswith("Y"): # the "Y-MAY" case is already handled above freq = freq.replace("Y", "YE") + elif isinstance(freq_as_offset, Hour): + freq = freq.replace("H", "h") + elif isinstance(freq_as_offset, Minute): + freq = freq.replace("T", "min") + elif isinstance(freq_as_offset, Second): + freq = freq.replace("S", "s") return freq diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index b56ae8438c3..d29bfc77fe7 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -38,6 +38,7 @@ from xarray.core.dataarray import DataArray from xarray.tests import ( _CFTIME_CALENDARS, + assert_no_warnings, has_cftime, has_pandas_gt_2_2, requires_cftime, @@ -1491,6 +1492,18 @@ def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd assert out.calendar == cal_tgt +@requires_cftime +@pytest.mark.parametrize( + "freq", ("YE", "YS", "YE-MAY", "MS", "ME", "QS", "h", "min", "s") +) +@pytest.mark.parametrize("use_cftime", (True, False)) +def test_date_range_like_no_deprecation(freq, use_cftime): + source = date_range("2000", periods=3, freq=freq, use_cftime=False) + + with assert_no_warnings(): + date_range_like(source, "standard", use_cftime=use_cftime) + + def test_date_range_like_same_calendar(): src = date_range("2000-01-01", periods=12, freq="6h", use_cftime=False) out = date_range_like(src, "standard", use_cftime=False) From b61bc2100f2ef05877eb34e69fc4ea1761541fed Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 7 Feb 2024 10:40:12 +0100 Subject: [PATCH 10/17] add comment --- xarray/tests/test_cftime_offsets.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index d29bfc77fe7..b1c0529ae61 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1498,6 +1498,9 @@ def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd ) @pytest.mark.parametrize("use_cftime", (True, False)) def test_date_range_like_no_deprecation(freq, use_cftime): + # ensure no internal warnings + # TODO: remove once freq string deprecation is finished + source = date_range("2000", periods=3, freq=freq, use_cftime=False) with assert_no_warnings(): From 6c24ae243ab64faf0d4b5b86b179dfe960f1bbd6 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 9 Feb 2024 10:36:52 +0100 Subject: [PATCH 11/17] simplify test --- xarray/tests/test_calendar_ops.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index c28990dd14b..d2792034876 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -1,9 +1,7 @@ from __future__ import annotations import numpy as np -import pandas as pd import pytest -from packaging.version import Version from xarray import DataArray, infer_freq from xarray.coding.calendar_ops import convert_calendar, interp_calendar @@ -135,13 +133,7 @@ def test_convert_calendar_missing(source, target, freq): ) out = convert_calendar(da_src, target, missing=np.nan, align_on="date") - if Version(pd.__version__) < Version("2.2"): - if freq == "4h" and target == "proleptic_gregorian": - expected_freq = "4H" - else: - expected_freq = freq - else: - expected_freq = freq + expected_freq = freq assert infer_freq(out.time) == expected_freq expected = date_range( From 98d3637280c62c8a701de8767e7eb46c09af41c7 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 9 Feb 2024 18:12:30 +0100 Subject: [PATCH 12/17] add freqs, invert ifs; add try block --- xarray/coding/cftime_offsets.py | 114 ++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 48 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 476b99e4f2d..7c2facf543f 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1265,31 +1265,37 @@ def _new_to_legacy_freq(freq): # we add logic to continue using the deprecated "M" and "Q" frequency # strings in these circumstances. - # "h" -> "H" conversion not required + # NOTE: other conversions ("h" -> "H", ..., "N" -> "ns") not required # TODO: remove once requiring pandas >= 2.2 + if freq and Version(pd.__version__) >= Version("2.2"): + return freq - if freq and Version(pd.__version__) < Version("2.2"): + try: freq_as_offset = to_offset(freq) - if isinstance(freq_as_offset, MonthEnd) and "ME" in freq: - freq = freq.replace("ME", "M") - elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq: - freq = freq.replace("QE", "Q") - elif isinstance(freq_as_offset, YearBegin) and "YS" in freq: - freq = freq.replace("YS", "AS") - elif isinstance(freq_as_offset, YearEnd): - # testing for "Y" is required as this was valid in xarray 2023.11 - 2024.01 - if "Y-" in freq: - # Check for and replace "Y-" instead of just "Y" to prevent - # corrupting anchored offsets that contain "Y" in the month - # abbreviation, e.g. "Y-MAY" -> "A-MAY". - freq = freq.replace("Y-", "A-") - elif "YE-" in freq: - freq = freq.replace("YE-", "A-") - elif "A-" not in freq and freq.endswith("Y"): - freq = freq.replace("Y", "A") - elif freq.endswith("YE"): - freq = freq.replace("YE", "A") + except ValueError: + # freq may be valid in pandas but not in xarray + return freq + + if isinstance(freq_as_offset, MonthEnd) and "ME" in freq: + freq = freq.replace("ME", "M") + elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq: + freq = freq.replace("QE", "Q") + elif isinstance(freq_as_offset, YearBegin) and "YS" in freq: + freq = freq.replace("YS", "AS") + elif isinstance(freq_as_offset, YearEnd): + # testing for "Y" is required as this was valid in xarray 2023.11 - 2024.01 + if "Y-" in freq: + # Check for and replace "Y-" instead of just "Y" to prevent + # corrupting anchored offsets that contain "Y" in the month + # abbreviation, e.g. "Y-MAY" -> "A-MAY". + freq = freq.replace("Y-", "A-") + elif "YE-" in freq: + freq = freq.replace("YE-", "A-") + elif "A-" not in freq and freq.endswith("Y"): + freq = freq.replace("Y", "A") + elif freq.endswith("YE"): + freq = freq.replace("YE", "A") return freq @@ -1299,34 +1305,46 @@ def _legacy_to_new_freq(freq): # TODO: remove once requiring pandas >= 2.2 - if freq and Version(pd.__version__) < Version("2.2"): + if freq and Version(pd.__version__) >= Version("2.2"): + return freq + + try: freq_as_offset = to_offset(freq, warn=False) - if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq: - freq = freq.replace("M", "ME") - elif isinstance(freq_as_offset, QuarterEnd) and "QE" not in freq: - freq = freq.replace("Q", "QE") - elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq: - freq = freq.replace("AS", "YS") - elif isinstance(freq_as_offset, YearEnd): - if "A-" in freq: - # Check for and replace "A-" instead of just "A" to prevent - # corrupting anchored offsets that contain "Y" in the month - # abbreviation, e.g. "A-MAY" -> "YE-MAY". - freq = freq.replace("A-", "YE-") - elif "Y-" in freq: - freq = freq.replace("Y-", "YE-") - elif freq.endswith("A"): - # the "A-MAY" case is already handled above - freq = freq.replace("A", "YE") - elif "YE" not in freq and freq.endswith("Y"): - # the "Y-MAY" case is already handled above - freq = freq.replace("Y", "YE") - elif isinstance(freq_as_offset, Hour): - freq = freq.replace("H", "h") - elif isinstance(freq_as_offset, Minute): - freq = freq.replace("T", "min") - elif isinstance(freq_as_offset, Second): - freq = freq.replace("S", "s") + except ValueError: + # freq may be valid in pandas but not in xarray + return freq + + if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq: + freq = freq.replace("M", "ME") + elif isinstance(freq_as_offset, QuarterEnd) and "QE" not in freq: + freq = freq.replace("Q", "QE") + elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq: + freq = freq.replace("AS", "YS") + elif isinstance(freq_as_offset, YearEnd): + if "A-" in freq: + # Check for and replace "A-" instead of just "A" to prevent + # corrupting anchored offsets that contain "Y" in the month + # abbreviation, e.g. "A-MAY" -> "YE-MAY". + freq = freq.replace("A-", "YE-") + elif "Y-" in freq: + freq = freq.replace("Y-", "YE-") + elif freq.endswith("A"): + # the "A-MAY" case is already handled above + freq = freq.replace("A", "YE") + elif "YE" not in freq and freq.endswith("Y"): + # the "Y-MAY" case is already handled above + freq = freq.replace("Y", "YE") + elif isinstance(freq_as_offset, Hour): + freq = freq.replace("H", "h") + elif isinstance(freq_as_offset, Minute): + freq = freq.replace("T", "min") + elif isinstance(freq_as_offset, Second): + freq = freq.replace("S", "s") + elif isinstance(freq_as_offset, Millisecond): + freq = freq.replace("L", "ms") + elif isinstance(freq_as_offset, Microsecond): + freq = freq.replace("U", "us") + return freq From 2d4dc434472fa007097a0f73fbe08a52795f7695 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 12 Feb 2024 11:32:36 +0100 Subject: [PATCH 13/17] properly invert if condition --- xarray/coding/cftime_offsets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 7c2facf543f..a01df6ad122 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1268,7 +1268,7 @@ def _new_to_legacy_freq(freq): # NOTE: other conversions ("h" -> "H", ..., "N" -> "ns") not required # TODO: remove once requiring pandas >= 2.2 - if freq and Version(pd.__version__) >= Version("2.2"): + if not freq or Version(pd.__version__) >= Version("2.2"): return freq try: @@ -1305,7 +1305,7 @@ def _legacy_to_new_freq(freq): # TODO: remove once requiring pandas >= 2.2 - if freq and Version(pd.__version__) >= Version("2.2"): + if not freq or Version(pd.__version__) >= Version("2.2"): return freq try: From 56711b14e8a00cf218b8c7bff18c6a542ff6eac4 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 12 Feb 2024 13:57:13 +0100 Subject: [PATCH 14/17] fix more tests --- xarray/tests/test_cftime_offsets.py | 2 +- xarray/tests/test_cftimeindex_resample.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index b1c0529ae61..821984b56c9 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1315,7 +1315,7 @@ def test_invalid_cftime_arg() -> None: with pytest.warns( FutureWarning, match="Following pandas, the `closed` parameter is deprecated" ): - cftime_range("2000", "2001", None, "Y", closed="left") + cftime_range("2000", "2001", None, "YE", closed="left") _CALENDAR_SPECIFIC_MONTH_END_TESTS = [ diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 023936e4393..5eaa131128f 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -26,7 +26,7 @@ FREQS = [ ("8003D", "4001D"), ("8003D", "16006D"), - ("8003D", "21AS"), + ("8003D", "21YS"), ("6h", "3h"), ("6h", "12h"), ("6h", "400min"), From 8c4be8e2c90b528045a6816636f0a581c9fdc00e Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 12 Feb 2024 13:58:19 +0100 Subject: [PATCH 15/17] fix comment --- xarray/coding/cftime_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a01df6ad122..556bab8504b 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1265,7 +1265,7 @@ def _new_to_legacy_freq(freq): # we add logic to continue using the deprecated "M" and "Q" frequency # strings in these circumstances. - # NOTE: other conversions ("h" -> "H", ..., "N" -> "ns") not required + # NOTE: other conversions ("h" -> "H", ..., "ns" -> "N") not required # TODO: remove once requiring pandas >= 2.2 if not freq or Version(pd.__version__) >= Version("2.2"): From 6670f285a4cd7c1c0d4acb900b0be6340f60fcf1 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 12 Feb 2024 14:08:43 +0100 Subject: [PATCH 16/17] whats new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d51b5da2a88..7293ab20773 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -42,6 +42,8 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- :py:func:`infer_freq` always returns the frequency strings as defined in pandas 2.2 + (:issue:`8612`, :pull:`8627`). By `Mathias Hauser `_. Deprecations ~~~~~~~~~~~~ From 6a8001b6ffff8eb2ae40eb47db2bff7ad655e51b Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 15 Feb 2024 17:27:04 +0100 Subject: [PATCH 17/17] test pd freq strings are passed through --- xarray/tests/__init__.py | 2 +- xarray/tests/test_cftime_offsets.py | 52 ++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 807df4acbf6..df0899509cb 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -109,7 +109,7 @@ def _importorskip( has_pint, requires_pint = _importorskip("pint") has_numexpr, requires_numexpr = _importorskip("numexpr") has_flox, requires_flox = _importorskip("flox") -has_pandas_gt_2_2, __ = _importorskip("pandas", "2.2") +has_pandas_ge_2_2, __ = _importorskip("pandas", "2.2") # some special cases diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 821984b56c9..a0bc678b51c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -40,7 +40,7 @@ _CFTIME_CALENDARS, assert_no_warnings, has_cftime, - has_pandas_gt_2_2, + has_pandas_ge_2_2, requires_cftime, ) @@ -1611,7 +1611,7 @@ def test_to_offset_deprecation_warning(freq): to_offset(freq) -@pytest.mark.skipif(has_pandas_gt_2_2, reason="only relevant for pandas lt 2.2") +@pytest.mark.skipif(has_pandas_ge_2_2, reason="only relevant for pandas lt 2.2") @pytest.mark.parametrize( "freq, expected", ( @@ -1636,7 +1636,7 @@ def test_legacy_to_new_freq(freq, expected, n): assert result == expected -@pytest.mark.skipif(has_pandas_gt_2_2, reason="only relevant for pandas lt 2.2") +@pytest.mark.skipif(has_pandas_ge_2_2, reason="only relevant for pandas lt 2.2") @pytest.mark.parametrize("year_alias", ("YE", "Y", "A")) @pytest.mark.parametrize("n", ("", "2")) def test_legacy_to_new_freq_anchored(year_alias, n): @@ -1649,7 +1649,7 @@ def test_legacy_to_new_freq_anchored(year_alias, n): assert result == expected -@pytest.mark.skipif(has_pandas_gt_2_2, reason="only relevant for pandas lt 2.2") +@pytest.mark.skipif(has_pandas_ge_2_2, reason="only relevant for pandas lt 2.2") @pytest.mark.filterwarnings("ignore:'[AY]' is deprecated") @pytest.mark.parametrize( "freq, expected", @@ -1665,7 +1665,7 @@ def test_new_to_legacy_freq(freq, expected, n): assert result == expected -@pytest.mark.skipif(has_pandas_gt_2_2, reason="only relevant for pandas lt 2.2") +@pytest.mark.skipif(has_pandas_ge_2_2, reason="only relevant for pandas lt 2.2") @pytest.mark.filterwarnings("ignore:'[AY]-.{3}' is deprecated") @pytest.mark.parametrize("year_alias", ("A", "Y", "YE")) @pytest.mark.parametrize("n", ("", "2")) @@ -1679,6 +1679,48 @@ def test_new_to_legacy_freq_anchored(year_alias, n): assert result == expected +@pytest.mark.skipif(has_pandas_ge_2_2, reason="only for pandas lt 2.2") +@pytest.mark.parametrize( + "freq, expected", + ( + # pandas-only freq strings are passed through + ("BH", "BH"), + ("CBH", "CBH"), + ("N", "N"), + ), +) +def test_legacy_to_new_freq_pd_freq_passthrough(freq, expected): + + result = _legacy_to_new_freq(freq) + assert result == expected + + +@pytest.mark.filterwarnings("ignore:'.' is deprecated ") +@pytest.mark.skipif(has_pandas_ge_2_2, reason="only for pandas lt 2.2") +@pytest.mark.parametrize( + "freq, expected", + ( + # these are each valid in pandas lt 2.2 + ("T", "T"), + ("min", "min"), + ("S", "S"), + ("s", "s"), + ("L", "L"), + ("ms", "ms"), + ("U", "U"), + ("us", "us"), + # pandas-only freq strings are passed through + ("bh", "bh"), + ("cbh", "cbh"), + ("ns", "ns"), + ), +) +def test_new_to_legacy_freq_pd_freq_passthrough(freq, expected): + + result = _new_to_legacy_freq(freq) + assert result == expected + + @pytest.mark.filterwarnings("ignore:Converting a CFTimeIndex with:") @pytest.mark.parametrize("start", ("2000", "2001")) @pytest.mark.parametrize("end", ("2000", "2001"))