Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unify freq strings (independent of pd version) #8627

Merged
merged 29 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4a3ad22
unify freq strings (independent of pd version)
mathause Jan 19, 2024
d1aee81
Merge branch 'main' into unify_freq_strings
max-sixty Jan 21, 2024
2fbea46
Merge branch 'main' into unify_freq_strings
mathause Jan 22, 2024
ba040dc
Merge branch 'unify_freq_strings' of https://github.com/mathause/xarr…
mathause Jan 22, 2024
ed214d7
Update xarray/tests/test_cftime_offsets.py
mathause Jan 22, 2024
8173e1b
update code and tests
mathause Jan 22, 2024
41d6bf6
Merge branch 'main' into unify_freq_strings
max-sixty Jan 23, 2024
211a130
Merge branch 'main' into unify_freq_strings
mathause Jan 23, 2024
8d47dae
Merge branch 'unify_freq_strings' of https://github.com/mathause/xarr…
mathause Jan 23, 2024
5df78c3
make mypy happy
mathause Jan 23, 2024
b497cf5
add 'YE' to _ANNUAL_OFFSET_TYPES
mathause Jan 23, 2024
a900d0f
un x-fail test
mathause Jan 23, 2024
3f8fb4b
Merge branch 'main' into unify_freq_strings
mathause Feb 1, 2024
c427227
Merge branch 'main' into unify_freq_strings
mathause Feb 1, 2024
c127115
adapt more freq strings
mathause Feb 1, 2024
ad82d70
simplify test
mathause Feb 2, 2024
afada3c
also translate 'h', 'min', 's'
mathause Feb 2, 2024
b61bc21
add comment
mathause Feb 7, 2024
eefe515
Merge branch 'main' into unify_freq_strings
mathause Feb 8, 2024
bacc617
Merge branch 'main' into unify_freq_strings
mathause Feb 9, 2024
6c24ae2
simplify test
mathause Feb 9, 2024
98d3637
add freqs, invert ifs; add try block
mathause Feb 9, 2024
594c247
Merge branch 'main' into unify_freq_strings
mathause Feb 12, 2024
2d4dc43
properly invert if condition
mathause Feb 12, 2024
56711b1
fix more tests
mathause Feb 12, 2024
8c4be8e
fix comment
mathause Feb 12, 2024
6670f28
whats new
mathause Feb 12, 2024
f86355b
Merge branch 'main' into unify_freq_strings
mathause Feb 15, 2024
6a8001b
test pd freq strings are passed through
mathause Feb 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 54 additions & 21 deletions xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ def _emit_freq_deprecation_warning(deprecated_freq):
emit_user_level_warning(message, FutureWarning)


def to_offset(freq):
def to_offset(freq, warn=True):
"""Convert a frequency string to the appropriate subclass of
BaseCFTimeOffset."""
if isinstance(freq, BaseCFTimeOffset):
Expand All @@ -758,7 +758,7 @@ def to_offset(freq):
raise ValueError("Invalid frequency string provided")

freq = freq_data["freq"]
if freq in _DEPRECATED_FREQUENICES:
if warn and freq in _DEPRECATED_FREQUENICES:
_emit_freq_deprecation_warning(freq)
multiples = freq_data["multiple"]
multiples = 1 if multiples is None else int(multiples)
Expand Down Expand Up @@ -1226,7 +1226,8 @@ def date_range(
start=start,
end=end,
periods=periods,
freq=freq,
# TODO remove translation once requiring pandas >= 2.2
freq=_new_to_legacy_freq(freq),
tz=tz,
normalize=normalize,
name=name,
Expand Down Expand Up @@ -1254,6 +1255,54 @@ def date_range(
)


def _new_to_legacy_freq(freq):
    """Translate a new-style frequency string to its legacy pandas spelling.

    xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd
    frequencies, but older versions of pandas do not support these as
    frequency strings. Until xarray's minimum pandas version is 2.2 or above,
    we add logic to continue using the deprecated "M" and "Q" frequency
    strings in these circumstances. The same is done for "YS" -> "AS" and
    "Y-" -> "A-".

    Parameters
    ----------
    freq : str or None
        Frequency string that is about to be handed to pandas.

    Returns
    -------
    str or None
        ``freq`` unchanged when pandas >= 2.2 is installed, when ``freq`` is
        empty, or when ``to_offset`` cannot parse it; otherwise ``freq`` with
        the new-style alias replaced by the legacy one.
    """
    # TODO: remove once requiring pandas >= 2.2

    # Nothing to translate for an empty freq or for a pandas that already
    # understands the new aliases.
    if not freq or Version(pd.__version__) >= Version("2.2"):
        return freq

    try:
        freq_as_offset = to_offset(freq)
    except ValueError:
        # to_offset raises ValueError for strings it cannot parse. Such a
        # string may still be a valid pandas frequency, so hand it through
        # untouched and let pandas decide.
        return freq

    if isinstance(freq_as_offset, MonthEnd) and "ME" in freq:
        freq = freq.replace("ME", "M")
    elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq:
        freq = freq.replace("QE", "Q")
    elif isinstance(freq_as_offset, YearBegin) and "YS" in freq:
        freq = freq.replace("YS", "AS")
    elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq:
        # Check for and replace "Y-" instead of just "Y" to prevent
        # corrupting anchored offsets that contain "Y" in the month
        # abbreviation, e.g. "Y-MAY" -> "A-MAY".
        freq = freq.replace("Y-", "A-")

    return freq


def _legacy_to_new_freq(freq):
    """Translate a legacy pandas frequency string to its new-style spelling.

    Used to avoid internal deprecation warnings when ``freq`` was determined
    using pandas < 2.2, which still reports the legacy aliases ("M", "Q",
    "AS", "A-").

    Parameters
    ----------
    freq : str or None
        Frequency string, possibly using legacy aliases.

    Returns
    -------
    str or None
        ``freq`` unchanged when pandas >= 2.2 is installed, when ``freq`` is
        empty, or when ``to_offset`` cannot parse it; otherwise ``freq`` with
        the legacy alias replaced by the new one.
    """
    # TODO: remove once requiring pandas >= 2.2

    if not freq or Version(pd.__version__) >= Version("2.2"):
        return freq

    try:
        # warn=False: the legacy alias is expected here, so do not emit the
        # deprecation warning for it.
        freq_as_offset = to_offset(freq, warn=False)
    except ValueError:
        # to_offset raises ValueError for strings it cannot parse; leave
        # those untouched rather than failing the translation step.
        return freq

    if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq:
        freq = freq.replace("M", "ME")
    elif isinstance(freq_as_offset, QuarterEnd) and "QE" not in freq:
        freq = freq.replace("Q", "QE")
    elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq:
        freq = freq.replace("AS", "YS")
    elif isinstance(freq_as_offset, YearEnd) and "Y-" not in freq:
        # Check for and replace "A-" instead of just "A" to prevent
        # corrupting anchored offsets that contain "A" in the month
        # abbreviation, e.g. "A-MAY" -> "Y-MAY".
        freq = freq.replace("A-", "Y-")

    return freq


def date_range_like(source, calendar, use_cftime=None):
"""Generate a datetime array with the same frequency, start and end as
another one, but in a different calendar.
Expand Down Expand Up @@ -1298,24 +1347,8 @@ def date_range_like(source, calendar, use_cftime=None):
"`date_range_like` was unable to generate a range as the source frequency was not inferable."
)

# xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd
# frequencies, but older versions of pandas do not support these as
# frequency strings. Until xarray's minimum pandas version is 2.2 or above,
# we add logic to continue using the deprecated "M" and "Q" frequency
# strings in these circumstances.
if Version(pd.__version__) < Version("2.2"):
freq_as_offset = to_offset(freq)
if isinstance(freq_as_offset, MonthEnd) and "ME" in freq:
freq = freq.replace("ME", "M")
elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq:
freq = freq.replace("QE", "Q")
elif isinstance(freq_as_offset, YearBegin) and "YS" in freq:
freq = freq.replace("YS", "AS")
elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq:
# Check for and replace "Y-" instead of just "Y" to prevent
# corrupting anchored offsets that contain "Y" in the month
# abbreviation, e.g. "Y-MAY" -> "A-MAY".
freq = freq.replace("Y-", "A-")
# TODO remove once requiring pandas >= 2.2
freq = _legacy_to_new_freq(freq)

use_cftime = _should_cftime_be_used(source, calendar, use_cftime)

Expand Down
4 changes: 3 additions & 1 deletion xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import numpy as np
import pandas as pd

from xarray.coding.cftime_offsets import _new_to_legacy_freq
from xarray.core import dtypes, duck_array_ops, nputils, ops
from xarray.core._aggregations import (
DataArrayGroupByAggregations,
Expand Down Expand Up @@ -532,7 +533,8 @@ def __post_init__(self) -> None:
)
else:
index_grouper = pd.Grouper(
freq=grouper.freq,
# TODO remove once requiring pandas >= 2.2
freq=_new_to_legacy_freq(grouper.freq),
closed=grouper.closed,
label=grouper.label,
origin=grouper.origin,
Expand Down
17 changes: 6 additions & 11 deletions xarray/tests/test_cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1313,10 +1313,10 @@
assert len(result) == expected_number_of_days


@pytest.mark.parametrize("freq", ["Y", "M", "D"])
@pytest.mark.parametrize("freq", ["Y", "ME", "D"])
def test_dayofweek_after_cftime_range(freq: str) -> None:
result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek
expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 all-but-dask

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9 min-all-deps

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 flaky

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.12

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.11

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.9

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.12

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.11

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME
mathause marked this conversation as resolved.
Show resolved Hide resolved
np.testing.assert_array_equal(result, expected)


Expand Down Expand Up @@ -1392,29 +1392,24 @@
)
def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd):
expected_xarray_freq = freq
expected_pandas_freq = freq

# pandas changed what is returned for infer_freq in version 2.2. The
# development version of xarray follows this, but we need to adapt this test
# to still handle older versions of pandas.
if Version(pd.__version__) < Version("2.2"):
if "ME" in freq:
freq = freq.replace("ME", "M")
expected_pandas_freq = freq
expected_pandas_freq = expected_pandas_freq.replace("ME", "M")
elif "QE" in freq:
freq = freq.replace("QE", "Q")
expected_pandas_freq = freq
expected_pandas_freq = expected_pandas_freq.replace("QE", "Q")
elif "YS" in freq:
freq = freq.replace("YS", "AS")
expected_pandas_freq = freq
expected_pandas_freq = expected_pandas_freq.replace("YS", "AS")
elif "Y-" in freq:
freq = freq.replace("Y-", "A-")
expected_pandas_freq = freq
expected_pandas_freq = expected_pandas_freq.replace("Y-", "A-")
elif "h" in freq:
expected_pandas_freq = freq.replace("h", "H")
else:
raise ValueError(f"Test not implemented for freq {freq!r}")
else:
expected_pandas_freq = freq

source = date_range(start, periods=12, freq=freq, calendar=cal_src)

Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6945,7 +6945,7 @@ def test_differentiate_datetime(dask) -> None:
@pytest.mark.parametrize("dask", [True, False])
def test_differentiate_cftime(dask) -> None:
rs = np.random.RandomState(42)
coord = xr.cftime_range("2000", periods=8, freq="2M")
coord = xr.cftime_range("2000", periods=8, freq="2ME")

da = xr.DataArray(
rs.randn(8, 6),
Expand Down
6 changes: 3 additions & 3 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1754,19 +1754,19 @@ def test_resample_doctest(self, use_cftime: bool) -> None:
time=(
"time",
xr.date_range(
"2001-01-01", freq="M", periods=6, use_cftime=use_cftime
"2001-01-01", freq="ME", periods=6, use_cftime=use_cftime
),
),
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
),
)
actual = da.resample(time="3M").count()
actual = da.resample(time="3ME").count()
expected = DataArray(
[1, 3, 1],
dims="time",
coords={
"time": xr.date_range(
"2001-01-01", freq="3M", periods=3, use_cftime=use_cftime
"2001-01-01", freq="3ME", periods=3, use_cftime=use_cftime
)
},
)
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar):

@requires_cftime
@pytest.mark.parametrize(
("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"])
("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1ME", "1Y"])
)
def test_get_clean_interp_index_dt(cf_da, calendar, freq):
"""In the gregorian case, the index should be proportional to normal datetimes."""
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2955,7 +2955,7 @@ def setUp(self) -> None:
"""
# case for 1d array
data = np.random.rand(4, 12)
time = xr.cftime_range(start="2017", periods=12, freq="1M", calendar="noleap")
time = xr.cftime_range(start="2017", periods=12, freq="1ME", calendar="noleap")
darray = DataArray(data, dims=["x", "time"])
darray.coords["time"] = time

Expand Down
Loading