Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support additional dtypes in resample #9413

Merged
merged 18 commits into from
Sep 7, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ Bug fixes
- Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray``
in NumPy 2.0 (:issue:`9312`, :pull:`9393`)
By `Andrew Scherer <https://github.com/andrew-s28>`_.
- Fix support for using ``pandas.DateOffset``, ``pandas.Timedelta``, and
``datetime.timedelta`` objects as ``resample`` frequencies
(:issue:`9408`, :pull:`9413`).
By `Oliver Higgs <https://github.com/oliverhiggs>`_.

Performance
~~~~~~~~~~~
Expand Down
42 changes: 39 additions & 3 deletions xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from collections.abc import Mapping
from datetime import datetime, timedelta
from functools import partial
from typing import TYPE_CHECKING, ClassVar, Literal
from typing import TYPE_CHECKING, ClassVar, Literal, TypeVar

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -80,6 +80,7 @@


DayOption: TypeAlias = Literal["start", "end"]
T_FreqStr = TypeVar("T_FreqStr", str, None)


def _nanosecond_precision_timestamp(*args, **kwargs):
Expand Down Expand Up @@ -772,11 +773,18 @@ def _emit_freq_deprecation_warning(deprecated_freq):
emit_user_level_warning(message, FutureWarning)


def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffset:
def to_offset(
freq: BaseCFTimeOffset | str | timedelta | pd.Timedelta | pd.DateOffset,
warn: bool = True,
) -> BaseCFTimeOffset:
"""Convert a frequency string to the appropriate subclass of
BaseCFTimeOffset."""
if isinstance(freq, BaseCFTimeOffset):
return freq
if isinstance(freq, timedelta | pd.Timedelta):
return delta_to_tick(freq)
if isinstance(freq, pd.DateOffset):
freq = _new_freq(freq.freqstr)
spencerkclark marked this conversation as resolved.
Show resolved Hide resolved

match = re.match(_PATTERN, freq)
if match is None:
Expand All @@ -791,6 +799,34 @@ def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffs
return _FREQUENCIES[freq](n=multiples)


def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick:
"""Adapted from pandas.tslib.delta_to_tick"""
if isinstance(delta, pd.Timedelta) and delta.nanoseconds != 0:
# pandas.Timedelta has nanoseconds, but these are not supported
raise ValueError(
"Unable to convert 'pandas.Timedelta' object with non-zero "
"nanoseconds to 'CFTimeOffset' object"
)
if delta.microseconds == 0:
if delta.seconds == 0:
return Day(n=delta.days)
else:
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(n=seconds // 3600)
elif seconds % 60 == 0:
return Minute(n=seconds // 60)
else:
return Second(n=seconds)
else:
# Regardless of the days and seconds this will always be a Millisecond
# or Microsecond object
if delta.microseconds % 1_000 == 0:
return Millisecond(n=delta.microseconds // 1_000)
else:
return Microsecond(n=delta.microseconds)


def to_cftime_datetime(date_str_or_date, calendar=None):
if cftime is None:
raise ModuleNotFoundError("No module named 'cftime'")
Expand Down Expand Up @@ -1332,7 +1368,7 @@ def _new_to_legacy_freq(freq):
return freq


def _legacy_to_new_freq(freq):
def _legacy_to_new_freq(freq: T_FreqStr) -> T_FreqStr:
# to avoid internal deprecation warnings when freq is determined using pandas < 2.2

# TODO: remove once requiring pandas >= 2.2
Expand Down
16 changes: 10 additions & 6 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import datetime
import warnings
from collections.abc import Callable, Hashable, Iterable, Iterator, Mapping
from contextlib import suppress
Expand All @@ -13,6 +14,7 @@
from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops
from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.types import ResampleCompatible
from xarray.core.utils import (
Frozen,
either_dict_or_kwargs,
Expand All @@ -32,8 +34,6 @@


if TYPE_CHECKING:
import datetime

from numpy.typing import DTypeLike

from xarray.core.dataarray import DataArray
Expand Down Expand Up @@ -891,14 +891,14 @@ def rolling_exp(
def _resample(
self,
resample_cls: type[T_Resample],
indexer: Mapping[Hashable, str | Resampler] | None,
indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None,
skipna: bool | None,
closed: SideOptions | None,
label: SideOptions | None,
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
restore_coord_dims: bool | None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> T_Resample:
"""Returns a Resample object for performing resampling operations.

Expand Down Expand Up @@ -1078,14 +1078,18 @@ def _resample(
)

grouper: Resampler
if isinstance(freq, str):
if isinstance(freq, ResampleCompatible):
grouper = TimeResampler(
freq=freq, closed=closed, label=label, origin=origin, offset=offset
)
elif isinstance(freq, Resampler):
grouper = freq
else:
raise ValueError("freq must be a str or a Resampler object")
raise ValueError(
"freq must be an object of type 'str', 'datetime.timedelta', "
"'pandas.Timedelta', 'pandas.DateOffset', or 'TimeResampler'. "
f"Received {type(freq)} instead."
)

rgrouper = ResolvedGrouper(grouper, group, self)

Expand Down
9 changes: 5 additions & 4 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
QueryEngineOptions,
QueryParserOptions,
ReindexMethodOptions,
ResampleCompatible,
Self,
SideOptions,
T_ChunkDimFreq,
Expand Down Expand Up @@ -7269,15 +7270,15 @@ def coarsen(
@_deprecate_positional_args("v2024.07.0")
def resample(
self,
indexer: Mapping[Hashable, str | Resampler] | None = None,
indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None = None,
*,
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> DataArrayResample:
"""Returns a Resample object for performing resampling operations.

Expand All @@ -7288,7 +7289,7 @@ def resample(

Parameters
----------
indexer : Mapping of Hashable to str, optional
indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional
Mapping from the dimension name to resample frequency [1]_. The
dimension must be datetime-like.
skipna : bool, optional
Expand All @@ -7312,7 +7313,7 @@ def resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
**indexer_kwargs : str
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.

Expand Down
9 changes: 5 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@
QueryEngineOptions,
QueryParserOptions,
ReindexMethodOptions,
ResampleCompatible,
SideOptions,
T_ChunkDimFreq,
T_DatasetPadConstantValues,
Expand Down Expand Up @@ -10710,15 +10711,15 @@ def coarsen(
@_deprecate_positional_args("v2024.07.0")
def resample(
self,
indexer: Mapping[Any, str | Resampler] | None = None,
indexer: Mapping[Any, ResampleCompatible | Resampler] | None = None,
*,
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> DatasetResample:
"""Returns a Resample object for performing resampling operations.

Expand All @@ -10729,7 +10730,7 @@ def resample(

Parameters
----------
indexer : Mapping of Hashable to str, optional
indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional
Mapping from the dimension name to resample frequency [1]_. The
dimension must be datetime-like.
skipna : bool, optional
Expand All @@ -10753,7 +10754,7 @@ def resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
**indexer_kwargs : str
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.

Expand Down
4 changes: 2 additions & 2 deletions xarray/core/resample_cftime.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
from xarray.core.types import SideOptions

if typing.TYPE_CHECKING:
from xarray.core.types import CFTimeDatetime
from xarray.core.types import CFTimeDatetime, ResampleCompatible


class CFTimeGrouper:
Expand All @@ -75,7 +75,7 @@ class CFTimeGrouper:

def __init__(
self,
freq: str | BaseCFTimeOffset,
freq: ResampleCompatible | BaseCFTimeOffset,
closed: SideOptions | None = None,
label: SideOptions | None = None,
origin: str | CFTimeDatetime = "start_day",
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,5 @@ def copy(
Bins = Union[
int, Sequence[int], Sequence[float], Sequence[pd.Timestamp], np.ndarray, pd.Index
]

ResampleCompatible: TypeAlias = str | datetime.timedelta | pd.Timedelta | pd.DateOffset
20 changes: 16 additions & 4 deletions xarray/groupers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@
import numpy as np
import pandas as pd

from xarray.coding.cftime_offsets import _new_to_legacy_freq
from xarray.coding.cftime_offsets import BaseCFTimeOffset, _new_to_legacy_freq
from xarray.core import duck_array_ops
from xarray.core.coordinates import Coordinates
from xarray.core.dataarray import DataArray
from xarray.core.groupby import T_Group, _DummyGroup
from xarray.core.indexes import safe_cast_to_index
from xarray.core.resample_cftime import CFTimeGrouper
from xarray.core.types import Bins, DatetimeLike, GroupIndices, SideOptions
from xarray.core.types import (
Bins,
DatetimeLike,
GroupIndices,
ResampleCompatible,
SideOptions,
)
from xarray.core.variable import Variable

__all__ = [
Expand Down Expand Up @@ -336,7 +342,7 @@ class TimeResampler(Resampler):

Attributes
----------
freq : str
freq : str, datetime.timedelta, pandas.Timestamp, or pandas.DateOffset
Frequency to resample to. See `Pandas frequency
aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
for a list of possible values.
Expand All @@ -358,7 +364,7 @@ class TimeResampler(Resampler):
An offset timedelta added to the origin.
"""

freq: str
freq: ResampleCompatible
closed: SideOptions | None = field(default=None)
label: SideOptions | None = field(default=None)
origin: str | DatetimeLike = field(default="start_day")
Expand Down Expand Up @@ -388,6 +394,12 @@ def _init_properties(self, group: T_Group) -> None:
offset=offset,
)
else:
if isinstance(self.freq, BaseCFTimeOffset):
raise ValueError(
"'BaseCFTimeOffset' resample frequencies are only supported "
"when resampling a 'CFTimeIndex'"
)

self.index_grouper = pd.Grouper(
# TODO remove once requiring pandas >= 2.2
freq=_new_to_legacy_freq(self.freq),
Expand Down
Loading
Loading