From da78bcc092609bb24aebb5be43f41c95519d78a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 13:30:52 +0100 Subject: [PATCH 01/13] refactor out _maybe_strip_tz_from_timestamp for better readability --- xarray/coding/times.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4622298e152..16ef713b212 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -189,6 +189,14 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: return delta_units, ref_date +def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: + # If the ref_date Timestamp is timezone-aware, convert to UTC and + # make it timezone-naive (GH 2649). + if date.tz is not None: + return date.tz_convert("UTC").tz_convert(None) + return date + + def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime @@ -196,10 +204,7 @@ def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: # TODO: the strict enforcement of nanosecond precision Timestamps can be # relaxed when addressing GitHub issue #7493. ref_date = nanosecond_precision_timestamp(_ref_date) - # If the ref_date Timestamp is timezone-aware, convert to UTC and - # make it timezone-naive (GH 2649). - if ref_date.tz is not None: - ref_date = ref_date.tz_convert(None) + ref_date = _maybe_strip_tz_from_timestamp(ref_date) return time_units, ref_date From b55b9035281fcbd3239a1aa4df9f0caa5a5b1bff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 14:41:01 +0100 Subject: [PATCH 02/13] backwards compatible refactor of time coding, preparing for relaxing nanosecond-restriction --- xarray/coding/times.py | 112 +++++++++++++++++++++--------- xarray/core/pdcompat.py | 17 ++++- xarray/tests/test_coding_times.py | 4 +- 3 files changed, 97 insertions(+), 36 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 16ef713b212..1e652b96200 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -24,7 +24,7 @@ from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item -from xarray.core.pdcompat import nanosecond_precision_timestamp +from xarray.core.pdcompat import nanosecond_precision_timestamp, timestamp_as_unit from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type @@ -36,7 +36,11 @@ except ImportError: cftime = None -from xarray.core.types import CFCalendar, NPDatetimeUnitOptions, T_DuckArray +from xarray.core.types import ( + CFCalendar, + NPDatetimeUnitOptions, + T_DuckArray, +) T_Name = Union[Hashable, None] @@ -197,15 +201,18 @@ def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: return date -def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: +def _unpack_time_unit_and_ref_date( + units: str, +) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime - time_units, _ref_date = _unpack_netcdf_time_units(units) + time_unit, _ref_date = _unpack_netcdf_time_units(units) + time_unit = _netcdf_to_numpy_timeunit(time_unit) # TODO: the strict enforcement of nanosecond precision Timestamps can be # relaxed when addressing GitHub issue #7493. ref_date = nanosecond_precision_timestamp(_ref_date) ref_date = _maybe_strip_tz_from_timestamp(ref_date) - return time_units, ref_date + return time_unit, ref_date def _decode_cf_datetime_dtype( @@ -252,6 +259,30 @@ def _decode_datetime_with_cftime( return np.array([], dtype=object) +def _check_date_for_units_since_refdate( + date, unit: str, ref_date: pd.Timestamp +) -> pd.Timestamp: + # check for out-of-bounds floats and raise + if date > np.iinfo("int64").max or date < np.iinfo("int64").min: + raise OutOfBoundsTimedelta( + f"Value {date} can't be represented as Datetime/Timedelta." + ) + delta = date * np.timedelta64(1, unit) + if not np.isnan(delta): + # this will raise on dtype overflow for integer dtypes + if date.dtype.kind in "iu" and not np.int64(delta) == date: + raise OutOfBoundsTimedelta( + "DType overflow in Datetime/Timedelta calculation." + ) + # this will raise on overflow if ref_date + delta + # can't be represented in the current ref_date resolution + return timestamp_as_unit(ref_date + delta, ref_date.unit) + else: + # if date is exactly NaT (np.iinfo("int64").min) return refdate + # to make follow-up checks work + return ref_date + + def _decode_datetime_with_pandas( flat_num_dates: np.ndarray, units: str, calendar: str ) -> np.ndarray: @@ -270,12 +301,8 @@ def _decode_datetime_with_pandas( elif flat_num_dates.dtype.kind == "u": flat_num_dates = flat_num_dates.astype(np.uint64) - time_units, ref_date_str = _unpack_netcdf_time_units(units) - time_units = _netcdf_to_numpy_timeunit(time_units) try: - # TODO: the strict enforcement of nanosecond precision Timestamps can be - # relaxed when addressing GitHub issue #7493. - ref_date = nanosecond_precision_timestamp(ref_date_str) + time_unit, ref_date = _unpack_time_unit_and_ref_date(units) except ValueError as err: # ValueError is raised by pd.Timestamp for non-ISO timestamp # strings, in which case we fall back to using cftime @@ -285,8 +312,12 @@ def _decode_datetime_with_pandas( warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning) if flat_num_dates.size > 0: # avoid size 0 datetimes GH1329 - pd.to_timedelta(flat_num_dates.min(), time_units) + ref_date - pd.to_timedelta(flat_num_dates.max(), time_units) + ref_date + _check_date_for_units_since_refdate( + flat_num_dates.min(), time_unit, ref_date + ) + _check_date_for_units_since_refdate( + flat_num_dates.max(), time_unit, ref_date + ) # To avoid integer overflow when converting to nanosecond units for integer # dtypes smaller than np.int64 cast all integer and unsigned integer dtype @@ -299,20 +330,24 @@ def _decode_datetime_with_pandas( elif flat_num_dates.dtype.kind in "f": flat_num_dates = flat_num_dates.astype(np.float64) - # Cast input ordinals to integers of nanoseconds because pd.to_timedelta - # works much faster when dealing with integers (GH 1399). - # properly handle NaN/NaT to prevent casting NaN to int + # keep NaT/nan mask nan = np.isnan(flat_num_dates) | (flat_num_dates == np.iinfo(np.int64).min) - flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[time_units] - flat_num_dates_ns_int = np.zeros_like(flat_num_dates, dtype=np.int64) - flat_num_dates_ns_int[nan] = np.iinfo(np.int64).min - flat_num_dates_ns_int[~nan] = flat_num_dates[~nan].astype(np.int64) + # in case we need to change the unit, we fix the numbers here + # this should be safe, as errors would have been raised above + ns_time_unit = _NS_PER_TIME_DELTA[time_unit] + ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_date.unit] + if flat_num_dates.dtype.kind in "iuf" and (ns_time_unit > ns_ref_date_unit): + flat_num_dates *= np.int64(ns_time_unit / ns_ref_date_unit) + time_unit = ref_date.unit - # Use pd.to_timedelta to safely cast integer values to timedeltas, - # and add those to a Timestamp to safely produce a DatetimeIndex. This - # ensures that we do not encounter integer overflow at any point in the - # process without raising OutOfBoundsDatetime. - return (pd.to_timedelta(flat_num_dates_ns_int, "ns") + ref_date).values + # Cast input ordinals to integers and properly handle NaN/NaT + # to prevent casting NaN to int + flat_num_dates_int = np.zeros_like(flat_num_dates, dtype=np.int64) + flat_num_dates_int[nan] = np.iinfo(np.int64).min + flat_num_dates_int[~nan] = flat_num_dates[~nan].astype(np.int64) + + # cast to timedelta64[time_unit] and add to ref_date + return ref_date + flat_num_dates_int.astype(f"timedelta64[{time_unit}]") def decode_cf_datetime( @@ -344,10 +379,16 @@ def decode_cf_datetime( dates = _decode_datetime_with_cftime( flat_num_dates.astype(float), units, calendar ) + # retrieve cftype + cftype = type(dates[np.nanargmin(num_dates)]) + # "ns" boarders + # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807'] + lower = cftype(1677, 9, 21, 0, 12, 43, 145224) + upper = cftype(2262, 4, 11, 23, 47, 16, 854775) if ( - dates[np.nanargmin(num_dates)].year < 1678 - or dates[np.nanargmax(num_dates)].year >= 2262 + dates[np.nanargmin(num_dates)] < lower + or dates[np.nanargmax(num_dates)] > upper ): if _is_standard_calendar(calendar): warnings.warn( @@ -768,8 +809,8 @@ def _eagerly_encode_cf_datetime( raise OutOfBoundsDatetime assert dates.dtype == "datetime64[ns]" - time_units, ref_date = _unpack_time_units_and_ref_date(units) - time_delta = _time_units_to_timedelta64(time_units) + time_unit, ref_date = _unpack_time_unit_and_ref_date(units) + time_delta = np.timedelta64(1, time_unit) # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from @@ -778,16 +819,17 @@ def _eagerly_encode_cf_datetime( time_deltas = dates_as_index - ref_date # retrieve needed units to faithfully encode to int64 - needed_units, data_ref_date = _unpack_time_units_and_ref_date(data_units) + needed_unit, data_ref_date = _unpack_time_unit_and_ref_date(data_units) + needed_units = _numpy_to_netcdf_timeunit(needed_unit) if data_units != units: # this accounts for differences in the reference times ref_delta = abs(data_ref_date - ref_date).to_timedelta64() - data_delta = _time_units_to_timedelta64(needed_units) + data_delta = np.timedelta64(1, needed_unit) if (ref_delta % data_delta) > np.timedelta64(0, "ns"): needed_units = _infer_time_units_from_diff(ref_delta) # needed time delta to encode faithfully to int64 - needed_time_delta = _time_units_to_timedelta64(needed_units) + needed_time_delta = _unit_timedelta_numpy(needed_units) floor_division = np.issubdtype(dtype, np.integer) or dtype is None if time_delta > needed_time_delta: @@ -800,6 +842,7 @@ def _eagerly_encode_cf_datetime( f"Set encoding['dtype'] to floating point dtype to silence this warning." ) elif np.issubdtype(dtype, np.integer) and allow_units_modification: + floor_division = True new_units = f"{needed_units} since {format_timestamp(ref_date)}" emit_user_level_warning( f"Times can't be serialized faithfully to int64 with requested units {units!r}. " @@ -809,9 +852,12 @@ def _eagerly_encode_cf_datetime( ) units = new_units time_delta = needed_time_delta - floor_division = True - num = _division(time_deltas, time_delta, floor_division) + # get resolution of TimedeltaIndex and align time_delta + # todo: check, if this works in any case + num = _division( + time_deltas, time_delta.astype(f"=m8[{time_deltas.unit}]"), floor_division + ) num = reshape(num.values, dates.shape) except (OutOfBoundsDatetime, OverflowError, ValueError): diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index ae4febd6beb..cefe0c71a0a 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -36,11 +36,13 @@ from __future__ import annotations from enum import Enum -from typing import Literal +from typing import Literal, cast import pandas as pd from packaging.version import Version +from xarray.core.types import PDDatetimeUnitOptions + def count_not_none(*args) -> int: """Compute the number of non-None arguments. @@ -73,6 +75,19 @@ def __repr__(self) -> str: NoDefault = Literal[_NoDefault.no_default] # For typing following pandas +def timestamp_as_unit(date: pd.Timestamp, unit: str) -> pd.Timestamp: + # compatibility function for pandas issue + # where "as_unit" is not defined for pandas.Timestamp + # in pandas versions < 2.2 + # can be removed minimum pandas version is >= 2.2 + unit = cast(PDDatetimeUnitOptions, unit) + if hasattr(date, "as_unit"): + date = date.as_unit(unit) + elif hasattr(date, "_as_unit"): + date = date._as_unit(unit) + return date + + def nanosecond_precision_timestamp(*args, **kwargs) -> pd.Timestamp: """Return a nanosecond-precision Timestamp object. diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 9a51ca40d07..685767b71bb 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -167,8 +167,8 @@ def test_decode_cf_datetime_overflow() -> None: units = "days since 2000-01-01 00:00:00" # date after 2262 and before 1678 - days = (-117608, 95795) - expected = (datetime(1677, 12, 31), datetime(2262, 4, 12)) + days = (-117710, 95795) + expected = (datetime(1677, 9, 20), datetime(2262, 4, 12)) for i, day in enumerate(days): with warnings.catch_warnings(): From 34ad89eefcbbfd06ba5bd63036f4756bce5a62d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 14:46:59 +0100 Subject: [PATCH 03/13] add whats-new.rst entry --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dec80590c11..5f3dce53d28 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,6 +65,8 @@ Internal Changes within ``as_compatible_data``. This is consistent with how lists of these objects will be converted (:pull:`9900`). By `Kai Mühlbauer `_. +- Refactor of time coding to prepare for realxing nanosecond restriction (:pull:`9906`). + By `Kai Mühlbauer `_. .. _whats-new.2024.11.0: From 69645eb8d838778f1112bd21e59b4b6d0ed66ad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 14:48:29 +0100 Subject: [PATCH 04/13] Update doc/whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5f3dce53d28..f4cfbafc0ae 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,7 +65,7 @@ Internal Changes within ``as_compatible_data``. This is consistent with how lists of these objects will be converted (:pull:`9900`). By `Kai Mühlbauer `_. -- Refactor of time coding to prepare for realxing nanosecond restriction (:pull:`9906`). +- Refactor of time coding to prepare for relaxing nanosecond restriction (:pull:`9906`). By `Kai Mühlbauer `_. .. _whats-new.2024.11.0: From 313bf2e93520123089ee78867d08ee272e14b33b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 14:50:07 +0100 Subject: [PATCH 05/13] Update xarray/coding/times.py --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 1e652b96200..4ee7c839d2c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -381,7 +381,7 @@ def decode_cf_datetime( ) # retrieve cftype cftype = type(dates[np.nanargmin(num_dates)]) - # "ns" boarders + # "ns" borders # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807'] lower = cftype(1677, 9, 21, 0, 12, 43, 145224) upper = cftype(2262, 4, 11, 23, 47, 16, 854775) From 669539dbc4bb8abb3510610db849ab32ef510c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:48:57 +0100 Subject: [PATCH 06/13] only check for unsigned int --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4ee7c839d2c..fe07f1b7244 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -270,7 +270,7 @@ def _check_date_for_units_since_refdate( delta = date * np.timedelta64(1, unit) if not np.isnan(delta): # this will raise on dtype overflow for integer dtypes - if date.dtype.kind in "iu" and not np.int64(delta) == date: + if date.dtype.kind in "u" and not np.int64(delta) == date: raise OutOfBoundsTimedelta( "DType overflow in Datetime/Timedelta calculation." ) From 678e6d90b9540e37f76c05a9b06a88cade728b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:50:34 +0100 Subject: [PATCH 07/13] return pd.NaT in case of np.iinfo("int64").min --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index fe07f1b7244..4ab3a8f5662 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -280,7 +280,7 @@ def _check_date_for_units_since_refdate( else: # if date is exactly NaT (np.iinfo("int64").min) return refdate # to make follow-up checks work - return ref_date + return pd.NaT def _decode_datetime_with_pandas( From fb5a4fb05469b2b23b36ec58cae62071f3b6d60c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:51:52 +0100 Subject: [PATCH 08/13] skip check for `"iuf"`, it's unneeded --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4ab3a8f5662..6886664f951 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -336,7 +336,7 @@ def _decode_datetime_with_pandas( # this should be safe, as errors would have been raised above ns_time_unit = _NS_PER_TIME_DELTA[time_unit] ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_date.unit] - if flat_num_dates.dtype.kind in "iuf" and (ns_time_unit > ns_ref_date_unit): + if ns_time_unit > ns_ref_date_unit: flat_num_dates *= np.int64(ns_time_unit / ns_ref_date_unit) time_unit = ref_date.unit From 70f96d09b66664ae58649e29954ed40bb33912af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:57:49 +0100 Subject: [PATCH 09/13] return pd.Timestamp("NaT") instead pd.NaT (fixes typing) --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 6886664f951..7d393acaddf 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -280,7 +280,7 @@ def _check_date_for_units_since_refdate( else: # if date is exactly NaT (np.iinfo("int64").min) return refdate # to make follow-up checks work - return pd.NaT + return pd.Timestamp("NaT") def _decode_datetime_with_pandas( From de6f3db0a1d354d990fa5b8f40b25e3459d80e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 16:04:33 +0100 Subject: [PATCH 10/13] Update xarray/coding/times.py Co-authored-by: Spencer Clark --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 7d393acaddf..326682aab56 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -278,7 +278,7 @@ def _check_date_for_units_since_refdate( # can't be represented in the current ref_date resolution return timestamp_as_unit(ref_date + delta, ref_date.unit) else: - # if date is exactly NaT (np.iinfo("int64").min) return refdate + # if date is exactly NaT (np.iinfo("int64").min) return NaT # to make follow-up checks work return pd.Timestamp("NaT") From 90de1edcacf3829c2f5bb4acd5bea2727deaa34a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 14:16:37 +0000 Subject: [PATCH 11/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 26840c6fd22..cf55ca1a227 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -262,18 +262,18 @@ def _parse_iso8601(date_type, timestr): replace[attr] = int(value) return default.replace(**replace), resolution - + def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: # If the ref_date Timestamp is timezone-aware, convert to UTC and # make it timezone-naive (GH 2649). if date.tz is not None: return date.tz_convert("UTC").tz_convert(None) - return date - - + return date + + def _unpack_time_unit_and_ref_date( units: str, -) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]: +) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime time_unit, _ref_date = _unpack_netcdf_time_units(units) From 21784136c748b4893082a5cdd242cb8f5c136408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 1 Jan 2025 15:21:11 +0100 Subject: [PATCH 12/13] fix indentation --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index cf55ca1a227..c02ad902ce1 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -263,7 +263,7 @@ def _parse_iso8601(date_type, timestr): return default.replace(**replace), resolution - def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: +def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: # If the ref_date Timestamp is timezone-aware, convert to UTC and # make it timezone-naive (GH 2649). if date.tz is not None: From f4e10de2ce3cf2997b54a3f71390bbf3eafae8d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 2 Jan 2025 15:07:46 +0100 Subject: [PATCH 13/13] add suggestion from code review --- xarray/coding/times.py | 8 +++----- xarray/core/pdcompat.py | 15 ++++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index c02ad902ce1..74ba97af81b 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -450,16 +450,14 @@ def decode_cf_datetime( flat_num_dates.astype(float), units, calendar ) # retrieve cftype - cftype = type(dates[np.nanargmin(num_dates)]) + dates_min = dates[np.nanargmin(num_dates)] + cftype = type(dates_min) # "ns" borders # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807'] lower = cftype(1677, 9, 21, 0, 12, 43, 145224) upper = cftype(2262, 4, 11, 23, 47, 16, 854775) - if ( - dates[np.nanargmin(num_dates)] < lower - or dates[np.nanargmax(num_dates)] > upper - ): + if dates_min < lower or dates[np.nanargmax(num_dates)] > upper: if _is_standard_calendar(calendar): warnings.warn( "Unable to decode time axis into full " diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index cefe0c71a0a..4c54715e2a0 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -36,7 +36,7 @@ from __future__ import annotations from enum import Enum -from typing import Literal, cast +from typing import Literal import pandas as pd from packaging.version import Version @@ -75,12 +75,13 @@ def __repr__(self) -> str: NoDefault = Literal[_NoDefault.no_default] # For typing following pandas -def timestamp_as_unit(date: pd.Timestamp, unit: str) -> pd.Timestamp: - # compatibility function for pandas issue - # where "as_unit" is not defined for pandas.Timestamp - # in pandas versions < 2.2 - # can be removed minimum pandas version is >= 2.2 - unit = cast(PDDatetimeUnitOptions, unit) +def timestamp_as_unit(date: pd.Timestamp, unit: PDDatetimeUnitOptions) -> pd.Timestamp: + """Convert the underlying int64 representation to the given unit. + + Compatibility function for pandas issue where "as_unit" is not defined + for pandas.Timestamp in pandas versions < 2.2. Can be removed minimum + pandas version is >= 2.2. + """ if hasattr(date, "as_unit"): date = date.as_unit(unit) elif hasattr(date, "_as_unit"):