From dd1e8dcedb82eff34f1b460e24d3eeacf5a17169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 6 Oct 2023 09:38:37 +0200 Subject: [PATCH 1/4] fix regression in time-like check when decoding masked data --- doc/whats-new.rst | 3 +++ xarray/coding/variables.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 55e0fbaf177..568fa62aff6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,6 +49,9 @@ Bug fixes - :py:meth:`DataArray.rename` & :py:meth:`Dataset.rename` would emit a warning when the operation was a no-op. (:issue:`8266`) By `Simon Hansen `_. +- Fixed a regression introduced in the previous release checking time-like units + when encoding/decoding masked data (:issue:`8269`, :pull:`8277`). + By `Kai Mühlbauer `_. Documentation diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index c583afc93c2..cbd7f282f16 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -218,7 +218,6 @@ def _apply_mask( def _is_time_like(units): # test for time-like time_strings = [ - "since", "days", "hours", "minutes", @@ -227,7 +226,13 @@ def _is_time_like(units): "microseconds", "nanoseconds", ] - return any(tstr in str(units) for tstr in time_strings) + units = str(units) + # to prevent detecting units like `days accumulated` as time-like + # special casing for datetime-units and timedelta-units (GH-8269) + if "since" in units: + return any(tstr in units for tstr in time_strings) + else: + return any(tstr == units for tstr in time_strings) class CFMaskCoder(VariableCoder): From 379cf8ec980ff48ea6563c0829c1520aa31d9239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 6 Oct 2023 13:21:50 +0200 Subject: [PATCH 2/4] add test --- xarray/tests/test_conventions.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 5157688b629..0db7da080d8 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -333,6 +333,16 @@ def test_invalid_time_units_raises_eagerly(self) -> None: with pytest.raises(ValueError, match=r"unable to decode time"): decode_cf(ds) + def test_invalid_timedelta_units_do_not_decode(self) -> None: + # regression test for #8269 + ds = Dataset( + {"time": ("time", [0, 1, 20], {"units": "days invalid", "_FillValue": 20})} + ) + expected = Dataset( + {"time": ("time", [0.0, 1.0, np.nan], {"units": "days invalid"})} + ) + assert_identical(expected, decode_cf(ds)) + @requires_cftime def test_dataset_repr_with_netcdf4_datetimes(self) -> None: # regression test for #347 From 996bafc12c9a199e48f8ac616383a900c4fdd50f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Oct 2023 11:33:24 +0200 Subject: [PATCH 3/4] use _unpack_netcdf_time_units to check for proper units-string --- xarray/coding/variables.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index cbd7f282f16..df660f90d9e 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -217,6 +217,8 @@ def _apply_mask( def _is_time_like(units): # test for time-like + if units is None: + return False time_strings = [ "days", "hours", @@ -230,7 +232,13 @@ def _is_time_like(units): # to prevent detecting units like `days accumulated` as time-like # special casing for datetime-units and timedelta-units (GH-8269) if "since" in units: - return any(tstr in units for tstr in time_strings) + from xarray.coding.times import _unpack_netcdf_time_units + + try: + _unpack_netcdf_time_units(units) + except ValueError: + return False + return True else: return any(tstr == units for tstr in time_strings) From d29dcf9373618caca0d39e426035bf4ccdf8ef75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Oct 2023 12:54:30 +0200 Subject: [PATCH 4/4] test with decode_times --- xarray/tests/test_conventions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 0db7da080d8..d6d1303a696 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -333,7 +333,8 @@ def test_invalid_time_units_raises_eagerly(self) -> None: with pytest.raises(ValueError, match=r"unable to decode time"): decode_cf(ds) - def test_invalid_timedelta_units_do_not_decode(self) -> None: + @pytest.mark.parametrize("decode_times", [True, False]) + def test_invalid_timedelta_units_do_not_decode(self, decode_times) -> None: # regression test for #8269 ds = Dataset( {"time": ("time", [0, 1, 20], {"units": "days invalid", "_FillValue": 20})} @@ -341,7 +342,7 @@ def test_invalid_timedelta_units_do_not_decode(self) -> None: expected = Dataset( {"time": ("time", [0.0, 1.0, np.nan], {"units": "days invalid"})} ) - assert_identical(expected, decode_cf(ds)) + assert_identical(expected, decode_cf(ds, decode_times=decode_times)) @requires_cftime def test_dataset_repr_with_netcdf4_datetimes(self) -> None: