From b5180749d351f8b85fd39677bf137caaa90288a7 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Tue, 25 Jun 2024 15:18:53 +0200 Subject: [PATCH 01/13] switch to unit `"D"` (#9170) --- xarray/tests/test_missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 3adcc132b61..da9513a7c71 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -84,7 +84,7 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, non_uniform=False if non_uniform: # construct a datetime index that has irregular spacing - deltas = pd.to_timedelta(rs.normal(size=shape[0], scale=10), unit="d") + deltas = pd.to_timedelta(rs.normal(size=shape[0], scale=10), unit="D") coords = {"time": (pd.Timestamp("2000-01-01") + deltas).sort_values()} else: coords = {"time": pd.date_range("2000-01-01", freq="D", periods=shape[0])} From 07b175633eba30dbfcd6eb0cf514ef1b1da9cf64 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 26 Jun 2024 11:05:23 -0700 Subject: [PATCH 02/13] Slightly improve DataTree repr (#9064) * Improve DataTree repr * Adjust DataTree repr to include full path * More tweaks * Use "Group:" in repr instead of "DataTree:" * Fix errors in new repr tests * Fix repr on windows --- xarray/core/datatree.py | 11 ++++--- xarray/core/datatree_render.py | 11 ++++--- xarray/core/formatting.py | 15 +++------ xarray/core/iterators.py | 19 +++++------ xarray/tests/test_datatree.py | 57 +++++++++++++++++++++++++++++++++ xarray/tests/test_formatting.py | 18 ++++++----- 6 files changed, 94 insertions(+), 37 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 4e4d30885a3..c923ca2eb87 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -1314,11 +1314,12 @@ def match(self, pattern: str) -> DataTree: ... } ... ) >>> dt.match("*/B") - DataTree('None', parent=None) - ├── DataTree('a') - │ └── DataTree('B') - └── DataTree('b') - └── DataTree('B') + + Group: / + ├── Group: /a + │ └── Group: /a/B + └── Group: /b + └── Group: /b/B """ matching_nodes = { node.path: node.ds diff --git a/xarray/core/datatree_render.py b/xarray/core/datatree_render.py index d069071495e..f10f2540952 100644 --- a/xarray/core/datatree_render.py +++ b/xarray/core/datatree_render.py @@ -57,11 +57,12 @@ def __init__(self): >>> s0a = DataTree(name="sub0A", parent=s0) >>> s1 = DataTree(name="sub1", parent=root) >>> print(RenderDataTree(root)) - DataTree('root', parent=None) - ├── DataTree('sub0') - │ ├── DataTree('sub0B') - │ └── DataTree('sub0A') - └── DataTree('sub1') + + Group: / + ├── Group: /sub0 + │ ├── Group: /sub0/sub0B + │ └── Group: /sub0/sub0A + └── Group: /sub1 """ super().__init__("\u2502 ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 ") diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index ad65a44d7d5..c15df34b5b1 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -1023,20 +1023,21 @@ def diff_datatree_repr(a: DataTree, b: DataTree, compat): def _single_node_repr(node: DataTree) -> str: """Information about this node, not including its relationships to other nodes.""" - node_info = f"DataTree('{node.name}')" - if node.has_data or node.has_attrs: ds_info = "\n" + repr(node.ds) else: ds_info = "" - return node_info + ds_info + return f"Group: {node.path}{ds_info}" def datatree_repr(dt: DataTree): """A printable representation of the structure of this entire tree.""" renderer = RenderDataTree(dt) - lines = [] + name_info = "" if dt.name is None else f" {dt.name!r}" + header = f"" + + lines = [header] for pre, fill, node in renderer: node_repr = _single_node_repr(node) @@ -1051,12 +1052,6 @@ def datatree_repr(dt: DataTree): else: lines.append(f"{fill}{' ' * len(renderer.style.vertical)}{line}") - # Tack on info about whether or not root node has a parent at the start - first_line = lines[0] - parent = f'"{dt.parent.name}"' if dt.parent is not None else "None" - first_line_with_parent = first_line[:-1] + f", parent={parent})" - lines[0] = first_line_with_parent - return "\n".join(lines) diff --git a/xarray/core/iterators.py b/xarray/core/iterators.py index dd5fa7ee97a..ae748b0066c 100644 --- a/xarray/core/iterators.py +++ b/xarray/core/iterators.py @@ -39,15 +39,16 @@ class LevelOrderIter(Iterator): >>> i = DataTree(name="i", parent=g) >>> h = DataTree(name="h", parent=i) >>> print(f) - DataTree('f', parent=None) - ├── DataTree('b') - │ ├── DataTree('a') - │ └── DataTree('d') - │ ├── DataTree('c') - │ └── DataTree('e') - └── DataTree('g') - └── DataTree('i') - └── DataTree('h') + + Group: / + ├── Group: /b + │ ├── Group: /b/a + │ └── Group: /b/d + │ ├── Group: /b/d/c + │ └── Group: /b/d/e + └── Group: /g + └── Group: /g/i + └── Group: /g/i/h >>> [node.name for node in LevelOrderIter(f)] ['f', 'b', 'g', 'a', 'd', 'i', 'c', 'e', 'h'] >>> [node.name for node in LevelOrderIter(f, maxlevel=3)] diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 58fec20d4c6..b0dc2accd3e 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -623,6 +623,63 @@ def test_operation_with_attrs_but_no_data(self): dt.sel(dim_0=0) +class TestRepr: + def test_repr(self): + dt: DataTree = DataTree.from_dict( + { + "/": xr.Dataset( + {"e": (("x",), [1.0, 2.0])}, + coords={"x": [2.0, 3.0]}, + ), + "/b": xr.Dataset({"f": (("y",), [3.0])}), + "/b/c": xr.Dataset(), + "/b/d": xr.Dataset({"g": 4.0}), + } + ) + + result = repr(dt) + expected = dedent( + """ + + Group: / + │ Dimensions: (x: 2) + │ Coordinates: + │ * x (x) float64 16B 2.0 3.0 + │ Data variables: + │ e (x) float64 16B 1.0 2.0 + └── Group: /b + │ Dimensions: (y: 1) + │ Dimensions without coordinates: y + │ Data variables: + │ f (y) float64 8B 3.0 + ├── Group: /b/c + └── Group: /b/d + Dimensions: () + Data variables: + g float64 8B 4.0 + """ + ).strip() + assert result == expected + + result = repr(dt.b) + expected = dedent( + """ + + Group: /b + │ Dimensions: (y: 1) + │ Dimensions without coordinates: y + │ Data variables: + │ f (y) float64 8B 3.0 + ├── Group: /b/c + └── Group: /b/d + Dimensions: () + Data variables: + g float64 8B 4.0 + """ + ).strip() + assert result == expected + + class TestRestructuring: def test_drop_nodes(self): sue = DataTree.from_dict({"Mary": None, "Kate": None, "Ashley": None}) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index b9d5f401a4a..d7a46eeaefc 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -555,16 +555,17 @@ def test_array_scalar_format(self) -> None: def test_datatree_print_empty_node(self): dt: DataTree = DataTree(name="root") - printout = dt.__str__() - assert printout == "DataTree('root', parent=None)" + printout = str(dt) + assert printout == "\nGroup: /" def test_datatree_print_empty_node_with_attrs(self): dat = xr.Dataset(attrs={"note": "has attrs"}) dt: DataTree = DataTree(name="root", data=dat) - printout = dt.__str__() + printout = str(dt) assert printout == dedent( """\ - DataTree('root', parent=None) + + Group: / Dimensions: () Data variables: *empty* @@ -575,9 +576,10 @@ def test_datatree_print_empty_node_with_attrs(self): def test_datatree_print_node_with_data(self): dat = xr.Dataset({"a": [0, 2]}) dt: DataTree = DataTree(name="root", data=dat) - printout = dt.__str__() + printout = str(dt) expected = [ - "DataTree('root', parent=None)", + "", + "Group: /", "Dimensions", "Coordinates", "a", @@ -591,8 +593,8 @@ def test_datatree_printout_nested_node(self): dat = xr.Dataset({"a": [0, 2]}) root: DataTree = DataTree(name="root") DataTree(name="results", data=dat, parent=root) - printout = root.__str__() - assert printout.splitlines()[2].startswith(" ") + printout = str(root) + assert printout.splitlines()[3].startswith(" ") def test_datatree_repr_of_node_with_data(self): dat = xr.Dataset({"a": [0, 2]}) From 19d0fbfcbd3bd74f5846569a78ded68810446c48 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Wed, 26 Jun 2024 13:14:25 -0500 Subject: [PATCH 03/13] Fix example code formatting for CachingFileManager (#9178) --- xarray/backends/file_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/file_manager.py b/xarray/backends/file_manager.py index df901f9a1d9..86d84f532b1 100644 --- a/xarray/backends/file_manager.py +++ b/xarray/backends/file_manager.py @@ -63,7 +63,7 @@ class CachingFileManager(FileManager): FileManager.close(), which ensures that closed files are removed from the cache as well. - Example usage: + Example usage:: manager = FileManager(open, 'example.txt', mode='w') f = manager.acquire() @@ -71,7 +71,7 @@ class CachingFileManager(FileManager): manager.close() # ensures file is closed Note that as long as previous files are still cached, acquiring a file - multiple times from the same FileManager is essentially free: + multiple times from the same FileManager is essentially free:: f1 = manager.acquire() f2 = manager.acquire() From 651bd12749e56b0b2f992c8cae51dae0ece29c65 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 26 Jun 2024 20:16:09 +0200 Subject: [PATCH 04/13] Change np.core.defchararray to np.char (#9165) (#9166) * Change np.core.defchararray to np.char.chararray (#9165) Replace a reference to np.core.defchararray with np.char.chararray in xarray.testing.assertions, since the former no longer works on NumPy 2.0.0 and the latter is the "preferred alias" according to NumPy docs. See Issue #9165. * Add test for assert_allclose on dtype S (#9165) * Use np.char.decode, not np.char.chararray.decode ... in assertions._decode_string_data. See #9166. * List #9165 fix in whats-new.rst * cross-like the fixed function * Improve a parameter ID in tests.test_assertions Co-authored-by: Justus Magin * whats-new normalization --------- Co-authored-by: Justus Magin --- doc/whats-new.rst | 2 ++ xarray/testing/assertions.py | 2 +- xarray/tests/test_assertions.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c3383a5648a..97631b4c324 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,8 @@ Deprecations Bug fixes ~~~~~~~~~ +- Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`). + By `Pontus Lurcock `_. Documentation diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 69885868f83..2a4c17e115a 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -36,7 +36,7 @@ def wrapper(*args, **kwargs): def _decode_string_data(data): if data.dtype.kind == "S": - return np.core.defchararray.decode(data, "utf-8", "replace") + return np.char.decode(data, "utf-8", "replace") return data diff --git a/xarray/tests/test_assertions.py b/xarray/tests/test_assertions.py index aa0ea46f7db..20b5e163662 100644 --- a/xarray/tests/test_assertions.py +++ b/xarray/tests/test_assertions.py @@ -52,6 +52,11 @@ def test_allclose_regression() -> None: xr.Dataset({"a": ("x", [0, 2]), "b": ("y", [0, 1])}), id="Dataset", ), + pytest.param( + xr.DataArray(np.array("a", dtype="|S1")), + xr.DataArray(np.array("b", dtype="|S1")), + id="DataArray_with_character_dtype", + ), ), ) def test_assert_allclose(obj1, obj2) -> None: From fa41cc0454e6daf47d1417f97a9e72ebb56e3add Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 27 Jun 2024 12:23:55 +0200 Subject: [PATCH 05/13] temporarily pin `numpy<2` (#9181) --- ci/requirements/doc.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 39c2d4d6e88..116eee7f702 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -21,7 +21,7 @@ dependencies: - nbsphinx - netcdf4>=1.5 - numba - - numpy>=1.21 + - numpy>=1.21,<2 - packaging>=21.3 - pandas>=1.4,!=2.1.0 - pooch diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 3b2e6dc62e6..4cdddc676eb 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -23,7 +23,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy + - numpy<2 - packaging - pandas # - pint>=0.22 diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 01521e950f4..f1a10bc040b 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -26,7 +26,7 @@ dependencies: - numba - numbagg - numexpr - - numpy + - numpy<2 - opt_einsum - packaging - pandas From 48a4f7ac6cf20a8b6d0247c701647c67251ded78 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 27 Jun 2024 14:28:48 +0200 Subject: [PATCH 06/13] temporarily remove `pydap` from CI (#9183) (the issue is that with `numpy>=2` `import pydap` succeeds, but `import pydap.lib` raises) --- ci/requirements/all-but-dask.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index 2f47643cc87..119db282ad9 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -27,7 +27,7 @@ dependencies: - pandas - pint>=0.22 - pip - - pydap + # - pydap - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 4cdddc676eb..2eedc9b0621 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -29,7 +29,7 @@ dependencies: # - pint>=0.22 - pip - pre-commit - - pydap + # - pydap - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index f1a10bc040b..317e1fe5f41 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -35,7 +35,7 @@ dependencies: - pooch - pre-commit - pyarrow # pandas raises a deprecation warning without this, breaking doctests - - pydap + # - pydap - pytest - pytest-cov - pytest-env From f4183ec043de97273efdfdd4a33df2c3dc08ddff Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 27 Jun 2024 19:04:16 +0200 Subject: [PATCH 07/13] also pin `numpy` in the all-but-dask CI (#9184) --- ci/requirements/all-but-dask.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index 119db282ad9..abf6a88690a 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -22,7 +22,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy + - numpy<2 - packaging - pandas - pint>=0.22 From 42ed6d30e81dce5b9922ac82f76c5b3cd748b19e Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 28 Jun 2024 10:18:55 +0200 Subject: [PATCH 08/13] promote floating-point numeric datetimes to 64-bit before decoding (#9182) * promote floating-point dates to 64-bit while decoding * add a test to make sure we don't regress * whats-new entry --- doc/whats-new.rst | 2 ++ xarray/coding/times.py | 2 ++ xarray/tests/test_coding_times.py | 16 ++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 97631b4c324..c58f73cb1fa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,8 @@ Bug fixes ~~~~~~~~~ - Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`). By `Pontus Lurcock `_. +- Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`). + By `Justus Magin `_. Documentation diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 466e847e003..34d4f9a23ad 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -278,6 +278,8 @@ def _decode_datetime_with_pandas( # timedelta64 value, and therefore would raise an error in the lines above. if flat_num_dates.dtype.kind in "iu": flat_num_dates = flat_num_dates.astype(np.int64) + elif flat_num_dates.dtype.kind in "f": + flat_num_dates = flat_num_dates.astype(np.float64) # Cast input ordinals to integers of nanoseconds because pd.to_timedelta # works much faster when dealing with integers (GH 1399). diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 09221d66066..393f8400c46 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1182,6 +1182,22 @@ def test_decode_0size_datetime(use_cftime): np.testing.assert_equal(expected, actual) +def test_decode_float_datetime(): + num_dates = np.array([1867128, 1867134, 1867140], dtype="float32") + units = "hours since 1800-01-01" + calendar = "standard" + + expected = np.array( + ["2013-01-01T00:00:00", "2013-01-01T06:00:00", "2013-01-01T12:00:00"], + dtype="datetime64[ns]", + ) + + actual = decode_cf_datetime( + num_dates, units=units, calendar=calendar, use_cftime=False + ) + np.testing.assert_equal(actual, expected) + + @requires_cftime def test_scalar_unit() -> None: # test that a scalar units (often NaN when using to_netcdf) does not raise an error From caed27437cc695e6fc83475c24c9ae2268806f28 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 30 Jun 2024 16:03:46 +0200 Subject: [PATCH 09/13] `"source"` encoding for datasets opened from `fsspec` objects (#8923) * draft for setting `source` from pre-opened `fsspec` file objects * refactor to only import `fsspec` if we're actually going to check Could use `getattr(filename_or_obj, "path", filename_or_obj)` to avoid `isinstance` checks. * replace with a simple `getattr` on `"path"` * add a test * whats-new entry * open the file as a context manager --- doc/whats-new.rst | 2 ++ xarray/backends/api.py | 7 +++++-- xarray/tests/test_backends.py | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c58f73cb1fa..0174e16602f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,6 +24,8 @@ New Features ~~~~~~~~~~~~ - Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`). By `Martin Raspaud `_. +- Extract the source url from fsspec objects (:issue:`9142`, :pull:`8923`). + By `Justus Magin `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 7054c62126e..521bdf65e6a 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -382,8 +382,11 @@ def _dataset_from_backend_dataset( ds.set_close(backend_ds._close) # Ensure source filename always stored in dataset object - if "source" not in ds.encoding and isinstance(filename_or_obj, (str, os.PathLike)): - ds.encoding["source"] = _normalize_path(filename_or_obj) + if "source" not in ds.encoding: + path = getattr(filename_or_obj, "path", filename_or_obj) + + if isinstance(path, (str, os.PathLike)): + ds.encoding["source"] = _normalize_path(path) return ds diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 177700a5404..15485dc178a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5151,6 +5151,21 @@ def test_source_encoding_always_present_with_pathlib() -> None: assert ds.encoding["source"] == tmp +@requires_h5netcdf +@requires_fsspec +def test_source_encoding_always_present_with_fsspec() -> None: + import fsspec + + rnddata = np.random.randn(10) + original = Dataset({"foo": ("x", rnddata)}) + with create_tmp_file() as tmp: + original.to_netcdf(tmp) + + fs = fsspec.filesystem("file") + with fs.open(tmp) as f, open_dataset(f) as ds: + assert ds.encoding["source"] == tmp + + def _assert_no_dates_out_of_range_warning(record): undesired_message = "dates out of range" for warning in record: From 3deee7bb535dba9a48ee590c7f5119a7f2d779be Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sun, 30 Jun 2024 18:46:30 +0200 Subject: [PATCH 10/13] properly diff objects with arrays as attributes on variables (#9169) * move the attr comparison into a common function * check that we can actually diff objects with array attrs * whats-new entry * Add property test * Add more dtypes * Better test * Fix skip * Use simple attrs strategy --------- Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ properties/test_properties.py | 17 +++++++++++++++++ xarray/core/formatting.py | 18 ++++++++++++------ xarray/testing/strategies.py | 6 +++++- xarray/tests/test_formatting.py | 30 ++++++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 properties/test_properties.py diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0174e16602f..f3ab5d46e1d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,8 @@ Bug fixes ~~~~~~~~~ - Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`). By `Pontus Lurcock `_. +- Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`). + By `Justus Magin `_. - Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`). By `Justus Magin `_. diff --git a/properties/test_properties.py b/properties/test_properties.py new file mode 100644 index 00000000000..fc0a1955539 --- /dev/null +++ b/properties/test_properties.py @@ -0,0 +1,17 @@ +import pytest + +pytest.importorskip("hypothesis") + +from hypothesis import given + +import xarray as xr +import xarray.testing.strategies as xrst + + +@given(attrs=xrst.simple_attrs) +def test_assert_identical(attrs): + v = xr.Variable(dims=(), data=0, attrs=attrs) + xr.testing.assert_identical(v, v.copy(deep=True)) + + ds = xr.Dataset(attrs=attrs) + xr.testing.assert_identical(ds, ds.copy(deep=True)) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index c15df34b5b1..5c4a3015843 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -765,6 +765,12 @@ def _diff_mapping_repr( a_indexes=None, b_indexes=None, ): + def compare_attr(a, b): + if is_duck_array(a) or is_duck_array(b): + return array_equiv(a, b) + else: + return a == b + def extra_items_repr(extra_keys, mapping, ab_side, kwargs): extra_repr = [ summarizer(k, mapping[k], col_width, **kwargs[k]) for k in extra_keys @@ -801,11 +807,7 @@ def extra_items_repr(extra_keys, mapping, ab_side, kwargs): is_variable = True except AttributeError: # compare attribute value - if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]): - compatible = array_equiv(a_mapping[k], b_mapping[k]) - else: - compatible = a_mapping[k] == b_mapping[k] - + compatible = compare_attr(a_mapping[k], b_mapping[k]) is_variable = False if not compatible: @@ -821,7 +823,11 @@ def extra_items_repr(extra_keys, mapping, ab_side, kwargs): attrs_to_print = set(a_attrs) ^ set(b_attrs) attrs_to_print.update( - {k for k in set(a_attrs) & set(b_attrs) if a_attrs[k] != b_attrs[k]} + { + k + for k in set(a_attrs) & set(b_attrs) + if not compare_attr(a_attrs[k], b_attrs[k]) + } ) for m in (a_mapping, b_mapping): attr_s = "\n".join( diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 449d0c793cc..085b70e518b 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -192,10 +192,14 @@ def dimension_sizes( max_side=2, max_dims=2, ), - dtype=npst.scalar_dtypes(), + dtype=npst.scalar_dtypes() + | npst.byte_string_dtypes() + | npst.unicode_string_dtypes(), ) _attr_values = st.none() | st.booleans() | _readable_strings | _small_arrays +simple_attrs = st.dictionaries(_attr_keys, _attr_values) + def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: """ diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index d7a46eeaefc..6c49ab456f6 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -399,6 +399,36 @@ def test_diff_attrs_repr_with_array(self) -> None: actual = formatting.diff_attrs_repr(attrs_a, attrs_c, "equals") assert expected == actual + def test__diff_mapping_repr_array_attrs_on_variables(self) -> None: + a = { + "a": xr.DataArray( + dims="x", + data=np.array([1], dtype="int16"), + attrs={"b": np.array([1, 2], dtype="int8")}, + ) + } + b = { + "a": xr.DataArray( + dims="x", + data=np.array([1], dtype="int16"), + attrs={"b": np.array([2, 3], dtype="int8")}, + ) + } + actual = formatting.diff_data_vars_repr(a, b, compat="identical", col_width=8) + expected = dedent( + """\ + Differing data variables: + L a (x) int16 2B 1 + Differing variable attributes: + b: [1 2] + R a (x) int16 2B 1 + Differing variable attributes: + b: [2 3] + """.rstrip() + ) + + assert actual == expected + def test_diff_dataset_repr(self) -> None: ds_a = xr.Dataset( data_vars={ From fff82539c7b0f045c35ace332c4f6ecb365a0612 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Sun, 30 Jun 2024 23:13:15 +0200 Subject: [PATCH 11/13] Allow str in static typing of reindex, ffill etc. (#9194) * allow str in reindex * add whats-new --- doc/whats-new.rst | 3 ++- xarray/core/alignment.py | 6 +++--- xarray/core/dataarray.py | 8 ++++---- xarray/core/dataset.py | 8 ++++---- xarray/core/resample.py | 16 ++++++++++------ xarray/tests/test_groupby.py | 6 +++--- 6 files changed, 26 insertions(+), 21 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f3ab5d46e1d..ac849c7ec19 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,7 +43,8 @@ Bug fixes By `Justus Magin `_. - Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`). By `Justus Magin `_. - +- Fiy static typing of tolerance arguments by allowing `str` type (:issue:`8892`, :pull:`9194`). + By `Michael Niklas `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 13e3400d170..44fc7319170 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -137,7 +137,7 @@ def __init__( exclude_dims: str | Iterable[Hashable] = frozenset(), exclude_vars: Iterable[Hashable] = frozenset(), method: str | None = None, - tolerance: int | float | Iterable[int | float] | None = None, + tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, @@ -965,7 +965,7 @@ def reindex( obj: T_Alignable, indexers: Mapping[Any, Any], method: str | None = None, - tolerance: int | float | Iterable[int | float] | None = None, + tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, @@ -1004,7 +1004,7 @@ def reindex_like( obj: T_Alignable, other: Dataset | DataArray, method: str | None = None, - tolerance: int | float | Iterable[int | float] | None = None, + tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value: Any = dtypes.NA, ) -> T_Alignable: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d3390d26655..b67f8089eb2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1909,7 +1909,7 @@ def reindex_like( other: T_DataArrayOrSet, *, method: ReindexMethodOptions = None, - tolerance: int | float | Iterable[int | float] | None = None, + tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value=dtypes.NA, ) -> Self: @@ -1936,7 +1936,7 @@ def reindex_like( - backfill / bfill: propagate next valid index value backward - nearest: use nearest valid index value - tolerance : optional + tolerance : float | Iterable[float] | str | None, default: None Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. @@ -2096,7 +2096,7 @@ def reindex( indexers: Mapping[Any, Any] | None = None, *, method: ReindexMethodOptions = None, - tolerance: float | Iterable[float] | None = None, + tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value=dtypes.NA, **indexers_kwargs: Any, @@ -2126,7 +2126,7 @@ def reindex( - backfill / bfill: propagate next valid index value backward - nearest: use nearest valid index value - tolerance : float | Iterable[float] | None, default: None + tolerance : float | Iterable[float] | str | None, default: None Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0b8be674675..50cfc7b0c29 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3499,7 +3499,7 @@ def reindex_like( self, other: T_Xarray, method: ReindexMethodOptions = None, - tolerance: int | float | Iterable[int | float] | None = None, + tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value: Any = xrdtypes.NA, ) -> Self: @@ -3526,7 +3526,7 @@ def reindex_like( - "backfill" / "bfill": propagate next valid index value backward - "nearest": use nearest valid index value - tolerance : optional + tolerance : float | Iterable[float] | str | None, default: None Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. @@ -3569,7 +3569,7 @@ def reindex( self, indexers: Mapping[Any, Any] | None = None, method: ReindexMethodOptions = None, - tolerance: int | float | Iterable[int | float] | None = None, + tolerance: float | Iterable[float] | str | None = None, copy: bool = True, fill_value: Any = xrdtypes.NA, **indexers_kwargs: Any, @@ -3594,7 +3594,7 @@ def reindex( - "backfill" / "bfill": propagate next valid index value backward - "nearest": use nearest valid index value - tolerance : optional + tolerance : float | Iterable[float] | str | None, default: None Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. diff --git a/xarray/core/resample.py b/xarray/core/resample.py index ceab0a891c9..ec86f2a283f 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -66,12 +66,12 @@ def _drop_coords(self) -> T_Xarray: obj = obj.drop_vars([k]) return obj - def pad(self, tolerance: float | Iterable[float] | None = None) -> T_Xarray: + def pad(self, tolerance: float | Iterable[float] | str | None = None) -> T_Xarray: """Forward fill new values at up-sampled frequency. Parameters ---------- - tolerance : float | Iterable[float] | None, default: None + tolerance : float | Iterable[float] | str | None, default: None Maximum distance between original and new labels to limit the up-sampling method. Up-sampled data with indices that satisfy the equation @@ -91,12 +91,14 @@ def pad(self, tolerance: float | Iterable[float] | None = None) -> T_Xarray: ffill = pad - def backfill(self, tolerance: float | Iterable[float] | None = None) -> T_Xarray: + def backfill( + self, tolerance: float | Iterable[float] | str | None = None + ) -> T_Xarray: """Backward fill new values at up-sampled frequency. Parameters ---------- - tolerance : float | Iterable[float] | None, default: None + tolerance : float | Iterable[float] | str | None, default: None Maximum distance between original and new labels to limit the up-sampling method. Up-sampled data with indices that satisfy the equation @@ -116,13 +118,15 @@ def backfill(self, tolerance: float | Iterable[float] | None = None) -> T_Xarray bfill = backfill - def nearest(self, tolerance: float | Iterable[float] | None = None) -> T_Xarray: + def nearest( + self, tolerance: float | Iterable[float] | str | None = None + ) -> T_Xarray: """Take new values from nearest original coordinate to up-sampled frequency coordinates. Parameters ---------- - tolerance : float | Iterable[float] | None, default: None + tolerance : float | Iterable[float] | str | None, default: None Maximum distance between original and new labels to limit the up-sampling method. Up-sampled data with indices that satisfy the equation diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 47cda064143..f0a0fd14d9d 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -2037,17 +2037,17 @@ def test_upsample_tolerance(self) -> None: array = DataArray(np.arange(2), [("time", times)]) # Forward fill - actual = array.resample(time="6h").ffill(tolerance="12h") # type: ignore[arg-type] # TODO: tolerance also allows strings, same issue in .reindex. + actual = array.resample(time="6h").ffill(tolerance="12h") expected = DataArray([0.0, 0.0, 0.0, np.nan, 1.0], [("time", times_upsampled)]) assert_identical(expected, actual) # Backward fill - actual = array.resample(time="6h").bfill(tolerance="12h") # type: ignore[arg-type] # TODO: tolerance also allows strings, same issue in .reindex. + actual = array.resample(time="6h").bfill(tolerance="12h") expected = DataArray([0.0, np.nan, 1.0, 1.0, 1.0], [("time", times_upsampled)]) assert_identical(expected, actual) # Nearest - actual = array.resample(time="6h").nearest(tolerance="6h") # type: ignore[arg-type] # TODO: tolerance also allows strings, same issue in .reindex. + actual = array.resample(time="6h").nearest(tolerance="6h") expected = DataArray([0, 0, np.nan, 1, 1], [("time", times_upsampled)]) assert_identical(expected, actual) From 24ab84cb0dbc2706677bab2e3765050f1d4f9646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dieter=20Werthm=C3=BCller?= Date: Mon, 1 Jul 2024 16:22:59 +0200 Subject: [PATCH 12/13] Fix dark-theme in `html[data-theme=dark]`-tags (#9200) * Fix dark-theme in html tag * Add to release notes --- doc/whats-new.rst | 2 ++ xarray/static/css/style.css | 1 + 2 files changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ac849c7ec19..685cdf28194 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,6 +45,8 @@ Bug fixes By `Justus Magin `_. - Fiy static typing of tolerance arguments by allowing `str` type (:issue:`8892`, :pull:`9194`). By `Michael Niklas `_. +- Dark themes are now properly detected for ``html[data-theme=dark]``-tags (:pull:`9200`). + By `Dieter Werthmüller `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index e0a51312b10..dbe61e311c1 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -14,6 +14,7 @@ } html[theme=dark], +html[data-theme=dark], body[data-theme=dark], body.vscode-dark { --xr-font-color0: rgba(255, 255, 255, 1); From 90e44867f7270e7de5e31b8713224039e39d9704 Mon Sep 17 00:00:00 2001 From: Alfonso Ladino Date: Mon, 1 Jul 2024 09:33:15 -0500 Subject: [PATCH 13/13] Add open_datatree benchmark (#9158) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * open_datatree performance improvement on NetCDF files * fixing issue with forward slashes * fixing issue with pytest * open datatree in zarr format improvement * fixing incompatibility in returned object * passing group parameter to opendatatree method and reducing duplicated code * passing group parameter to opendatatree method - NetCDF * Update xarray/backends/netCDF4_.py renaming variables Co-authored-by: Tom Nicholas * renaming variables * renaming variables * renaming group_store variable * removing _open_datatree_netcdf function not used anymore in open_datatree implementations * improving performance of open_datatree method * renaming 'i' variable within list comprehension in open_store method for zarr datatree * using the default generator instead of loading zarr groups in memory * fixing issue with group path to avoid using group[1:] notation. Adding group variable typing hints (str | Iterable[str] | callable) under the open_datatree for h5 files. Finally, separating positional from keyword args * fixing issue with group path to avoid using group[1:] notation and adding group variable typing hints (str | Iterable[str] | callable) under the open_datatree method for netCDF files * fixing issue with group path to avoid using group[1:] notation and adding group variable typing hints (str | Iterable[str] | callable) under the open_datatree method for zarr files * adding 'mode' parameter to open_datatree method * adding 'mode' parameter to H5NetCDFStore.open method * adding new entry related to open_datatree performance improvement * adding new entry related to open_datatree performance improvement * Getting rid of unnecessary parameters for 'open_datatree' method for netCDF4 and Hdf5 backends * passing parent argument into _iter_zarr_groups instead of group[1:] for creating group path * adding benchmark test for opening a deeply nested data tree. This include a new class named 'IONestedDataTree' and another class for benchmarck named 'IOReadDataTreeNetCDF4' * Update doc/whats-new.rst --------- Co-authored-by: Tom Nicholas Co-authored-by: Kai Mühlbauer Co-authored-by: Deepak Cherian --- asv_bench/benchmarks/dataset_io.py | 113 ++++++++++++++++++++++++++++- xarray/backends/zarr.py | 2 +- 2 files changed, 113 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/dataset_io.py b/asv_bench/benchmarks/dataset_io.py index dcc2de0473b..0956be67dad 100644 --- a/asv_bench/benchmarks/dataset_io.py +++ b/asv_bench/benchmarks/dataset_io.py @@ -7,6 +7,8 @@ import pandas as pd import xarray as xr +from xarray.backends.api import open_datatree +from xarray.core.datatree import DataTree from . import _skip_slow, parameterized, randint, randn, requires_dask @@ -16,7 +18,6 @@ except ImportError: pass - os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" _ENGINES = tuple(xr.backends.list_engines().keys() - {"store"}) @@ -469,6 +470,116 @@ def create_delayed_write(): return ds.to_netcdf("file.nc", engine="netcdf4", compute=False) +class IONestedDataTree: + """ + A few examples that benchmark reading/writing a heavily nested netCDF datatree with + xarray + """ + + timeout = 300.0 + repeat = 1 + number = 5 + + def make_datatree(self, nchildren=10): + # multiple Dataset + self.ds = xr.Dataset() + self.nt = 1000 + self.nx = 90 + self.ny = 45 + self.nchildren = nchildren + + self.block_chunks = { + "time": self.nt / 4, + "lon": self.nx / 3, + "lat": self.ny / 3, + } + + self.time_chunks = {"time": int(self.nt / 36)} + + times = pd.date_range("1970-01-01", periods=self.nt, freq="D") + lons = xr.DataArray( + np.linspace(0, 360, self.nx), + dims=("lon",), + attrs={"units": "degrees east", "long_name": "longitude"}, + ) + lats = xr.DataArray( + np.linspace(-90, 90, self.ny), + dims=("lat",), + attrs={"units": "degrees north", "long_name": "latitude"}, + ) + self.ds["foo"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="foo", + attrs={"units": "foo units", "description": "a description"}, + ) + self.ds["bar"] = xr.DataArray( + randn((self.nt, self.nx, self.ny), frac_nan=0.2), + coords={"lon": lons, "lat": lats, "time": times}, + dims=("time", "lon", "lat"), + name="bar", + attrs={"units": "bar units", "description": "a description"}, + ) + self.ds["baz"] = xr.DataArray( + randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32), + coords={"lon": lons, "lat": lats}, + dims=("lon", "lat"), + name="baz", + attrs={"units": "baz units", "description": "a description"}, + ) + + self.ds.attrs = {"history": "created for xarray benchmarking"} + + self.oinds = { + "time": randint(0, self.nt, 120), + "lon": randint(0, self.nx, 20), + "lat": randint(0, self.ny, 10), + } + self.vinds = { + "time": xr.DataArray(randint(0, self.nt, 120), dims="x"), + "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"), + "lat": slice(3, 20), + } + root = {f"group_{group}": self.ds for group in range(self.nchildren)} + nested_tree1 = { + f"group_{group}/subgroup_1": xr.Dataset() for group in range(self.nchildren) + } + nested_tree2 = { + f"group_{group}/subgroup_2": xr.DataArray(np.arange(1, 10)).to_dataset( + name="a" + ) + for group in range(self.nchildren) + } + nested_tree3 = { + f"group_{group}/subgroup_2/sub-subgroup_1": self.ds + for group in range(self.nchildren) + } + dtree = root | nested_tree1 | nested_tree2 | nested_tree3 + self.dtree = DataTree.from_dict(dtree) + + +class IOReadDataTreeNetCDF4(IONestedDataTree): + def setup(self): + # TODO: Lazily skipped in CI as it is very demanding and slow. + # Improve times and remove errors. + _skip_slow() + + requires_dask() + + self.make_datatree() + self.format = "NETCDF4" + self.filepath = "datatree.nc4.nc" + dtree = self.dtree + dtree.to_netcdf(filepath=self.filepath) + + def time_load_datatree_netcdf4(self): + open_datatree(self.filepath, engine="netcdf4").load() + + def time_open_datatree_netcdf4(self): + open_datatree(self.filepath, engine="netcdf4") + + class IOWriteNetCDFDask: timeout = 60 repeat = 1 diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 9796fcbf9e2..85a1a6e214c 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -446,7 +446,7 @@ def open_store( stacklevel=stacklevel, zarr_version=zarr_version, ) - group_paths = [str(group / node[1:]) for node in _iter_zarr_groups(zarr_group)] + group_paths = [node for node in _iter_zarr_groups(zarr_group, parent=group)] return { group: cls( zarr_group.get(group),