Skip to content

Commit

Permalink
properly diff objects with arrays as attributes on variables (#9169)
Browse files Browse the repository at this point in the history
* move the attr comparison into a common function

* check that we can actually diff objects with array attrs

* whats-new entry

* Add property test

* Add more dtypes

* Better test

* Fix skip

* Use simple attrs strategy

---------

Co-authored-by: Deepak Cherian <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
3 people authored Jun 30, 2024
1 parent caed274 commit 3deee7b
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 7 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ Bug fixes
~~~~~~~~~
- Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`).
By `Pontus Lurcock <https://github.com/pont-us>`_.
- Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`).
By `Justus Magin <https://github.com/keewis>`_.
- Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`).
By `Justus Magin <https://github.com/keewis>`_.

Expand Down
17 changes: 17 additions & 0 deletions properties/test_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pytest

pytest.importorskip("hypothesis")

from hypothesis import given

import xarray as xr
import xarray.testing.strategies as xrst


@given(attrs=xrst.simple_attrs)
def test_assert_identical(attrs):
    """Check that deep copies compare identical for generated attrs.

    The same ``attrs`` mapping is attached first to a zero-dimensional
    ``Variable`` and then to an otherwise empty ``Dataset``; each object is
    compared against its own deep copy with ``assert_identical``.
    """
    variable = xr.Variable(dims=(), data=0, attrs=attrs)
    variable_copy = variable.copy(deep=True)
    xr.testing.assert_identical(variable, variable_copy)

    dataset = xr.Dataset(attrs=attrs)
    dataset_copy = dataset.copy(deep=True)
    xr.testing.assert_identical(dataset, dataset_copy)
18 changes: 12 additions & 6 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,12 @@ def _diff_mapping_repr(
a_indexes=None,
b_indexes=None,
):
def compare_attr(a, b):
    """Return the result of comparing two attribute values.

    If either value is a duck array the comparison is delegated to
    ``array_equiv``; otherwise the result of ``a == b`` is returned as-is.
    """
    # Guard clause: neither side is array-like, so ``==`` is used directly.
    if not (is_duck_array(a) or is_duck_array(b)):
        return a == b
    return array_equiv(a, b)

def extra_items_repr(extra_keys, mapping, ab_side, kwargs):
extra_repr = [
summarizer(k, mapping[k], col_width, **kwargs[k]) for k in extra_keys
Expand Down Expand Up @@ -801,11 +807,7 @@ def extra_items_repr(extra_keys, mapping, ab_side, kwargs):
is_variable = True
except AttributeError:
# compare attribute value
if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]):
compatible = array_equiv(a_mapping[k], b_mapping[k])
else:
compatible = a_mapping[k] == b_mapping[k]

compatible = compare_attr(a_mapping[k], b_mapping[k])
is_variable = False

if not compatible:
Expand All @@ -821,7 +823,11 @@ def extra_items_repr(extra_keys, mapping, ab_side, kwargs):

attrs_to_print = set(a_attrs) ^ set(b_attrs)
attrs_to_print.update(
{k for k in set(a_attrs) & set(b_attrs) if a_attrs[k] != b_attrs[k]}
{
k
for k in set(a_attrs) & set(b_attrs)
if not compare_attr(a_attrs[k], b_attrs[k])
}
)
for m in (a_mapping, b_mapping):
attr_s = "\n".join(
Expand Down
6 changes: 5 additions & 1 deletion xarray/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,14 @@ def dimension_sizes(
max_side=2,
max_dims=2,
),
dtype=npst.scalar_dtypes(),
dtype=npst.scalar_dtypes()
| npst.byte_string_dtypes()
| npst.unicode_string_dtypes(),
)
_attr_values = st.none() | st.booleans() | _readable_strings | _small_arrays

simple_attrs = st.dictionaries(_attr_keys, _attr_values)


def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]:
"""
Expand Down
30 changes: 30 additions & 0 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,36 @@ def test_diff_attrs_repr_with_array(self) -> None:
actual = formatting.diff_attrs_repr(attrs_a, attrs_c, "equals")
assert expected == actual

def test__diff_mapping_repr_array_attrs_on_variables(self) -> None:
# Checks that diff_data_vars_repr can render a diff when the variables being
# compared carry numpy arrays as attribute values.
#
# Left-hand mapping: one DataArray "a" with int16 data [1] and an int8
# array attribute "b" equal to [1, 2].
a = {
"a": xr.DataArray(
dims="x",
data=np.array([1], dtype="int16"),
attrs={"b": np.array([1, 2], dtype="int8")},
)
}
# Right-hand mapping: same name, dims and data; only the array attribute
# "b" differs ([2, 3] instead of [1, 2]).
b = {
"a": xr.DataArray(
dims="x",
data=np.array([1], dtype="int16"),
attrs={"b": np.array([2, 3], dtype="int8")},
)
}
# compat="identical" is the mode under test here; the expected output below
# shows the differing attributes being reported for each side.
actual = formatting.diff_data_vars_repr(a, b, compat="identical", col_width=8)
# NOTE(review): the column alignment inside this expected literal looks
# collapsed (no padding after "L"/"R", no nesting indent) — confirm the exact
# whitespace against the repository before relying on it.
expected = dedent(
"""\
Differing data variables:
L a (x) int16 2B 1
Differing variable attributes:
b: [1 2]
R a (x) int16 2B 1
Differing variable attributes:
b: [2 3]
""".rstrip()
)

assert actual == expected

def test_diff_dataset_repr(self) -> None:
ds_a = xr.Dataset(
data_vars={
Expand Down

0 comments on commit 3deee7b

Please sign in to comment.