Skip to content

Commit

Permalink
Support robustness functions in xs.ensemble_stats (#299)
Browse files Browse the repository at this point in the history
<!-- Please ensure the PR fulfills the following requirements! -->
<!-- If this is your first PR, make sure to add your details to the
AUTHORS.rst! -->
### Pull Request Checklist:
- [x] This PR addresses an already opened issue (for bug fixes /
features)
    - No.
- [x] (If applicable) Documentation has been added / updated (for bug
fixes / features).
- [x] (If applicable) Tests have been added.
- [x] This PR does not seem to break the templates.
- [x] CHANGES.rst has been updated (with summary of main changes).
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added.

### What kind of change does this PR introduce?

* You can now directly provide an ensemble dataset to
`xs.ensemble_stats`.
* `xs.ensemble_stats` now supports the robustness-related functions from
`xclim`.
* Small bugfix in `publish_release_notes`

### Does this PR introduce a breaking change?

* Removed explicit support for `ref` as it was implemented, since it
actually was completely broken... See #298.

### Other information:
  • Loading branch information
RondeauG authored Dec 7, 2023
2 parents 22ac74f + c9fe2e2 commit 15f054a
Show file tree
Hide file tree
Showing 4 changed files with 291 additions and 33 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ New features and enhancements
* Better ``xs.extract.resample`` : support for weighted resampling operations when starting with frequencies coarser than daily and missing timesteps/values handling. (:issue:`80`, :issue:`93`, :pull:`265`).
* New argument ``attribute_weights`` to ``generate_weights`` to allow for custom weights. (:pull:`252`).
* ``xs.io.round_bits`` to round floating point variable up to a number of bits, allowing for a better compression. This can be combined with the saving step through argument ``"bitround"`` of ``save_to_netcdf`` and ``save_to_zarr``. (:pull:`266`).
* Added the ability to directly provide an ensemble dataset to ``xs.ensemble_stats``. (:pull:`299`).
* Added support in ``xs.ensemble_stats`` for the new robustness-related functions available in `xclim`. (:pull:`299`).

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
167 changes: 164 additions & 3 deletions tests/test_ensembles.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from copy import deepcopy

import numpy as np
Expand All @@ -7,6 +8,8 @@

import xscen as xs

LOGGER = logging.getLogger(__name__)


class TestEnsembleStats:
@staticmethod
Expand Down Expand Up @@ -34,6 +37,27 @@ def make_ensemble(n):

return ens

def test_input_type(self, tmpdir):
ens_dict = self.make_ensemble(10)
out_dict = xs.ensemble_stats(
ens_dict, statistics={"ensemble_mean_std_max_min": None}
)
ens_ds = xr.concat(ens_dict, dim="realization")
out_ds = xs.ensemble_stats(
ens_ds, statistics={"ensemble_mean_std_max_min": None}
)
paths = []
for i, ds in enumerate(ens_dict):
paths.append(tmpdir / f"ens{i}.zarr")
ds.to_zarr(paths[-1])
out_zarr = xs.ensemble_stats(
paths, statistics={"ensemble_mean_std_max_min": None}
).compute()

assert out_dict.equals(out_ds)
# The zarr introduced some rounding errors
assert np.round(out_zarr, 10).equals(np.round(out_dict, 10))

@pytest.mark.parametrize(
"weights, to_level", [(None, None), (np.arange(1, 11), "for_tests")]
)
Expand Down Expand Up @@ -88,9 +112,99 @@ def test_weights(self, weights, to_level):
decimal=2,
)

def test_change_significance(self):
# TODO: Possible nearby changes to xclim.ensembles.change_significance would break this test.
pass
# FIXME: This function is deprecated, so this test should be removed eventually.
@pytest.mark.parametrize("p_vals", [True, False])
def test_change_significance(self, p_vals):
ens = self.make_ensemble(10)
with pytest.warns(
FutureWarning,
match="Function change_significance is deprecated as of xclim 0.47",
):
out = xs.ensemble_stats(
ens,
statistics={"change_significance": {"test": None, "p_vals": p_vals}},
)

assert len(out.data_vars) == 3 if p_vals else 2

@pytest.mark.parametrize("fractions", ["only", "both", "nested", "missing"])
def test_robustness_input(self, fractions):
ens = self.make_ensemble(10)

weights = None
if fractions == "only":
statistics = {
"robustness_fractions": {"test": "threshold", "abs_thresh": 2.5}
}
elif fractions == "both":
statistics = {
"robustness_fractions": {"test": "threshold", "abs_thresh": 2.5},
"robustness_categories": {
"categories": ["robust", "non-robust"],
"thresholds": [(0.5, 0.5), (0.5, 0.5)],
"ops": [(">=", ">="), ("<", "<")],
},
}
elif fractions == "nested":
weights = xr.DataArray(
[0] * 5 + [1] * 5, dims="realization"
) # Mostly to check that this is put in the nested dict
statistics = {
"robustness_categories": {
"robustness_fractions": {"test": "threshold", "abs_thresh": 2.5},
"categories": ["robust", "non-robust"],
"thresholds": [(0.5, 0.5), (0.5, 0.5)],
"ops": [(">=", ">="), ("<", "<")],
}
}
elif fractions == "missing":
statistics = {
"robustness_categories": {
"categories": ["robust", "non-robust"],
"thresholds": [(0.5, 0.5), (0.5, 0.5)],
"ops": [(">=", ">="), ("<", "<")],
}
}
with pytest.raises(
ValueError,
match="'robustness_categories' requires 'robustness_fractions'",
):
xs.ensemble_stats(ens, statistics=statistics)
return

out = xs.ensemble_stats(ens, statistics=statistics, weights=weights)

assert len(out.data_vars) == {"only": 5, "both": 6, "nested": 1}[fractions]
if fractions in ["only", "both"]:
np.testing.assert_array_equal(out.tg_mean_changed, [0, 0.4, 1, 1])
np.testing.assert_array_equal(out.tg_mean_agree, [1, 1, 1, 1])
if fractions in ["both", "nested"]:
np.testing.assert_array_equal(
out.tg_mean_robustness_categories,
{"both": [99, 99, 1, 1], "nested": [99, 1, 1, 1]}[fractions],
)
np.testing.assert_array_equal(
out.tg_mean_robustness_categories.attrs["flag_descriptions"],
["robust", "non-robust"],
)

@pytest.mark.parametrize("symbol", ["rel.", "relative", "*", "/", "pct.", "abs."])
def test_robustness_reldelta(self, caplog, symbol):
ens = self.make_ensemble(10)
for e in ens:
e["tg_mean"].attrs["delta_kind"] = symbol

with caplog.at_level(logging.INFO):
xs.ensemble_stats(
ens,
statistics={
"robustness_fractions": {"test": "threshold", "abs_thresh": 2.5}
},
)
if symbol in ["rel.", "relative", "*", "/"]:
assert "Relative delta detected" in caplog.text
else:
assert "Relative delta detected" not in caplog.text

@pytest.mark.parametrize("common_attrs_only", [True, False])
def test_common_attrs_only(self, common_attrs_only):
Expand All @@ -107,6 +221,53 @@ def test_common_attrs_only(self, common_attrs_only):
assert out.attrs.get("foo", None) == "bar"
assert ("bar0" not in out.attrs) == common_attrs_only

def test_errors(self):
ens = self.make_ensemble(10)

# Warning if the statistic does not support weighting
weights = xr.DataArray([0] * 5 + [1] * 5, dims="realization")
with pytest.warns(UserWarning, match="Weighting is not supported"):
with pytest.raises(
TypeError
): # kkz is not actually supported here, but it's one of the few that will not support weighting
xs.ensemble_stats(
ens, statistics={"kkz_reduce_ensemble": None}, weights=weights
)

# Error if you try to use a relative delta with a reference dataset
for e in ens:
e["tg_mean"].attrs["delta_kind"] = "rel."
ref = weights
with pytest.raises(
ValueError, match="is a delta, but 'ref' was still specified."
):
xs.ensemble_stats(
ens,
statistics={
"robustness_fractions": {
"test": "threshold",
"abs_thresh": 2.5,
"ref": ref,
}
},
)

# Error if you try to use a robustness_fractions with a reference dataset, but also specify other statistics
with pytest.raises(
ValueError, match="The input requirements for 'robustness_fractions'"
):
xs.ensemble_stats(
ens,
statistics={
"robustness_fractions": {
"test": "threshold",
"abs_thresh": 2.5,
"ref": ref,
},
"ensemble_mean_std_max_min": None,
},
)


class TestGenerateWeights:
@staticmethod
Expand Down
Loading

0 comments on commit 15f054a

Please sign in to comment.