From 2dec6b6f3cbc5d2b37728e76e6f3050422832f36 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 15 Nov 2023 10:24:49 -0500 Subject: [PATCH 001/135] add lafferty --- docs/references.bib | 17 ++++++ xclim/ensembles/_partitioning.py | 89 ++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/docs/references.bib b/docs/references.bib index 4d9afb8ce..9b11742f2 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -2086,3 +2086,20 @@ @inbook{ year={2023}, pages={1927–2058} } + +@article{Lafferty2023, + abstract = {Efforts to diagnose the risks of a changing climate often rely on downscaled and bias-corrected climate information, making it important to understand the uncertainties and potential biases of this approach. Here, we perform a variance decomposition to partition uncertainty in global climate projections and quantify the relative importance of downscaling and bias-correction. We analyze simple climate metrics such as annual temperature and precipitation averages, as well as several indices of climate extremes. We find that downscaling and bias-correction often contribute substantial uncertainty to local decision-relevant climate outcomes, though our results are strongly heterogeneous across space, time, and climate metrics. Our results can provide guidance to impact modelers and decision-makers regarding the uncertainties associated with downscaling and bias-correction when performing local-scale analyses, as neglecting to account for these uncertainties may risk overconfidence relative to the full range of possible climate futures.}, + author = {David C. Lafferty and Ryan L. 
Sriver}, + doi = {10.1038/s41612-023-00486-0}, + issn = {2397-3722}, + issue = {1}, + journal = {npj Climate and Atmospheric Science 2023 6:1}, + keywords = {Atmospheric science,Climate,Climate and Earth system modelling,Projection and prediction,change impacts}, + month = {9}, + pages = {1-13}, + publisher = {Nature Publishing Group}, + title = {Downscaling and bias-correction contribute considerable uncertainty to local climate projections in CMIP6}, + volume = {6}, + url = {https://www.nature.com/articles/s41612-023-00486-0}, + year = {2023}, +} diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 72f4475ce..85d1aa344 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -184,3 +184,92 @@ def hawkins_sutton_09_weighting(da, obs, baseline=("1971", "2000")): xm = da.sel(time=baseline[1]) - mm xm = xm.drop("time").squeeze() return 1 / (obs + np.abs(xm - obs)) + + +def lafferty_sriver( + da: xr.DataArray, + sm: xr.DataArray = None, + weights: xr.DataArray = None, +): + """Return the mean and partitioned variance of an ensemble based on method from Lafferty and Sriver (2023). + + Parameters + ---------- + da: xr.DataArray + Time series with dimensions 'time', 'scenario', 'downscaling' and 'model'. + sm: xr.DataArray + Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order + polynomial. Results are sensitive to the choice of smoothing function, use this to set another polynomial + order, or a LOESS curve. + weights: xr.DataArray + Weights to be applied to individual models. Should have `model` dimension. + + Returns + ------- + xr.DataArray, xr.DataArray + The mean relative to the baseline, and the components of variance of the ensemble. These components are + coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, `downscaling` and `total`. 
+ + Notes + ----- + To prepare input data, make sure `da` has dimensions `time`, `scenario`, `downscaling` and `model`, + e.g. `da.rename({"experiment": "scenario"})`. + + References + ---------- + :cite:cts:`Lafferty2023` + """ + if xr.infer_freq(da.time)[0] not in ["A", "Y"]: + raise ValueError("This algorithm expects annual time series.") + + if not {"time", "scenario", "model", "downscaling"}.issubset(da.dims): + raise ValueError( + "DataArray dimensions should include 'time', 'scenario', 'downscaling' and 'model'." + ) + + # TODO: add weigths + + if sm is None: + # Fit a 4th order polynomial + fit = da.polyfit(dim="time", deg=4, skipna=True) + sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where( + da.notnull() + ) + + # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference + # between the extracted forced response and the raw outputs, averaged over all outputs" + nv_u = ( + (da - sm) + .rolling(time=11, center=True) + .var(dim="time") + .mean(dim=["scenario", "model", "downscaling"]) + ) + + # Model uncertainty: U_m(t) + model_u = sm.var(dim="model").mean(dim=["scenario", "downscaling"]) + + # Scenario uncertainty: U_s(t) + # TODO: maybe add option for Brekke and Barsugli + scenario_u = sm.mean(dim=["model", "downscaling"]).var(dim="scenario") + + # Downscaling uncertainty: U_d(t) + downscaling_u = sm.var(dim="downscaling").mean(dim=["scenario", "model"]) + + # Total uncertainty: T(t) + total = nv_u + scenario_u + model_u + downscaling_u + + # Create output array with the uncertainty components + u = pd.Index( + ["model", "scenario", "downscaling", "variability", "total"], name="uncertainty" + ) + uncertainty = xr.concat([model_u, scenario_u, downscaling_u, nv_u, total], dim=u) + + # Add the number of instances for each uncertainty component + uncertainty = uncertainty.assign_coords( + num=("uncertainty", [int(len(da[v])) if v in da.dims else 0 for v in u]) + ) + + # Mean projection: G(t) + g = 
sm.mean(dim="model").mean(dim="scenario").mean(dim="downscaling") + + return g, uncertainty From 8c7a7476901762a797b2ffc5f0d955467d89ad45 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 15 Nov 2023 13:49:15 -0500 Subject: [PATCH 002/135] init and doc --- docs/api.rst | 3 +++ xclim/ensembles/__init__.py | 2 +- xclim/ensembles/_partitioning.py | 7 ++++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 65631ee85..ab78a2716 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -65,6 +65,9 @@ Ensembles Module .. autofunction:: xclim.ensembles.hawkins_sutton :noindex: +.. autofunction:: xclim.ensembles.lafferty_sriver + :noindex: + Units Handling Submodule ======================== diff --git a/xclim/ensembles/__init__.py b/xclim/ensembles/__init__.py index 27deb868b..36c46f8e8 100644 --- a/xclim/ensembles/__init__.py +++ b/xclim/ensembles/__init__.py @@ -10,7 +10,7 @@ from __future__ import annotations from ._base import create_ensemble, ensemble_mean_std_max_min, ensemble_percentiles -from ._partitioning import hawkins_sutton +from ._partitioning import hawkins_sutton, lafferty_sriver from ._reduce import ( kkz_reduce_ensemble, kmeans_reduce_ensemble, diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 85d1aa344..ba8e1c63f 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -3,8 +3,7 @@ Uncertainty Partitioning ======================== -This module implements methods and tools meant to partition climate projection uncertainties into different components: -natural variability, GHG scenario and climate models. +This module implements methods and tools meant to partition climate projection uncertainties into different components. """ @@ -18,6 +17,7 @@ Implemented partitioning algorithms: - `hawkins_sutton` + - `lafferty_sriver` # References for other more recent algorithms that could be added here. 
@@ -269,7 +269,8 @@ def lafferty_sriver( num=("uncertainty", [int(len(da[v])) if v in da.dims else 0 for v in u]) ) - # Mean projection: G(t) + # Mean projection: + # This is not part of the original algorithm, but we want all partition algos to have similar outputs. g = sm.mean(dim="model").mean(dim="scenario").mean(dim="downscaling") return g, uncertainty From 12d0849b905b755f5094acc45f2e97a64f868f58 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 15 Nov 2023 14:40:44 -0500 Subject: [PATCH 003/135] attrs --- xclim/ensembles/_partitioning.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index ba8e1c63f..dfaa4b7be 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -154,6 +154,15 @@ def hawkins_sutton( u = pd.Index(["variability", "model", "scenario", "total"], name="uncertainty") uncertainty = xr.concat([nv_u, model_u, scenario_u, total], dim=u) + # Add the number of instances for each uncertainty component + uncertainty = uncertainty.assign_coords( + num=("uncertainty", [int(len(da[v])) if v in da.dims else 0 for v in u]) + ) + + # attrs for plotting + uncertainty.attrs["long_name"] = "Fraction of the total variance" + uncertainty.attrs["units"] = "%" + # Mean projection: G(t) g = sm.weighted(weights).mean(dim="model").mean(dim="scenario") @@ -269,6 +278,10 @@ def lafferty_sriver( num=("uncertainty", [int(len(da[v])) if v in da.dims else 0 for v in u]) ) + # attrs for plotting + uncertainty.attrs["long_name"] = "Fraction of the total variance" + uncertainty.attrs["units"] = "%" + # Mean projection: # This is not part of the original algorithm, but we want all partition algos to have similar outputs. 
g = sm.mean(dim="model").mean(dim="scenario").mean(dim="downscaling") From da003bbd5bfb318740c3f46b89e03bf50b9207f0 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 15 Nov 2023 14:42:30 -0500 Subject: [PATCH 004/135] remove attrs --- xclim/ensembles/_partitioning.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index dfaa4b7be..63adea1c3 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -159,10 +159,6 @@ def hawkins_sutton( num=("uncertainty", [int(len(da[v])) if v in da.dims else 0 for v in u]) ) - # attrs for plotting - uncertainty.attrs["long_name"] = "Fraction of the total variance" - uncertainty.attrs["units"] = "%" - # Mean projection: G(t) g = sm.weighted(weights).mean(dim="model").mean(dim="scenario") @@ -278,10 +274,6 @@ def lafferty_sriver( num=("uncertainty", [int(len(da[v])) if v in da.dims else 0 for v in u]) ) - # attrs for plotting - uncertainty.attrs["long_name"] = "Fraction of the total variance" - uncertainty.attrs["units"] = "%" - # Mean projection: # This is not part of the original algorithm, but we want all partition algos to have similar outputs. 
g = sm.mean(dim="model").mean(dim="scenario").mean(dim="downscaling") From 8e8441d0bf3e16787a6832faadca3cfcda4d7496 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 30 Nov 2023 07:49:47 -0500 Subject: [PATCH 005/135] add test and weights --- tests/test_partitioning.py | 43 +++++++++++++++++++++++++++++- xclim/ensembles/_partitioning.py | 45 +++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index cce0a3961..0308ba697 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -3,7 +3,7 @@ import numpy as np import xarray as xr -from xclim.ensembles import hawkins_sutton +from xclim.ensembles import hawkins_sutton, lafferty_sriver from xclim.ensembles._filters import _concat_hist, _model_in_all_scens, _single_member @@ -67,3 +67,44 @@ def test_hawkins_sutton_synthetic(random): su.sel(time=slice("2020", None)).mean() > su.sel(time=slice("2000", "2010")).mean() ) + + +def test_lafferty_sriver_synthetic(random): + """Test logic of Lafferty & Sriver's implementation using synthetic data.""" + # Time, scenario, model, downscaling + # Here the scenarios don't change over time, so there should be no model variability (since it's relative to the + # reference period).
+ sm = np.arange(10, 41, 10) # Scenario mean (4) + mm = np.arange(-6, 7, 1) # Model mean (13) + dm = np.arange(-2, 3, 1) # Downscaling mean (5) + mean = ( + dm[np.newaxis, np.newaxis, :] + + mm[np.newaxis, :, np.newaxis] + + sm[:, np.newaxis, np.newaxis] + ) + + # Natural variability + r = random.standard_normal((4, 13, 5, 60)) + + x = r + mean[:, :, :, np.newaxis] + time = xr.date_range("1970-01-01", periods=60, freq="Y") + da = xr.DataArray( + x, dims=("scenario", "model", "downscaling", "time"), coords={"time": time} + ) + m, v = lafferty_sriver(da) + # Mean uncertainty over time + vm = v.mean(dim="time") + + # Check that the mean relative to the baseline is zero + np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) + + # Check that the scenario uncertainty is zero + np.testing.assert_array_almost_equal(vm.sel(uncertainty="scenario"), 0, decimal=1) + + # Check that model uncertainty > variability + assert vm.sel(uncertainty="model") > vm.sel(uncertainty="variability") + + # Smoke test with polynomial of order 2 + fit = da.polyfit(dim="time", deg=2, skipna=True) + sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(da.notnull()) + hawkins_sutton(da, sm=sm) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 63adea1c3..34d644fb4 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -50,6 +50,8 @@ - evin_2019 """ +# TODO: Add ref for Brekke and Barsugli (2013) + def hawkins_sutton( da: xr.DataArray, @@ -195,6 +197,7 @@ def lafferty_sriver( da: xr.DataArray, sm: xr.DataArray = None, weights: xr.DataArray = None, + bb13: bool = False, ): """Return the mean and partitioned variance of an ensemble based on method from Lafferty and Sriver (2023). @@ -206,8 +209,9 @@ def lafferty_sriver( Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order polynomial. 
Results are sensitive to the choice of smoothing function, use this to set another polynomial order, or a LOESS curve. - weights: xr.DataArray - Weights to be applied to individual models. Should have `model` dimension. + bb13: bool + Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance + over scenarios is computed before taking the mean over models and downscaling methods. Returns ------- @@ -232,8 +236,6 @@ def lafferty_sriver( "DataArray dimensions should include 'time', 'scenario', 'downscaling' and 'model'." ) - # TODO: add weigths - if sm is None: # Fit a 4th order polynomial fit = da.polyfit(dim="time", deg=4, skipna=True) @@ -242,7 +244,7 @@ def lafferty_sriver( ) # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference - # between the extracted forced response and the raw outputs, averaged over all outputs" + # between the extracted forced response and the raw outputs, averaged over all outputs." nv_u = ( (da - sm) .rolling(time=11, center=True) @@ -250,15 +252,24 @@ def lafferty_sriver( .mean(dim=["scenario", "model", "downscaling"]) ) + # Scenario uncertainty: U_s(t) + if bb13: + scenario_u = sm.var(dim="scenario").mean(dim=["model", "downscaling"]) + else: + scenario_u = sm.mean(dim=["model", "downscaling"]).var(dim="scenario") + # Model uncertainty: U_m(t) - model_u = sm.var(dim="model").mean(dim=["scenario", "downscaling"]) - # Scenario uncertainty: U_s(t) - # TODO: maybe add option for Brekke and Barsugli - scenario_u = sm.mean(dim=["model", "downscaling"]).var(dim="scenario") + ## Count the number of parent models that have been downscaled using method $d$ for scenario $s$. + ## In the paper, weights are constant, here they may vary across time if there are missing values. 
+ mw = sm.count("model") + model_u = sm.var(dim="model").weighted(mw).mean(dim=["scenario", "downscaling"]) # Downscaling uncertainty: U_d(t) - downscaling_u = sm.var(dim="downscaling").mean(dim=["scenario", "model"]) + dw = sm.count("downscaling") + downscaling_u = ( + sm.var(dim="downscaling").weighted(dw).mean(dim=["scenario", "model"]) + ) # Total uncertainty: T(t) total = nv_u + scenario_u + model_u + downscaling_u @@ -279,3 +290,17 @@ def lafferty_sriver( g = sm.mean(dim="model").mean(dim="scenario").mean(dim="downscaling") return g, uncertainty + + +# def _lafferty_sriver_weights(da): +# """Return the weights used in Lefferty and Sriver (2023). +# +# The weights $w_{s,d}$ are given by the number of parent models that have been downscaled using method $d$ for +# scenario $s$. +# """ +# # Count the number of series that have 80% of their data (not in the paper) +# # We don't want to count series with only a few years of data +# valid = (da.count("time") / len(da.time)) > 0.8 +# s = valid.where(valid) +# +# # Count the number of parent models that have been downscaled using method $d$ for scenario $s$. From 20a56f3f3bb5c32a7868f9b93b2a93599056b17a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Mon, 11 Dec 2023 10:33:12 -0500 Subject: [PATCH 006/135] fix window attrs --- CHANGES.rst | 2 +- xclim/indices/_agro.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f69c45d54..2bebde265 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -21,7 +21,7 @@ Bug fixes * Fixed a bug with ``n_escore=-1`` in ``xclim.sdba.adjustment.NpdfTransform``. (:issue:`1515`, :pull:`1516`). * In the documentation, fixed the tooltips in the indicator search results. (:issue:`1524`, :pull:`1527`). * If chunked inputs are passed to indicators ``mean_radiant_temperature`` and ``potential_evapotranspiration``, sub-calculations of the solar angle will also use the same chunks, instead of a single one of the same size as the data. 
(:issue:`1536`, :pull:`1542`). -* Fix wrong attributes in ``xclim.indices.standardized_precipitation_index``, ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1537`, :pull:`1538`). +* Fix wrong attributes in ``xclim.indices.standardized_precipitation_index``, ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1537`, :issue:`1552` , :pull:`1538`, :pull:`1538`). Internal changes ^^^^^^^^^^^^^^^^ diff --git a/xclim/indices/_agro.py b/xclim/indices/_agro.py index f8571f669..96fa9f795 100644 --- a/xclim/indices/_agro.py +++ b/xclim/indices/_agro.py @@ -1239,6 +1239,7 @@ def standardized_precipitation_index( spi = standardized_index(pr, params) spi.attrs = params.attrs spi.attrs["freq"] = freq or xarray.infer_freq(spi.time) + spi.attrs["window"] = window spi.attrs["units"] = "" return spi From a67cb1d04f01351af0f3daf925aef4d1519ff9f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Mon, 11 Dec 2023 10:37:38 -0500 Subject: [PATCH 007/135] update CHANGES --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 2bebde265..8e5cbb2d1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -21,7 +21,7 @@ Bug fixes * Fixed a bug with ``n_escore=-1`` in ``xclim.sdba.adjustment.NpdfTransform``. (:issue:`1515`, :pull:`1516`). * In the documentation, fixed the tooltips in the indicator search results. (:issue:`1524`, :pull:`1527`). * If chunked inputs are passed to indicators ``mean_radiant_temperature`` and ``potential_evapotranspiration``, sub-calculations of the solar angle will also use the same chunks, instead of a single one of the same size as the data. (:issue:`1536`, :pull:`1542`). -* Fix wrong attributes in ``xclim.indices.standardized_precipitation_index``, ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1537`, :issue:`1552` , :pull:`1538`, :pull:`1538`). 
+* Fix wrong attributes in ``xclim.indices.standardized_precipitation_index``, ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1537`, :issue:`1552` , :pull:`1538`, :pull:`1554`). Internal changes ^^^^^^^^^^^^^^^^ From 62e7a09afb6c82e83c91c841af019dcda202aea6 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 11 Dec 2023 16:19:33 -0500 Subject: [PATCH 008/135] Stack periods --- CHANGES.rst | 8 + docs/notebooks/sdba-advanced.ipynb | 102 +--------- tests/test_calendar.py | 35 ++++ tests/test_sdba/test_processing.py | 44 ----- xclim/core/calendar.py | 296 +++++++++++++++++++++++++++++ xclim/sdba/processing.py | 148 +++------------ xclim/testing/helpers.py | 2 +- 7 files changed, 378 insertions(+), 257 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f69c45d54..f0a098aa6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,14 @@ Changelog ========= +v0.48.0 (unreleased) +-------------------- +Contributors to this version: Pascal Bourgault (:user:`aulemahal`). + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* New ``xclim.core.calendar.stack_periods`` and ``unstack_periods`` for performing ``rolling(time=...).construct(..., stride=...)`` but with non-uniform temporal periods like years or months. They replace ``xclim.sdba.processing.construct_moving_yearly_window`` and ``unpack_moving_yearly_window`` which are deprecated and will be removed in a future release. + v0.47.0 (2023-12-01) -------------------- Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`). 
diff --git a/docs/notebooks/sdba-advanced.ipynb b/docs/notebooks/sdba-advanced.ipynb index 0354a0eb7..df0899c0e 100644 --- a/docs/notebooks/sdba-advanced.ipynb +++ b/docs/notebooks/sdba-advanced.ipynb @@ -346,21 +346,18 @@ "\n", "Some Adjustment methods require that the adjusted data (`sim`) be of the same length (same number of points) than the training data (`ref` and `hist`). These requirements often ensure conservation of statistical properties and a better representation of the climate change signal over the long adjusted timeseries.\n", "\n", - "In opposition to a conventional \"rolling window\", here it is the _years_ that are the base units of the window, not the elements themselves. `xclim` implements `sdba.construct_moving_yearly_window` and `sdba.unpack_moving_yearly_window` to manipulate data in that goal. The \"construct\" function cuts the data in overlapping windows of a certain length (in years) and stacks them along a new `\"movingdim\"` dimension, alike to xarray's `da.rolling(time=win).construct('movingdim')`, but with yearly steps. The step between each window can also be controlled. This argument is an indicator of how many years overlap between each window. With a value of 1 (the default), a window will have `window - 1` years overlapping with the previous one. `step = window` will result in no overlap at all.\n", + "In opposition to a conventional \"rolling window\", here it is the _years_ that are the base units of the window, not the elements themselves. `xclim` implements `xc.core.calendar.stack_periods` and `xc.core.calendar.unstack_periods` to manipulate data in that goal. The \"stack\" function cuts the data in overlapping windows of a certain length and stacks them along a new `\"period\"` dimension, alike to xarray's `da.rolling(time=win).construct('period')`, but with yearly steps. The stride (or step) between each window can also be controlled. This argument is an indicator of how many years overlap between each window. 
With a value of 1, a window will have `window - 1` years overlapping with the previous one. The default (`None`) is to have `stride = window`, which results in no overlap at all. The default units in which `window` and `stride` are given is a year (\"YS\"), but can be changed with argument `freq`.\n", "\n", - "By default, the result is chunked along this `'movingdim'` dimension. For this reason, the method is expected to be more computationally efficient (when using `dask`) than looping over the windows.\n", + "By chunking the result along this `'period'` dimension, it is expected to be more computationally efficient (when using `dask`) than looping over the windows with a for-loop (or a `GroupBy`).\n", "\n", "Note that this results in two restrictions:\n", "\n", "1. The constructed array has the same \"time\" axis for all windows. This is a problem if the actual _year_ is of importance for the adjustment, but this is not the case for any of xclim's current adjustment methods.\n", "2. The input timeseries must be in a calendar with uniform year lengths. For daily data, this means only the \"360_day\", \"noleap\" and \"all_leap\" calendars are supported.\n", "\n", - "The \"unpack\" function does the opposite : it concatenates the windows together to recreate the original timeseries.\n", - "The time points that are not part of a window will not appear in the reconstructed timeseries.\n", - "If `append_ends` is True, the reconstructed timeseries will go from the first time point of the first window to the last time point of the last window. In the middle, the central `step` years are kept from each window.\n", - "If `append_ends` is False, only the central `step` years are kept from each window. Which means the final timeseries has `(window - step) / 2` years missing on either side, with the extra year missing on the right in case of an odd `(window - step)`.
We are missing data, but the contribution from each window is equal.\n", + "The \"unstack\" function does the opposite : it concatenates the windows together to recreate the original timeseries. It only works for the no-overlap case where `stride = window` and for the non-ambiguous one where `stride` divides `window` into an odd number (N) of parts. In that latter situation, the middle parts of each period are kept when reconstructing the timeseries, in addition to the first (last) parts of the first (last) period needed to get a full timeseries.\n", "\n", - "Here, as `ref` and `hist` cover 15 years, we will use a window of 15 on sim. With a step of two (2), this means the first window goes from 2000 to 2014 (inclusive). The last window goes from 2016 to 2030. `window - step = 13`, so six (6) years will be missing at the beginning of the final `scen` and seven (7) years at the end." + "Here, as `ref` and `hist` cover 15 years, we will use a window of 15 on sim. With a stride of 5 years, this means the first window goes from 2000 to 2014 (inclusive). Then 2005-2019, 2010-2024 and 2015-2029. The last year will be dropped." ] }, { @@ -382,18 +379,9 @@ "metadata": {}, "outputs": [], "source": [ - "sim" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from xclim.sdba import construct_moving_yearly_window, unpack_moving_yearly_window\n", + "from xclim.core.calendar import stack_periods, unstack_periods\n", "\n", - "sim_win = construct_moving_yearly_window(sim, window=15, step=2)\n", + "sim_win = stack_periods(sim, window=15, stride=5)\n", "sim_win" ] }, @@ -401,24 +389,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here, we retrieve the full timeseries." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scen_win = unpack_moving_yearly_window(QDM.adjust(sim_win), append_ends=True)\n", - "scen_win" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Whereas here, we have gaps at the edges." + "Here, we retrieve the full timeseries (minus the last year that couldn't fit in any window)." ] }, { @@ -427,63 +398,10 @@ "metadata": {}, "outputs": [], "source": [ - "scen_win = unpack_moving_yearly_window(QDM.adjust(sim_win), append_ends=False)\n", + "scen_win = unstack_periods(QDM.adjust(sim_win))\n", "scen_win" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is another short example, with an uneven number of years. Here `sim` goes from 2000 to 2029 (30 years instead of 31). With a step of 2 and a window of 15, the first window goes again from 2000 to 2014, but the last one is now from 2014 to 2028. The next window would be 2016-2030, but that last year doesn't exist." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sim_win = construct_moving_yearly_window(\n", - " sim.sel(time=slice(\"2000\", \"2029\")), window=15, step=2\n", - ")\n", - "sim_win" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we don't recover the full timeseries, even when we append the ends, because 2029 is not part of a window." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sim2 = unpack_moving_yearly_window(sim_win, append_ends=True)\n", - "sim2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Without appending the ends, the final timeseries is from 2006 to 2021, 6 years missing at the beginning, like last time and **8** years missing at the end."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sim2 = unpack_moving_yearly_window(sim_win, append_ends=False)\n", - "sim2" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -854,9 +772,7 @@ "hist.isel(location=1).plot(label=\"hist\", linewidth=lw)\n", "ref.isel(location=1).plot(label=\"ref\", linewidth=lw)\n", "ref_future.isel(location=1).plot(label=\"ref_future\", linewidth=lw)\n", - "leg = plt.legend()\n", - "for legobj in leg.legendHandles:\n", - " legobj.set_linewidth(2.0)" + "leg = plt.legend()" ] }, { diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 625e603e9..f071127ca 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -29,7 +29,9 @@ max_doy, parse_offset, percentile_doy, + stack_periods, time_bnds, + unstack_periods, ) @@ -703,3 +705,36 @@ def test_convert_doy(): np.testing.assert_allclose( out.isel(lat=0), [31.0, 200.48, 190.0, 59.83607, 299.71885] ) + + +@pytest.mark.parametrize("cftime", [True, False]) +@pytest.mark.parametrize( + "w,s,m,f,ss", + [(30, 10, None, "YS", 0), (3, 1, None, "QS-DEC", 60), (6, None, None, "MS", 0)], +) +def test_stack_periods(tas_series, cftime, w, s, m, f, ss): + da = tas_series(np.arange(365 * 50), cftime=cftime, start="2000-01-01") + + da_stck = stack_periods(da, window=w, stride=s, min_length=m, freq=f) + + assert "period_length" in da_stck.coords + assert bool(da_stck.period.attrs["unequal_periods"]) is (not f.startswith("Y")) + + da2 = unstack_periods(da_stck) + + xr.testing.assert_identical(da2, da.isel(time=slice(ss, da2.time.size + ss))) + + +def test_stack_periods_special(tas_series): + da = tas_series( + np.arange(365 * 48 + 12), cftime=True, start="2004-01-01" + ).convert_calendar("noleap") + + da_stck = stack_periods(da, dim="horizon") + np.testing.assert_array_equal(da_stck.horizon_length, 10950) + + with pytest.raises(ValueError, match="can't find the window"): + unstack_periods(da_stck) + + da2 = 
unstack_periods(da_stck.drop_vars("horizon_length"), dim="horizon") + xr.testing.assert_identical(da2, da.isel(time=slice(0, da2.time.size))) diff --git a/tests/test_sdba/test_processing.py b/tests/test_sdba/test_processing.py index a59e18313..294ef980c 100644 --- a/tests/test_sdba/test_processing.py +++ b/tests/test_sdba/test_processing.py @@ -10,7 +10,6 @@ from xclim.sdba.base import Grouper from xclim.sdba.processing import ( adapt_freq, - construct_moving_yearly_window, escore, from_additive_space, jitter, @@ -21,7 +20,6 @@ stack_variables, standardize, to_additive_space, - unpack_moving_yearly_window, unstack_variables, unstandardize, ) @@ -282,45 +280,3 @@ def test_stack_variables(open_dataset): ds1p = unstack_variables(da1) xr.testing.assert_equal(ds1, ds1p) - - -@pytest.mark.parametrize( - "window,step,lengths", - [ - (1, 1, 151), - (5, 5, 30), - (10, 10, 15), - (25, 25, 6), - (50, 50, 3), - (None, None, 131), - ], -) -def test_construct_moving_yearly_window(open_dataset, window, step, lengths): - ds = open_dataset("sdba/CanESM2_1950-2100.nc") - - calls = {k: v for k, v in dict(window=window, step=step).items() if v is not None} - da_windowed = construct_moving_yearly_window(ds.tasmax, **calls) - - assert len(da_windowed) == lengths - - -def test_construct_moving_yearly_window_standard_calendar(tasmin_series): - tasmin = tasmin_series(np.zeros(365 * 30), start="1997-01-01", units="degC") - - with pytest.raises(ValueError): - construct_moving_yearly_window(tasmin) - - -@pytest.mark.parametrize("append_ends", [True, False]) -def test_unpack_moving_yearly_window(open_dataset, append_ends): - tasmax = open_dataset("sdba/ahccd_1950-2013.nc").tasmax - - tasmax_windowed = construct_moving_yearly_window(tasmax) - - tx_deconstructed = unpack_moving_yearly_window( - tasmax_windowed, append_ends=append_ends - ) - if append_ends: - np.testing.assert_array_equal(tasmax, tx_deconstructed) - else: - assert len(tx_deconstructed.time) < len(tasmax.time) diff --git 
a/xclim/core/calendar.py b/xclim/core/calendar.py index dfbb3cb61..9f8c34998 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -15,6 +15,7 @@ import xarray as xr from xarray.coding.cftime_offsets import to_cftime_datetime from xarray.coding.cftimeindex import CFTimeIndex +from xarray.core import dtypes from xarray.core.resample import DataArrayResample, DatasetResample from xclim.core.utils import DayOfYearStr, uses_dask @@ -1588,3 +1589,298 @@ def get_doys(start, end, inclusive): mask["time"] = da.time return da.where(mask, drop=drop) + + +def _month_is_first_period_month(time, freq): + """Returns True if the given time is from the first month of freq.""" + if isinstance(time, cftime.datetime): + frqM = xr.coding.cftime_offsets.to_offset("MS") + frq = xr.coding.cftime_offsets.to_offset(freq) + if frqM.onOffset(time): + return frq.onOffset(time) + return frq.onOffset(frqM.rollback(time)) + # Pandas + time = pd.Timestamp(time) + frqM = pd.tseries.frequencies.to_offset("MS") + frq = pd.tseries.frequencies.to_offset(freq) + if frqM.is_on_offset(time): + return frq.is_on_offset(time) + return frq.is_on_offset(frqM.rollback(time)) + + +def stack_periods( + da: xr.Dataset | xr.DataArray, + window: int = 30, + stride: int | None = None, + min_length: int | None = None, + freq: str = "YS", + dim: str = "period", + start: str = "1970-01-01", + pad_value=dtypes.NA, +): + """Construct a multi-period array + + Stack different equal-length periods of `da` into a new 'period' dimension. + + This is similar to ``da.rolling(time=window).construct(dim, stride=stride)``, but adapted for arguments + in terms of a base temporal frequency that might be non uniform (years, months, etc). + It is reversible for some cases (see `stride`). A rolling-construct method will be much more performant for uniform periods (days, weeks). + + Parameters + ---------- + da : xr.Dataset or xr.DataArray + An xarray object with a `time` dimension. + Must have an uniform timestep length. 
+ Output might be strange if this does not use an uniform calendar (noleap, 360_day, all_leap). + window : int + The length of the moving window as a multiple of ``freq``. + stride : int, optional + At which interval to take the windows, as a multiple of ``freq``. + For the operation to be reversible with :py:func:`unstack_periods`, it must divide `window` into an odd number of parts. + Default is `window` (no overlap between periods). + min_length : int, optional + Windows shorter than this are not included in the output. + Given as a multiple of ``freq``. Defaults is ``window`` (every window must be complete). + Similar to the ``min_periods`` argument of ``da.rolling``. + If ``freq`` is annual or quarterly and ``min_length == ``window``, the first period is considered complete + if the first timestep is in the first month of the period. + freq : str + Units of ``window``, ``stride`` and ``min_length``, as a frequency string. + Must be larger or equal to the data's sampling frequency. + Note that this function offers an easier interface for non uniform period (like years or months) + but is much slower than a rolling-construct method. + dim : str + The new dimension name. + start : str + The `start` argument passed to :py:func:`xarray.date_range` to generate the new placeholder + time coordinate. + pad_value: Any + When some periods are shorter than others, this value is used to pad them at the end. + Passed directly as argument ``fill_value`` to :py:func:`xarray.concat`, the default is the same as on that function. + + Return + ------ + xr.DataArray + A DataArray with a new `period` dimension and a `time` dimension with the length of the longest window. + The new time coordinate has the same frequency as the input data but is generated using + :py:func:`xarray.date_range` with the given `start` value. + That coordinate is the same for all periods, depending on the choice of ``window`` and ``freq``, it might make sense. 
+ But for unequal periods or non-uniform calendars, it will certainly not. + If ``stride`` is a divisor of ``window``, the correct timeseries can be reconstructed with :py:func:`unstack_periods`. + The coordinate of `period` is the first timestep of each windows. + """ + from xclim.core.units import ( # Import in function to avoid cyclical imports + ensure_cf_units, + infer_sampling_units, + ) + + stride = stride or window + min_length = min_length or window + + srcfreq = xr.infer_freq(da.time) + cal = da.time.dt.calendar + use_cftime = da.time.dtype == "O" + + # Convert integer inputs to freq strings + mult, *args = parse_offset(freq) + win_frq = construct_offset(mult * window, *args) + strd_frq = construct_offset(mult * stride, *args) + minl_frq = construct_offset(mult * min_length, *args) + + # The same time coord as da, but with one extra element. + # This way, the last window's last index is not returned as None by xarray's grouper. + time2 = xr.DataArray( + xr.date_range( + da.time[0].item(), + freq=srcfreq, + calendar=cal, + periods=da.time.size + 1, + use_cftime=use_cftime, + ), + dims=("time",), + name="time", + ) + + periods = [] + longest = 0 + for begin, strd_slc in da.resample(time=strd_frq).groups.items(): + win_resamp = time2.isel(time=slice(strd_slc.start, None)).resample(time=win_frq) + # Get slice for first group + win_slc = win_resamp.groupers[0].group_indices[0] + if min_length < window: + min_resamp = time2.isel(time=slice(strd_slc.start, None)).resample( + time=minl_frq + ) + min_slc = min_resamp.groupers[0].group_indices[0] + open_ended = min_slc.stop is None + else: + # The end of the group slice is None if no outside-group value was found after the last element + # As we added an extra step to time2, we avoid the case where a group ends exactly on the last element of ds. 
+ open_ended = win_slc.stop is None + if open_ended: + # Too short, we got to the end + break + if ( + strd_slc.start == 0 + and parse_offset(freq)[1] in "YAQ" + and min_length == window + and not _month_is_first_period_month(da.time[0].item(), freq) + ): + # For annual or quartely frequencies (which can be anchor-based), if the first time is not in the first month of the first period, + # then the first period is incomplete but by a fractional amount. + continue + periods.append( + slice( + strd_slc.start + win_slc.start, + (strd_slc.start + win_slc.stop) + if win_slc.stop is not None + else da.time.size, + ) + ) + + # Make coordinates + lengths = xr.DataArray( + [slc.stop - slc.start for slc in periods], + dims=(dim,), + attrs={"long_name": "Length of each period"}, + ) + longest = lengths.max().item() + # Length as a pint-ready array : with proper units, but values are not usable as indexes anymore + m, u = infer_sampling_units(da) + lengths = lengths * m + lengths.attrs["units"] = ensure_cf_units(u) + # Start points for each periods + remember parameters for unstacking + starts = xr.DataArray( + [da.time[slc.start].item() for slc in periods], + dims=(dim,), + attrs={ + "long_name": "Start of the period", + # Save parameters so that we can unstack. + "window": window, + "stride": stride, + "freq": freq, + "unequal_lengths": int(len(np.unique(lengths)) > 1), + }, + ) + # The "fake" axis that all periods share + fake_time = xr.date_range( + start, periods=longest, freq=srcfreq, calendar=cal, use_cftime=use_cftime + ) + # Slice and concat along new dim. We drop the index and add a new one so that xarray can concat them together. 
+ out = xr.concat( + [ + da.isel(time=slc) + .drop_vars("time") + .assign_coords(time=np.arange(slc.stop - slc.start)) + for slc in periods + ], + dim, + join="outer", + fill_value=pad_value, + ) + out = out.assign_coords( + time=(("time",), fake_time, da.time.attrs.copy()), + **{f"{dim}_length": lengths, dim: starts}, + ) + out.time.attrs.update(long_name="Placeholder time axis") + return out + + +def unstack_periods(da: xr.DataArray | xr.Dataset, dim: str = "period"): + """Unstack an array constructed with :py:func:`stack_periods`. + + Can only work with periods stacked with a ``stride`` that divides ``window`` in a odd number of sections. + When ``stride`` is smaller than ``window``, only the centermost stride of each window is kept, + except for the beginning and end which are taken from the first and last windows. + + Parameters + ---------- + da : xr.DataArray + As constructed by :py:func:`stack_periods`, attributes of the period coordinates must have been perserved. + dim : str + The period dimension name. + """ + from xclim.core.units import infer_sampling_units + + try: + starts = da[dim] + window = starts.attrs["window"] + stride = starts.attrs["stride"] + freq = starts.attrs["freq"] + unequal_lengths = bool(starts.attrs["unequal_lengths"]) + except (AttributeError, KeyError) as err: + raise ValueError( + f"`unstack_periods` can't find the window, stride and freq attributes on the {dim} coordiantes." + ) from err + + if unequal_lengths: + try: + lengths = da[f"{dim}_length"] + except KeyError as err: + raise ValueError( + f"`unstack_periods` can't find the `{dim}_length` coordinate." 
+ ) from err + # Get length as number of points + m, u = infer_sampling_units(da.time) + lengths = lengths // m + else: + lengths = xr.DataArray([da.time.size] * da[dim].size, dims=(dim,)) + + time_as_delta = da.time - da.time[0] + if da.time.dtype == "O": + # cftime can't add with np.timedelta64 (restriction comes from numpy which refuses to add O with m8) + time_as_delta = pd.TimedeltaIndex( + time_as_delta + ).to_pytimedelta() # this array is O, numpy complies + else: + # Xarray will return int when iterating over datetime values, this returns timestamps + starts = pd.DatetimeIndex(starts) + + def _reconstruct_time(start): + times = time_as_delta + start + return xr.DataArray(times, dims=("time",), coords={"time": times}, name="time") + + # Easy case: + if window == stride: + # just concat them all + periods = [] + for i, (start, length) in enumerate(zip(starts.values, lengths.values)): + real_time = _reconstruct_time(start) + periods.append( + da.isel(**{dim: i}, drop=True) + .isel(time=slice(0, length)) + .assign_coords(time=real_time.isel(time=slice(0, length))) + ) + return xr.concat(periods, "time") + + # Difficult and ambiguous case + if (window / stride) % 2 != 1: + raise NotImplementedError( + "`unstack_periods` can't work with strides that do not divide the window into an odd number of parts." + f"Got {window} / {stride} which is not an odd integer." 
+ ) + + # Non-ambiguous overlapping case + Nwin = window // stride + mid = (Nwin - 1) // 2 # index of the center window + + mult, *args = parse_offset(freq) + strd_frq = construct_offset(mult * stride, *args) + + periods = [] + for i, (start, length) in enumerate(zip(starts.values, lengths.values)): + real_time = _reconstruct_time(start) + slices = real_time.resample(time=strd_frq).groupers[0].group_indices + if i == 0: + slc = slice(slices[0].start, min(slices[mid].stop, length)) + elif i == da.period.size - 1: + slc = slice(slices[mid].start, min(slices[Nwin - 1].stop or length, length)) + else: + slc = slice(slices[mid].start, min(slices[mid].stop, length)) + periods.append( + da.isel(**{dim: i}, drop=True) + .isel(time=slc) + .assign_coords(time=real_time.isel(time=slc)) + ) + + return xr.concat(periods, "time") diff --git a/xclim/sdba/processing.py b/xclim/sdba/processing.py index 08ddddb76..f6d833ae8 100644 --- a/xclim/sdba/processing.py +++ b/xclim/sdba/processing.py @@ -12,7 +12,13 @@ import xarray as xr from xarray.core.utils import get_temp_dimname -from xclim.core.calendar import get_calendar, max_doy, parse_offset +from xclim.core.calendar import ( + get_calendar, + max_doy, + parse_offset, + stack_periods, + unstack_periods, +) from xclim.core.formatting import update_xclim_history from xclim.core.units import convert_units_to, infer_context, units from xclim.core.utils import uses_dask @@ -480,133 +486,37 @@ def _get_number_of_elements_by_year(time): def construct_moving_yearly_window( da: xr.Dataset, window: int = 21, step: int = 1, dim: str = "movingwin" ): - """Construct a moving window DataArray. - - Stack windows of `da` in a new 'movingwin' dimension. - Windows are always made of full years, so calendar with non-uniform year lengths are not supported. 
- - Windows are constructed starting at the beginning of `da`, if number of given years is not - a multiple of `step`, then the last year(s) will be missing as a supplementary window would be incomplete. - - Parameters - ---------- - da : xr.Dataset - A DataArray with a `time` dimension. - window : int - The length of the moving window as a number of years. - step : int - The step between each window as a number of years. - dim : str - The new dimension name. If given, must also be given to `unpack_moving_yearly_window`. - - Return - ------ - xr.DataArray - A DataArray with a new `movingwin` dimension and a `time` dimension with a length of 1 window. - This assumes downstream algorithms do not make use of the _absolute_ year of the data. - The correct timeseries can be reconstructed with :py:func:`unpack_moving_yearly_window`. - The coordinates of `movingwin` are the first date of the windows. + """Deprecated function. + Use :py:func:`xclim.core.calendar.stack_periods` instead, renaming ``step`` to ``stride``. + Beware of the different default value for `dim` ("period"). """ - # Get number of samples per year (and perform checks) - N_in_year = _get_number_of_elements_by_year(da.time) - - # Number of samples in a window - N = window * N_in_year - - first_slice = da.isel(time=slice(0, N)) - first_slice = first_slice.expand_dims({dim: np.atleast_1d(first_slice.time[0])}) - daw = [first_slice] - - i_start = N_in_year * step - # This is the first time I use `while` in real python code. What an event. - while i_start + N <= da.time.size: - # Cut and add _full_ slices only, partial window are thrown out - # Use isel so that we don't need to deal with a starting date. 
- slc = da.isel(time=slice(i_start, i_start + N)) - slc = slc.expand_dims({dim: np.atleast_1d(slc.time[0])}) - slc["time"] = first_slice.time - daw.append(slc) - i_start += N_in_year * step - - daw = xr.concat(daw, dim) - return daw + warnings.warn( + FutureWarning, + ( + "`construct_moving_yearly_window` is deprecated and will be removed in a future version. " + f"Please use xclim.core.calendar.stack_periods(da, window={window}, stride={step}, dim='{dim}', freq='YS') instead." + ), + ) + return stack_periods(da, window=window, stride=step, dim=dim, freq="YS") def unpack_moving_yearly_window( da: xr.DataArray, dim: str = "movingwin", append_ends: bool = True ): - """Unpack a constructed moving window dataset to a normal timeseries, only keeping the central data. - - Unpack DataArrays created with :py:func:`construct_moving_yearly_window` and recreate a timeseries data. - If `append_ends` is False, only keeps the central non-overlapping years. The final timeseries will be - (window - step) years shorter than the initial one. If `append_ends` is True, the time points from first and last - windows will be included in the final timeseries. - - The time points that are not in a window will never be included in the final timeseries. - The window length and window step are inferred from the coordinates. - - Parameters - ---------- - da : xr.DataArray - As constructed by :py:func:`construct_moving_yearly_window`. - dim : str - The window dimension name as given to the construction function. - append_ends : bool - Whether to append the ends of the timeseries - If False, the final timeseries will be (window - step) years shorter than the initial one, - but all windows will contribute equally. - If True, the year before the middle years of the first window and the years after the middle years of the last - window are appended to the middle years. The final timeseries will be the same length as the initial timeseries - if the windows span the whole timeseries. 
- The time steps that are not in a window will be left out of the final timeseries. + """Deprecated function. + Use :py:func:`xclim.core.calendar.unstack_periods` instead. + Beware of the different default value for `dim` ("period"). The new function always behaves like ``appends_ends=True``. """ - # Get number of samples by year (and perform checks) - N_in_year = _get_number_of_elements_by_year(da.time) - - # Might be smaller than the original moving window, doesn't matter - window = da.time.size / N_in_year - - if window % 1 != 0: - warnings.warn( - f"Incomplete data received as number of years covered is not an integer ({window})" - ) - - # Get step in number of years - days_in_year = max_doy[get_calendar(da)] - step = np.unique(da[dim].diff(dim).dt.days / days_in_year) - if len(step) > 1: - raise ValueError("The spacing between the windows is not equal.") - step = int(step[0]) - - # Which years to keep: length step, in the middle of window - left = int((window - step) // 2) # first year to keep - - # Keep only the middle years - da_mid = da.isel(time=slice(left * N_in_year, (left + step) * N_in_year)) - - out = [] - for win_start in da_mid[dim]: - slc = da_mid.sel({dim: win_start}).drop_vars(dim) - dt = win_start.values - da_mid[dim][0].values - slc["time"] = slc.time + dt - out.append(slc) - - if append_ends: - # add front end at the front - out.insert( - 0, da.isel({dim: 0, "time": slice(None, left * N_in_year)}).drop_vars(dim) - ) - # add back end at the back - back_end = da.isel( - {dim: -1, "time": slice((left + step) * N_in_year, None)} - ).drop_vars(dim) - dt = da.isel({dim: -1})[dim].values - da.isel({dim: 0})[dim].values - back_end["time"] = back_end.time + dt - out.append(back_end) - - return xr.concat(out, "time") + warnings.warn( + FutureWarning, + ( + "`unpack_moving_yearly_window` is deprecated and will be removed in a future version. " + f"Please use xclim.core.calendar.unstack_periods(da, dim='{dim}') instead." 
+ ), + ) + return unstack_periods(da, dim=dim) @update_xclim_history diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py index 85d1f953a..3b135b6b5 100644 --- a/xclim/testing/helpers.py +++ b/xclim/testing/helpers.py @@ -221,7 +221,7 @@ def add_example_file_paths(cache_dir: Path) -> dict[str]: def test_timeseries( values, variable, - start="7/1/2000", + start="2000-01-07", units=None, freq="D", as_dataset=False, From c508e2731218fc9d0d54071da904a32d28549893 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 11 Dec 2023 16:22:29 -0500 Subject: [PATCH 009/135] add comments to stack_periods --- xclim/core/calendar.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 9f8c34998..838813e64 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -1703,11 +1703,13 @@ def stack_periods( periods = [] longest = 0 + # Iterate over strides, but recompute the full window for each stride start for begin, strd_slc in da.resample(time=strd_frq).groups.items(): win_resamp = time2.isel(time=slice(strd_slc.start, None)).resample(time=win_frq) # Get slice for first group win_slc = win_resamp.groupers[0].group_indices[0] if min_length < window: + # If we ask for a min_length period instead is it complete ? 
min_resamp = time2.isel(time=slice(strd_slc.start, None)).resample( time=minl_frq ) @@ -1824,8 +1826,10 @@ def unstack_periods(da: xr.DataArray | xr.Dataset, dim: str = "period"): m, u = infer_sampling_units(da.time) lengths = lengths // m else: + # It is acceptable to lose "{dim}_length" if they were all equal lengths = xr.DataArray([da.time.size] * da[dim].size, dims=(dim,)) + # Convert from the fake axis to the real one time_as_delta = da.time - da.time[0] if da.time.dtype == "O": # cftime can't add with np.timedelta64 (restriction comes from numpy which refuses to add O with m8) From a0939af22502dfaf380f70da6595ae5be028a4dd Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 11 Dec 2023 16:37:24 -0500 Subject: [PATCH 010/135] info for figanos + fraction arg --- xclim/ensembles/_partitioning.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 34d644fb4..04cde4300 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -59,6 +59,7 @@ def hawkins_sutton( weights: xr.DataArray = None, baseline: tuple = ("1971", "2000"), kind: str = "+", + fraction: bool = False, ): """Return the mean and partitioned variance of an ensemble based on method from Hawkins & Sutton (2009). @@ -76,6 +77,9 @@ def hawkins_sutton( Start and end year of the reference period. kind: {'+', '*'} Whether the mean over the reference period should be substracted (+) or divided by (*). + fraction: + If True, return the fraction of the total variance instead of the variance itself. + Use this option if you want to use `figanos.partition()`. 
Returns ------- @@ -156,9 +160,14 @@ def hawkins_sutton( u = pd.Index(["variability", "model", "scenario", "total"], name="uncertainty") uncertainty = xr.concat([nv_u, model_u, scenario_u, total], dim=u) - # Add the number of instances for each uncertainty component + if fraction: + uncertainty = uncertainty / uncertainty.sel(uncertainty="total") * 100 + uncertainty.attrs["long_name"] = "Fraction of total variance" + uncertainty.attrs["units"] = "%" + + # Add the elements for each uncertainty component uncertainty = uncertainty.assign_coords( - num=("uncertainty", [int(len(da[v])) if v in da.dims else 0 for v in u]) + elements=("uncertainty", [da[v].values if v in da.dims else None for v in u]) ) # Mean projection: G(t) @@ -198,6 +207,7 @@ def lafferty_sriver( sm: xr.DataArray = None, weights: xr.DataArray = None, bb13: bool = False, + fraction: bool = False, ): """Return the mean and partitioned variance of an ensemble based on method from Lafferty and Sriver (2023). @@ -212,6 +222,9 @@ def lafferty_sriver( bb13: bool Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance over scenarios is computed before taking the mean over models and downscaling methods. + fraction: + If True, return the fraction of the total variance instead of the variance itself. + Use this option if ou want to use `figanos.partition()`. 
Returns ------- @@ -280,9 +293,14 @@ def lafferty_sriver( ) uncertainty = xr.concat([model_u, scenario_u, downscaling_u, nv_u, total], dim=u) - # Add the number of instances for each uncertainty component + if fraction: + uncertainty = uncertainty / uncertainty.sel(uncertainty="total") * 100 + uncertainty.attrs["long_name"] = "Fraction of total variance" + uncertainty.attrs["units"] = "%" + + # Add the elements for each uncertainty component uncertainty = uncertainty.assign_coords( - num=("uncertainty", [int(len(da[v])) if v in da.dims else 0 for v in u]) + elements=("uncertainty", [da[v].values if v in da.dims else None for v in u]) ) # Mean projection: From 7a4e87e9c8f65e25b21ec1fe948db56460a56445 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 11 Dec 2023 17:22:01 -0500 Subject: [PATCH 011/135] Fix tests --- tests/test_calendar.py | 2 +- xclim/testing/helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_calendar.py b/tests/test_calendar.py index f071127ca..4a547f79b 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -718,7 +718,7 @@ def test_stack_periods(tas_series, cftime, w, s, m, f, ss): da_stck = stack_periods(da, window=w, stride=s, min_length=m, freq=f) assert "period_length" in da_stck.coords - assert bool(da_stck.period.attrs["unequal_periods"]) is (not f.startswith("Y")) + assert bool(da_stck.period.attrs["unequal_lengths"]) is (not f.startswith("Y")) da2 = unstack_periods(da_stck) diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py index 3b135b6b5..08fae0661 100644 --- a/xclim/testing/helpers.py +++ b/xclim/testing/helpers.py @@ -221,7 +221,7 @@ def add_example_file_paths(cache_dir: Path) -> dict[str]: def test_timeseries( values, variable, - start="2000-01-07", + start="2000-07-01", units=None, freq="D", as_dataset=False, From 6c20ad9f49291377ead4978b75a77382a15f120e Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 11 Dec 2023 17:33:34 -0500 Subject: [PATCH 
012/135] Rephrase and add warning about nonuniform years in notebook --- docs/notebooks/sdba-advanced.ipynb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/notebooks/sdba-advanced.ipynb b/docs/notebooks/sdba-advanced.ipynb index df0899c0e..3e3d95098 100644 --- a/docs/notebooks/sdba-advanced.ipynb +++ b/docs/notebooks/sdba-advanced.ipynb @@ -357,7 +357,13 @@ "\n", "The \"unstack\" function does the opposite : it concatenates the windows together to recreate the original timeseries. It only works for the no-overlap case where `stride = window` and for the non-ambiguous one where `stride` divides `window` into an odd number (N) of parts. In that latter situation, the middle parts of each period are kept when reconstructing the timeseries, in addition to the first (last) parts of the first (last) period needed to get a full timeseries.\n", "\n", - "Here, as `ref` and `hist` cover 15 years, we will use a window of 15 on sim. With a stride of 5 years, this means the first window goes from 2000 to 2014 (inclusive). Then 2005-2019, 2010-2024 and 2015-2029. The last year will be dropped." + "Quantile Delta Mapping requires that the adjustment period should be of a length similar to the training one. As our `ref` and `hist` cover 15 years, we will transform `sim` by stacking windows of 15 years. With a stride of 5 years, this means the first window goes from 2000 to 2014 (inclusive). Then 2005-2019, 2010-2024 and 2015-2029. The last year will be dropped.\n", + "\n", + "
\n", + "\n", + "In the following example, `QDM` is configurated with `group=\"time.dayofyear\"` which will perform the adjustment for each day of year (doy) separately. When using `stack_periods` the extracted windows are all concatenated along the new `period` axis and they all share the same time coordinate. As such, for the doy information to make sense, we must use a calendar with uniform year lengths. Otherwise, the doys would shift one day at each leap year.\n", + "\n", + "
" ] }, { From 40e1d9f729f49d470934dc434c0d5adab2da38b8 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 11 Dec 2023 18:14:05 -0500 Subject: [PATCH 013/135] Remove wrong test line - more words in explanation --- docs/notebooks/sdba-advanced.ipynb | 2 +- tests/test_calendar.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/notebooks/sdba-advanced.ipynb b/docs/notebooks/sdba-advanced.ipynb index 3e3d95098..3ee096608 100644 --- a/docs/notebooks/sdba-advanced.ipynb +++ b/docs/notebooks/sdba-advanced.ipynb @@ -357,7 +357,7 @@ "\n", "The \"unstack\" function does the opposite : it concatenates the windows together to recreate the original timeseries. It only works for the no-overlap case where `stride = window` and for the non-ambiguous one where `stride` divides `window` into an odd number (N) of parts. In that latter situation, the middle parts of each period are kept when reconstructing the timeseries, in addition to the first (last) parts of the first (last) period needed to get a full timeseries.\n", "\n", - "Quantile Delta Mapping requires that the adjustment period should be of a length similar to the training one. As our `ref` and `hist` cover 15 years, we will transform `sim` by stacking windows of 15 years. With a stride of 5 years, this means the first window goes from 2000 to 2014 (inclusive). Then 2005-2019, 2010-2024 and 2015-2029. The last year will be dropped.\n", + "Quantile Delta Mapping requires that the adjustment period should be of a length similar to the training one. As our `ref` and `hist` cover 15 years but `sim` covers 31 years, we will transform `sim` by stacking windows of 15 years. With a stride of 5 years, this means the first window goes from 2000 to 2014 (inclusive). Then 2005-2019, 2010-2024 and 2015-2029. The last year will be dropped as it can't be included in any complete window.\n", "\n", "
\n", "\n", diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 4a547f79b..46f116828 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -718,7 +718,6 @@ def test_stack_periods(tas_series, cftime, w, s, m, f, ss): da_stck = stack_periods(da, window=w, stride=s, min_length=m, freq=f) assert "period_length" in da_stck.coords - assert bool(da_stck.period.attrs["unequal_lengths"]) is (not f.startswith("Y")) da2 = unstack_periods(da_stck) From 2697210413f5099776174afa98e96748ebddb746 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Tue, 12 Dec 2023 09:58:01 -0500 Subject: [PATCH 014/135] mention xscen and figanos functions in doc --- docs/notebooks/partitioning.ipynb | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/partitioning.ipynb b/docs/notebooks/partitioning.ipynb index 76ccf61f6..01bd60004 100644 --- a/docs/notebooks/partitioning.ipynb +++ b/docs/notebooks/partitioning.ipynb @@ -79,7 +79,11 @@ "source": [ "## Create an ensemble \n", "\n", - "Here we combine the different models and scenarios into a single DataArray with dimensions `model` and `scenario`. Note that the names of those dimensions are important for the uncertainty partitioning algorithm to work. " + "Here we combine the different models and scenarios into a single DataArray with dimensions `model` and `scenario`. Note that the names of those dimensions are important for the uncertainty partitioning algorithm to work. \n", + "\n", + "
\n", + "Note that the [xscen library](https://xscen.readthedocs.io/en/latest/index.html) provides a helper function `xscen.ensembles.get_partition_input` to build partition ensembles.\n", + "
" ] }, { @@ -137,7 +141,11 @@ "id": "41af418d-9e92-433c-800c-6ba28ff7684c", "metadata": {}, "source": [ - "From there, it's relatively straightforward to compute the relative strength of uncertainties, and create graphics similar to those found in scientific papers. " + "From there, it's relatively straightforward to compute the relative strength of uncertainties, and create graphics similar to those found in scientific papers. \n", + "\n", + "
\n", + "Note that the [figanos library](https://figanos.readthedocs.io/en/latest/) provides a function `fg.partition` to plot the graph below.\n", + "
" ] }, { @@ -238,7 +246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.9.13" } }, "nbformat": 4, From 56670954be2a878a526651f29707b72579472ab0 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 12 Dec 2023 17:03:12 -0500 Subject: [PATCH 015/135] Added test based on data published by Lafferty and Sriver. Added fractional_uncertainty function --- tests/test_partitioning.py | 71 +++++++++++++++++++++++++++++++- xclim/ensembles/__init__.py | 2 +- xclim/ensembles/_partitioning.py | 35 ++++++++++++---- 3 files changed, 96 insertions(+), 12 deletions(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 0308ba697..30d62e853 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -3,8 +3,9 @@ import numpy as np import xarray as xr -from xclim.ensembles import hawkins_sutton, lafferty_sriver +from xclim.ensembles import fractional_uncertainty, hawkins_sutton, lafferty_sriver from xclim.ensembles._filters import _concat_hist, _model_in_all_scens, _single_member +from xclim.testing import get_file def test_hawkins_sutton_smoke(open_dataset): @@ -107,4 +108,70 @@ def test_lafferty_sriver_synthetic(random): # Smoke test with polynomial of order 2 fit = da.polyfit(dim="time", deg=2, skipna=True) sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(da.notnull()) - hawkins_sutton(da, sm=sm) + lafferty_sriver(da, sm=sm) + + +def test_lafferty_sriver(): + import pandas as pd + + # Get data from Lafferty & Sriver unit test + # https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test + fn = get_file( + "uncertainty_partitioning/seattle_avg_tas.csv", branch="lafferty_sriver" + ) + + df = pd.read_csv(fn, parse_dates=["time"]).rename( + columns={"ssp": "scenario", "ensemble": "downscaling"} + ) + + # Make xarray dataset + ds = xr.Dataset.from_dataframe( + df.set_index(["scenario", "model", "downscaling", "time"]) + ) + g, u = 
lafferty_sriver(ds.tas) + fu = fractional_uncertainty(u) + + # Assertions based on expected results from + # https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/unit_test_check.ipynb + assert fu.sel(time="2020", uncertainty="downscaling") > fu.sel( + time="2020", uncertainty="model" + ) + assert fu.sel(time="2020", uncertainty="variability") > fu.sel( + time="2020", uncertainty="scenario" + ) + assert ( + fu.sel(time="2090", uncertainty="scenario").data + > fu.sel(time="2020", uncertainty="scenario").data + ) + assert ( + fu.sel(time="2090", uncertainty="downscaling").data + < fu.sel(time="2020", uncertainty="downscaling").data + ) + + def graph(): + """Return graphic like in https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/unit_test_check.ipynb""" + from matplotlib import pyplot as plt + + udict = { + "Scenario": fu.sel(uncertainty="scenario").to_numpy().flatten(), + "Model": fu.sel(uncertainty="model").to_numpy().flatten(), + "Downscaling": fu.sel(uncertainty="downscaling").to_numpy().flatten(), + "Variability": fu.sel(uncertainty="variability").to_numpy().flatten(), + } + + fig, ax = plt.subplots() + ax.stackplot( + np.arange(2015, 2101), + udict.values(), + labels=udict.keys(), + alpha=1, + colors=["#00CC89", "#6869B3", "#CC883C", "#FFFF99"], + edgecolor="white", + lw=1.5, + ) + ax.set_xlim([2020, 2095]) + ax.set_ylim([0, 100]) + ax.legend(loc="upper left") + plt.show() + + # graph() diff --git a/xclim/ensembles/__init__.py b/xclim/ensembles/__init__.py index 36c46f8e8..9144811cb 100644 --- a/xclim/ensembles/__init__.py +++ b/xclim/ensembles/__init__.py @@ -10,7 +10,7 @@ from __future__ import annotations from ._base import create_ensemble, ensemble_mean_std_max_min, ensemble_percentiles -from ._partitioning import hawkins_sutton, lafferty_sriver +from ._partitioning import fractional_uncertainty, hawkins_sutton, lafferty_sriver from ._reduce import ( kkz_reduce_ensemble, kmeans_reduce_ensemble, diff 
--git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index c9eb6862d..4940bb1c3 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -76,7 +76,7 @@ def hawkins_sutton( baseline: (str, str) Start and end year of the reference period. kind: {'+', '*'} - Whether the mean over the reference period should be substracted (+) or divided by (*). + Whether the mean over the reference period should be subtracted (+) or divided by (*). fraction: bool If True, return the fraction of the total variance instead of the variance itself. Use this option if you want to use `figanos.partition()`. @@ -222,9 +222,9 @@ def lafferty_sriver( bb13: bool Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance over scenarios is computed before taking the mean over models and downscaling methods. - fraction: - If True, return the fraction of the total variance instead of the variance itself. - Use this option if ou want to use `figanos.partition()`. + fraction: bool + If True, return the fraction of the total variance instead of the variance itself. + Use this option if ou want to use `figanos.partition()`. Returns ------- @@ -276,6 +276,8 @@ def lafferty_sriver( ## Count the number of parent models that have been downscaled using method $d$ for scenario $s$. ## In the paper, weights are constant, here they may vary across time if there are missing values. mw = sm.count("model") + # In https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/lafferty_sriver.py + # weights are set to zero when there is only one model, but the var for a single element is 0 anyway. 
model_u = sm.var(dim="model").weighted(mw).mean(dim=["scenario", "downscaling"]) # Downscaling uncertainty: U_d(t) @@ -293,11 +295,6 @@ def lafferty_sriver( ) uncertainty = xr.concat([model_u, scenario_u, downscaling_u, nv_u, total], dim=u) - if fraction: - uncertainty = uncertainty / uncertainty.sel(uncertainty="total") * 100 - uncertainty.attrs["long_name"] = "Fraction of total variance" - uncertainty.attrs["units"] = "%" - # Add the elements for each uncertainty component uncertainty = uncertainty.assign_coords( elements=("uncertainty", [da[v].values if v in da.dims else None for v in u]) @@ -310,6 +307,26 @@ def lafferty_sriver( return g, uncertainty +def fractional_uncertainty(u: xr.DataArray): + """ + Return the fractional uncertainty. + + Parameters + ---------- + u: xr.DataArray + Array with uncertainty components along the `uncertainty` dimension. + + Returns + ------- + xr.DataArray + Fractional, or relative uncertainty with respect to the total uncertainty. + """ + uncertainty = u / u.sel(uncertainty="total") * 100 + uncertainty.attrs["long_name"] = "Fraction of total variance" + uncertainty.attrs["units"] = "%" + return uncertainty + + # def _lafferty_sriver_weights(da): # """Return the weights used in Lefferty and Sriver (2023). # From 4eeaadae7ecc6b7a05840a574ce98ae2d94293f8 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 12 Dec 2023 17:13:35 -0500 Subject: [PATCH 016/135] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Éric Dupuis <71575674+coxipi@users.noreply.github.com> --- xclim/core/calendar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 838813e64..86eead275 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -1640,7 +1640,7 @@ def stack_periods( Default is `window` (no overlap between periods). 
min_length : int, optional Windows shorter than this are not included in the output. - Given as a multiple of ``freq``. Defaults is ``window`` (every window must be complete). + Given as a multiple of ``freq``. Default is ``window`` (every window must be complete). Similar to the ``min_periods`` argument of ``da.rolling``. If ``freq`` is annual or quarterly and ``min_length == ``window``, the first period is considered complete if the first timestep is in the first month of the period. @@ -1812,7 +1812,7 @@ def unstack_periods(da: xr.DataArray | xr.Dataset, dim: str = "period"): unequal_lengths = bool(starts.attrs["unequal_lengths"]) except (AttributeError, KeyError) as err: raise ValueError( - f"`unstack_periods` can't find the window, stride and freq attributes on the {dim} coordiantes." + f"`unstack_periods` can't find the window, stride and freq attributes on the {dim} coordinates." ) from err if unequal_lengths: From 92ebcd9f68744680f87cb56b57aceaa454b8a82e Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 13 Dec 2023 09:31:29 -0500 Subject: [PATCH 017/135] keep element coord in fractional_uncertainty --- xclim/ensembles/_partitioning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 4940bb1c3..ac968bf33 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -322,6 +322,7 @@ def fractional_uncertainty(u: xr.DataArray): Fractional, or relative uncertainty with respect to the total uncertainty. 
""" uncertainty = u / u.sel(uncertainty="total") * 100 + uncertainty["elements"] = uncertainty["elements"] # keep element coords uncertainty.attrs["long_name"] = "Fraction of total variance" uncertainty.attrs["units"] = "%" return uncertainty From e312c8d63322832e3dfa2cef74469fd71a72054e Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 13 Dec 2023 09:31:56 -0500 Subject: [PATCH 018/135] typo --- xclim/ensembles/_partitioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index ac968bf33..fb38e4b9d 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -322,7 +322,7 @@ def fractional_uncertainty(u: xr.DataArray): Fractional, or relative uncertainty with respect to the total uncertainty. """ uncertainty = u / u.sel(uncertainty="total") * 100 - uncertainty["elements"] = uncertainty["elements"] # keep element coords + uncertainty["elements"] = u["elements"] # keep element coords uncertainty.attrs["long_name"] = "Fraction of total variance" uncertainty.attrs["units"] = "%" return uncertainty From d4b74faee3df07e00544d7a15726f909f540d1bd Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 13 Dec 2023 09:52:07 -0500 Subject: [PATCH 019/135] Updated CHANGES with lafferty_sriver. --- CHANGES.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index f69c45d54..8b62586f9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,16 @@ Changelog ========= + +v0.48 (unreleased) +------------------ +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`). 
+ +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* Added uncertainty partitioning method `lafferty_sriver` from Lafferty and Sriver (2023), which can partition uncertainty related to the downscaling method. (:issue:`1497`, :pull:`1529`). + + v0.47.0 (2023-12-01) -------------------- Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`). From 177eef5ce340abf8888fc928ec5b712ff2f99972 Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 13 Dec 2023 09:56:37 -0500 Subject: [PATCH 020/135] Remove fraction argument. Add note in docstring on using fractional_uncertainty --- xclim/ensembles/_partitioning.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index fb38e4b9d..fa7938789 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -59,7 +59,6 @@ def hawkins_sutton( weights: xr.DataArray | None = None, baseline: tuple[str, str] = ("1971", "2000"), kind: str = "+", - fraction: bool = False, ): """Return the mean and partitioned variance of an ensemble based on method from Hawkins & Sutton (2009). @@ -77,9 +76,6 @@ def hawkins_sutton( Start and end year of the reference period. kind: {'+', '*'} Whether the mean over the reference period should be subtracted (+) or divided by (*). - fraction: bool - If True, return the fraction of the total variance instead of the variance itself. - Use this option if you want to use `figanos.partition()`. Returns ------- @@ -96,6 +92,9 @@ def hawkins_sutton( - annual time series starting in 1950 and ending in 2100; - the same models are available for all scenarios. + To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the + output. 
+ References ---------- :cite:cts:`hawkins_2009,hawkins_2011` @@ -160,11 +159,6 @@ def hawkins_sutton( u = pd.Index(["variability", "model", "scenario", "total"], name="uncertainty") uncertainty = xr.concat([nv_u, model_u, scenario_u, total], dim=u) - if fraction: - uncertainty = uncertainty / uncertainty.sel(uncertainty="total") * 100 - uncertainty.attrs["long_name"] = "Fraction of total variance" - uncertainty.attrs["units"] = "%" - # Add the elements for each uncertainty component uncertainty = uncertainty.assign_coords( elements=("uncertainty", [da[v].values if v in da.dims else None for v in u]) @@ -222,9 +216,6 @@ def lafferty_sriver( bb13: bool Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance over scenarios is computed before taking the mean over models and downscaling methods. - fraction: bool - If True, return the fraction of the total variance instead of the variance itself. - Use this option if ou want to use `figanos.partition()`. Returns ------- @@ -237,6 +228,9 @@ def lafferty_sriver( To prepare input data, make sure `da` has dimensions `time`, `scenario`, `downscaling` and `model`, e.g. `da.rename({"experiment": "scenario"})`. + To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the + output. 
+ References ---------- :cite:cts:`Lafferty2023` From ed310585733bd661a5d8303732875f0fca56305b Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 13 Dec 2023 11:47:15 -0500 Subject: [PATCH 021/135] replace elements coords by attrs --- xclim/ensembles/_partitioning.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index fa7938789..5daeb034c 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -159,10 +159,9 @@ def hawkins_sutton( u = pd.Index(["variability", "model", "scenario", "total"], name="uncertainty") uncertainty = xr.concat([nv_u, model_u, scenario_u, total], dim=u) - # Add the elements for each uncertainty component - uncertainty = uncertainty.assign_coords( - elements=("uncertainty", [da[v].values if v in da.dims else None for v in u]) - ) + # Keep a trace of the elements for each uncertainty component + for d in ["model", "scenario"]: + uncertainty.attrs[d] = da[d].values # Mean projection: G(t) g = sm.weighted(weights).mean(dim="model").mean(dim="scenario") @@ -199,9 +198,7 @@ def hawkins_sutton_09_weighting(da, obs, baseline=("1971", "2000")): def lafferty_sriver( da: xr.DataArray, sm: xr.DataArray = None, - weights: xr.DataArray = None, bb13: bool = False, - fraction: bool = False, ): """Return the mean and partitioned variance of an ensemble based on method from Lafferty and Sriver (2023). 
@@ -289,10 +286,9 @@ def lafferty_sriver( ) uncertainty = xr.concat([model_u, scenario_u, downscaling_u, nv_u, total], dim=u) - # Add the elements for each uncertainty component - uncertainty = uncertainty.assign_coords( - elements=("uncertainty", [da[v].values if v in da.dims else None for v in u]) - ) + # Keep a trace of the elements for each uncertainty component + for d in ["model", "scenario", "downscaling"]: + uncertainty.attrs[d] = da[d].values # Mean projection: # This is not part of the original algorithm, but we want all partition algos to have similar outputs. @@ -316,21 +312,7 @@ def fractional_uncertainty(u: xr.DataArray): Fractional, or relative uncertainty with respect to the total uncertainty. """ uncertainty = u / u.sel(uncertainty="total") * 100 - uncertainty["elements"] = u["elements"] # keep element coords + uncertainty.attrs.update(u.attrs) uncertainty.attrs["long_name"] = "Fraction of total variance" uncertainty.attrs["units"] = "%" return uncertainty - - -# def _lafferty_sriver_weights(da): -# """Return the weights used in Lefferty and Sriver (2023). -# -# The weights $w_{s,d}$ are given by the number of parent models that have been downscaled using method $d$ for -# scenario $s$. -# """ -# # Count the number of series that have 80% of their data (not in the paper) -# # We don't want to count series with only a few years of data -# valid = (da.count("time") / len(da.time)) > 0.8 -# s = valid.where(valid) -# -# # Count the number of parent models that have been downscaled using method $d$ for scenario $s$. 
From c10b7a8a9f2efd6dfaf36de52cf2179854bf7cd4 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 13 Dec 2023 12:06:16 -0500 Subject: [PATCH 022/135] remove test of baseline zero --- tests/test_partitioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 30d62e853..be94cfe84 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -42,7 +42,7 @@ def test_hawkins_sutton_synthetic(random): vm = v.mean(dim="time") # Check that the mean relative to the baseline is zero - np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) + # np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) # Check that the scenario uncertainty is zero np.testing.assert_array_almost_equal(vm.sel(uncertainty="scenario"), 0, decimal=1) From 0b80081c0cc2d966a453a5ca55f35d01f82b4281 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 13 Dec 2023 12:07:39 -0500 Subject: [PATCH 023/135] removed the wrong one oups --- tests/test_partitioning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index be94cfe84..f634e8826 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -42,7 +42,7 @@ def test_hawkins_sutton_synthetic(random): vm = v.mean(dim="time") # Check that the mean relative to the baseline is zero - # np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) + np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) # Check that the scenario uncertainty is zero np.testing.assert_array_almost_equal(vm.sel(uncertainty="scenario"), 0, decimal=1) @@ -97,7 +97,7 @@ def test_lafferty_sriver_synthetic(random): vm = v.mean(dim="time") # Check that the mean relative to the baseline is zero - np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) + # np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) 
# Check that the scenario uncertainty is zero np.testing.assert_array_almost_equal(vm.sel(uncertainty="scenario"), 0, decimal=1) From adc89ce6fba9415e52708819c71b959cf2f0459d Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 13 Dec 2023 13:29:06 -0500 Subject: [PATCH 024/135] remove scenario test --- tests/test_partitioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index f634e8826..112dbe9dc 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -100,7 +100,7 @@ def test_lafferty_sriver_synthetic(random): # np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) # Check that the scenario uncertainty is zero - np.testing.assert_array_almost_equal(vm.sel(uncertainty="scenario"), 0, decimal=1) + # np.testing.assert_array_almost_equal(vm.sel(uncertainty="scenario"), 0, decimal=1) # Check that model uncertainty > variability assert vm.sel(uncertainty="model") > vm.sel(uncertainty="variability") From 04f6aafe027ed0d49aa448a74108e0e1a350f8dd Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Thu, 14 Dec 2023 09:38:52 -0500 Subject: [PATCH 025/135] add test for mean uncertainty --- tests/test_partitioning.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 112dbe9dc..4bd3f0a4a 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -96,11 +96,8 @@ def test_lafferty_sriver_synthetic(random): # Mean uncertainty over time vm = v.mean(dim="time") - # Check that the mean relative to the baseline is zero - # np.testing.assert_array_almost_equal(m.mean(dim="time"), 0, decimal=1) - - # Check that the scenario uncertainty is zero - # np.testing.assert_array_almost_equal(vm.sel(uncertainty="scenario"), 0, decimal=1) + # Check that the mean uncertainty + np.testing.assert_array_almost_equal(m.mean(dim="time"), 25, decimal=1) # Check that model uncertainty 
> variability assert vm.sel(uncertainty="model") > vm.sel(uncertainty="variability") From d6cccd326c6b9ef9501798a69a252f962dec4d3a Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Thu, 14 Dec 2023 10:35:32 -0500 Subject: [PATCH 026/135] update testdata --- .github/workflows/main.yml | 2 +- xclim/testing/helpers.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 119b7a6a2..b19408d70 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,7 +20,7 @@ on: - submitted env: - XCLIM_TESTDATA_BRANCH: v2023.9.12 + XCLIM_TESTDATA_BRANCH: v2023.12.14 concurrency: # For a given workflow, if we push to the same branch, cancel all previous builds on that branch except on master. diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py index 85d1f953a..b19ad498d 100644 --- a/xclim/testing/helpers.py +++ b/xclim/testing/helpers.py @@ -140,6 +140,7 @@ def populate_testing_data( "sdba/ahccd_1950-2013.nc", "sdba/nrcan_1950-2013.nc", "uncertainty_partitioning/cmip5_pr_global_mon.nc", + "uncertainty_partitioning/seattle_avg_tas.csv", ] data = dict() From 02c5ad8ea249999d3eef573479fc0195184e24e9 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Thu, 14 Dec 2023 10:38:02 -0500 Subject: [PATCH 027/135] remove branch to tesdata --- tests/test_partitioning.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 4bd3f0a4a..b8215695d 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -113,9 +113,7 @@ def test_lafferty_sriver(): # Get data from Lafferty & Sriver unit test # https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test - fn = get_file( - "uncertainty_partitioning/seattle_avg_tas.csv", branch="lafferty_sriver" - ) + fn = get_file("uncertainty_partitioning/seattle_avg_tas.csv") df = pd.read_csv(fn, parse_dates=["time"]).rename( columns={"ssp": 
"scenario", "ensemble": "downscaling"} From da3527f4776b01680e2bf4bd5a43d8bff7a9dafa Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 14 Dec 2023 10:40:00 -0500 Subject: [PATCH 028/135] update xclim-testdata. Fix bug in get_file looking for master branch while its not called main --- .github/workflows/main.yml | 2 +- tests/conftest.py | 19 +++++++++++++++++++ tests/test_partitioning.py | 20 ++------------------ xclim/testing/helpers.py | 1 + xclim/testing/utils.py | 2 +- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 119b7a6a2..bf7c0c87b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,7 +20,7 @@ on: - submitted env: - XCLIM_TESTDATA_BRANCH: v2023.9.12 + XCLIM_TESTDATA_BRANCH: v2023.12.24 concurrency: # For a given workflow, if we push to the same branch, cancel all previous builds on that branch except on master. diff --git a/tests/conftest.py b/tests/conftest.py index ffb64c11e..bebcf288b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,7 @@ from xclim.testing import helpers from xclim.testing.helpers import test_timeseries from xclim.testing.utils import _default_cache_dir # noqa +from xclim.testing.utils import get_file from xclim.testing.utils import open_dataset as _open_dataset if not __xclim_version__.endswith("-beta") and helpers.TESTDATA_BRANCH == "main": @@ -429,6 +430,24 @@ def ensemble_dataset_objects() -> dict: return edo +@pytest.fixture(scope="session") +def lafferty_sriver_ds() -> xr.Dataset: + import pandas as pd + + # Get data from Lafferty & Sriver unit test + # https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test + fn = get_file("uncertainty_partitioning/seattle_avg_tas.csv") + + df = pd.read_csv(fn, parse_dates=["time"]).rename( + columns={"ssp": "scenario", "ensemble": "downscaling"} + ) + + # Make xarray dataset + return xr.Dataset.from_dataframe( + df.set_index(["scenario", "model", 
"downscaling", "time"]) + ) + + @pytest.fixture(scope="session", autouse=True) def gather_session_data(threadsafe_data_dir, worker_id, xdoctest_namespace): """Gather testing data on pytest run. diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 30d62e853..4f40d5d5a 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -111,24 +111,8 @@ def test_lafferty_sriver_synthetic(random): lafferty_sriver(da, sm=sm) -def test_lafferty_sriver(): - import pandas as pd - - # Get data from Lafferty & Sriver unit test - # https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test - fn = get_file( - "uncertainty_partitioning/seattle_avg_tas.csv", branch="lafferty_sriver" - ) - - df = pd.read_csv(fn, parse_dates=["time"]).rename( - columns={"ssp": "scenario", "ensemble": "downscaling"} - ) - - # Make xarray dataset - ds = xr.Dataset.from_dataframe( - df.set_index(["scenario", "model", "downscaling", "time"]) - ) - g, u = lafferty_sriver(ds.tas) +def test_lafferty_sriver(lafferty_sriver_ds): + g, u = lafferty_sriver(lafferty_sriver_ds.tas) fu = fractional_uncertainty(u) # Assertions based on expected results from diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py index 85d1f953a..b19ad498d 100644 --- a/xclim/testing/helpers.py +++ b/xclim/testing/helpers.py @@ -140,6 +140,7 @@ def populate_testing_data( "sdba/ahccd_1950-2013.nc", "sdba/nrcan_1950-2013.nc", "uncertainty_partitioning/cmip5_pr_global_mon.nc", + "uncertainty_partitioning/seattle_avg_tas.csv", ] data = dict() diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index 1084d197e..c15a92054 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -80,7 +80,7 @@ def file_md5_checksum(f_name): def get_file( name: str | os.PathLike | Sequence[str | os.PathLike], github_url: str = "https://github.com/Ouranosinc/xclim-testdata", - branch: str = "master", + branch: str = "main", cache_dir: Path = _default_cache_dir, ) -> Path | 
list[Path]: """Return a file from an online GitHub-like repository. From daadf0722e823eb241291a3297f04bcbff12a4c7 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 14 Dec 2023 10:47:19 -0500 Subject: [PATCH 029/135] typo in tag name --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bf7c0c87b..b19408d70 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,7 +20,7 @@ on: - submitted env: - XCLIM_TESTDATA_BRANCH: v2023.12.24 + XCLIM_TESTDATA_BRANCH: v2023.12.14 concurrency: # For a given workflow, if we push to the same branch, cancel all previous builds on that branch except on master. From 888e902f162355162cef08d9e9884b6f50d38c18 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 14 Dec 2023 10:51:59 -0500 Subject: [PATCH 030/135] black --- tests/test_partitioning.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 02e02d1e2..070a49d2e 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -5,7 +5,6 @@ from xclim.ensembles import fractional_uncertainty, hawkins_sutton, lafferty_sriver from xclim.ensembles._filters import _concat_hist, _model_in_all_scens, _single_member -from xclim.testing import get_file def test_hawkins_sutton_smoke(open_dataset): From 3eb8dd18f88f5e46a3a0d698b26376a4186c3a45 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Dec 2023 10:54:30 -0500 Subject: [PATCH 031/135] ensure caching is safe --- tests/conftest.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bebcf288b..b3c7339ea 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -432,11 +432,17 @@ def ensemble_dataset_objects() -> dict: @pytest.fixture(scope="session") def lafferty_sriver_ds() -> xr.Dataset: - import pandas as 
pd + """Get data from Lafferty & Sriver unit test. - # Get data from Lafferty & Sriver unit test - # https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test - fn = get_file("uncertainty_partitioning/seattle_avg_tas.csv") + Notes + ----- + https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test + """ + fn = get_file( + "uncertainty_partitioning/seattle_avg_tas.csv", + cache_dir=_default_cache_dir, + branch=helpers.TESTDATA_BRANCH, + ) df = pd.read_csv(fn, parse_dates=["time"]).rename( columns={"ssp": "scenario", "ensemble": "downscaling"} From 2ea609b494af56c3cc8b80b97022b3bc5751bfdd Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 14 Dec 2023 13:45:51 -0500 Subject: [PATCH 032/135] validate YAML file using schema before constructing module. --- CHANGES.rst | 3 +++ tests/test_modules.py | 6 ------ xclim/core/indicator.py | 11 +++++++++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f69c45d54..937e64934 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,9 @@ Changelog ========= +* Validate YAML indicators description before trying to build module. (:issue:`1523`) + + v0.47.0 (2023-12-01) -------------------- Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`). diff --git a/tests/test_modules.py b/tests/test_modules.py index d25f93c17..b6b3abb47 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -61,12 +61,6 @@ def test_custom_indices(open_dataset): # Use the example data used in the Extending Xclim notebook for testing. 
example_path = Path(__file__).parent.parent / "docs" / "notebooks" / "example" - schema = yamale.make_schema( - Path(__file__).parent.parent / "xclim" / "data" / "schema.yml" - ) - data = yamale.make_data(example_path / "example.yml") - yamale.validate(schema, data) - pr = open_dataset("ERA5/daily_surface_cancities_1990-1993.nc").pr # This tests load_module with a python file that is _not_ on the PATH diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 36cb63771..df1d68b3c 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -85,8 +85,8 @@ In the following, the section under `` is referred to as `data`. When creating indicators from a dictionary, with :py:meth:`Indicator.from_dict`, the input dict must follow the same structure of `data`. -The resulting yaml file can be validated using the provided schema (in xclim/data/schema.yml) -and the YAMALE tool :cite:p:`lopker_yamale_2022`. See the "Extending xclim" notebook for more info. +When a module is built from a yaml file, the yaml is first validated against the schema (see xclim/data/schema.yml) +using the YAMALE library (:cite:p:`lopker_yamale_2022`). See the "Extending xclim" notebook for more info. Inputs ~~~~~~ @@ -115,6 +115,7 @@ import numpy as np import xarray +import yamale from xarray import DataArray, Dataset from yaml import safe_load @@ -1716,6 +1717,12 @@ def build_indicator_module_from_yaml( # noqa: C901 with ymlpath.open(encoding=encoding) as f: yml = safe_load(f) + # Read schema + schema = yamale.make_schema(Path(__file__).parent.parent / "data" / "schema.yml") + + # Validate - a YamaleError will be raised if the module does not comply with the schema. + yamale.validate(schema, yamale.make_data(ymlpath)) + # Load values from top-level in yml. # Priority of arguments differ. 
module_name = name or yml.get("module", filepath.stem) From 20ee8b49b1c204303f0d0acefda5e5da703da2cd Mon Sep 17 00:00:00 2001 From: "bumpversion[bot]" Date: Thu, 14 Dec 2023 19:33:17 +0000 Subject: [PATCH 033/135] =?UTF-8?q?Bump=20version:=200.47.0=20=E2=86=92=20?= =?UTF-8?q?0.47.1-beta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.cfg | 2 +- xclim/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 72c7825f7..56573dc96 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.47.0 +current_version = 0.47.1-beta commit = True tag = False parse = (?P\d+)\.(?P\d+).(?P\d+)(\-(?P[a-z]+))? diff --git a/xclim/__init__.py b/xclim/__init__.py index c70429906..2def75ca7 100644 --- a/xclim/__init__.py +++ b/xclim/__init__.py @@ -15,7 +15,7 @@ __author__ = """Travis Logan""" __email__ = "logan.travis@ouranos.ca" -__version__ = "0.47.0" +__version__ = "0.47.1-beta" _module_data = _files("xclim.data") From 9b1e2dd7a08701a1581fd00ba18c5d70b5a6a004 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 14 Dec 2023 14:45:27 -0500 Subject: [PATCH 034/135] Add note in the docs about schema validation --- docs/notebooks/extendxclim.ipynb | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/docs/notebooks/extendxclim.ipynb b/docs/notebooks/extendxclim.ipynb index d4e9a4c04..35e8a2597 100644 --- a/docs/notebooks/extendxclim.ipynb +++ b/docs/notebooks/extendxclim.ipynb @@ -397,26 +397,15 @@ "\n", "\n", "#### Validation of the YAML file\n", - "Using [yamale](https://github.com/23andMe/Yamale), it is possible to check if the YAML file is valid. `xclim` ships with a schema (in `xclim/data/schema.yml`) file. 
The file can be located with:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from importlib.resources import path\n", "\n", - "with path(\"xclim.data\", \"schema.yml\") as f:\n", - " print(f)" + "Using [yamale](https://github.com/23andMe/Yamale), it is possible to check if the YAML file is valid. `xclim` ships with a schema (in `xclim/data/schema.yml`) file. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "And the validation can be executed either in a python session:" + "The validation can be executed in a python session:" ] }, { @@ -425,6 +414,7 @@ "metadata": {}, "outputs": [], "source": [ + "from importlib.resources import path\n", "import yamale\n", "\n", "with path(\"xclim.data\", \"schema.yml\") as f:\n", @@ -437,13 +427,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "No errors means it passed. The validation can also be run through the command line with:\n", + "Or the validation can alternatively be run from the command line with:\n", "\n", "```bash\n", "yamale -s path/to/schema.yml path/to/module.yml\n", "```\n", "\n", - "#### Loading the module and computating of the indices." + "Note that when xclim builds indicators from a yaml file, as shown in the next example, it validates the file first. \n", + "\n", + "#### Loading the module and computing indicators." 
] }, { From 8b1885aad0c9907fc99adcde89ee22e5122fc6d0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:51:30 +0000 Subject: [PATCH 035/135] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/notebooks/extendxclim.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/notebooks/extendxclim.ipynb b/docs/notebooks/extendxclim.ipynb index 35e8a2597..ac5af394d 100644 --- a/docs/notebooks/extendxclim.ipynb +++ b/docs/notebooks/extendxclim.ipynb @@ -415,6 +415,7 @@ "outputs": [], "source": [ "from importlib.resources import path\n", + "\n", "import yamale\n", "\n", "with path(\"xclim.data\", \"schema.yml\") as f:\n", From e431a5fc0976968aff5eff07ecce9e65c4b4e899 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 14 Dec 2023 14:55:03 -0500 Subject: [PATCH 036/135] Added indicator in example.yml using indexer argument. Modified schema.yml to support it. 
--- docs/notebooks/example/example.yml | 6 ++++++ xclim/data/schema.yml | 12 ++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/notebooks/example/example.yml b/docs/notebooks/example/example.yml index 8ed2f8b2e..ed3cacbe3 100644 --- a/docs/notebooks/example/example.yml +++ b/docs/notebooks/example/example.yml @@ -50,6 +50,12 @@ indicators: freq: default: YS references: ETCCDI + spring_fd: + compute: frost_days + parameters: + freq: YS-DEC + indexer: + months: [ 12, 1, 2 ] R95p: compute: extreme_precip_accumulation_and_days cf_attrs: diff --git a/xclim/data/schema.yml b/xclim/data/schema.yml index cc3a8f74d..1d7c07543 100644 --- a/xclim/data/schema.yml +++ b/xclim/data/schema.yml @@ -19,7 +19,7 @@ indicator: missing_options: map(key=str(), required=False) notes: str(required=False) cf_attrs: any(list(include('cf_attrs')), include('cf_attrs'), required=False) - parameters: map(str(), num(), bool(), null(), include('parameter'), key=str(), required=False) + parameters: map(str(), num(), bool(), null(), include('parameter'), include('indexer'), key=str(), required=False) realm: str(required=False) references: str(required=False) title: str(required=False) @@ -29,10 +29,18 @@ cf_attrs: map(str(), key=str(), required=False) parameter: description: str(required=False) - default: any(str(), num(), bool(), null(), required=False) + default: any(str(), num(), bool(), null(), include('indexer'), required=False) choices: list(str(), required=False) units: str(required=False) +indexer: + drop: bool(required=False) + months: any(int(), list(int()), required=False) + season: any(str(), list(str()), required=False) + doy_bounds: list(int(), required=False, maxItems=2) + date_bounds: list(str(), required=False, maxItems=2) + include_bounds: any(bool(), list(bool(), bool()), required=False, maxItems=2) + variable: canonical_units: str(required=True) cell_methods: str(required=False) From bcbf4a32ba243a45ac543dcd2301017fb2ca5cbd Mon Sep 17 00:00:00 2001 
From: David Huard Date: Thu, 14 Dec 2023 14:56:04 -0500 Subject: [PATCH 037/135] Update CHANGES --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8b62586f9..b0f64fe6a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,7 +10,7 @@ Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal B New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * Added uncertainty partitioning method `lafferty_sriver` from Lafferty and Sriver (2023), which can partition uncertainty related to the downscaling method. (:issue:`1497`, :pull:`1529`). - +* Support ``indexer`` keyword in YAML indicator description. (:issue:`1522`, :pull:`1561`). v0.47.0 (2023-12-01) -------------------- From 472f26171e3799bf32984142fd0e2129bdac3e5c Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 14 Dec 2023 15:45:59 -0500 Subject: [PATCH 038/135] Fail if days wont align --- tests/test_calendar.py | 15 +++++++++----- xclim/core/calendar.py | 47 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 46f116828..d73b31fe3 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -713,9 +713,11 @@ def test_convert_doy(): [(30, 10, None, "YS", 0), (3, 1, None, "QS-DEC", 60), (6, None, None, "MS", 0)], ) def test_stack_periods(tas_series, cftime, w, s, m, f, ss): - da = tas_series(np.arange(365 * 50), cftime=cftime, start="2000-01-01") + da = tas_series(np.arange(365 * 50), start="2000-01-01", cftime=cftime) - da_stck = stack_periods(da, window=w, stride=s, min_length=m, freq=f) + da_stck = stack_periods( + da, window=w, stride=s, min_length=m, freq=f, align_days=False + ) assert "period_length" in da_stck.coords @@ -725,9 +727,12 @@ def test_stack_periods(tas_series, cftime, w, s, m, f, ss): def test_stack_periods_special(tas_series): - da = tas_series( - np.arange(365 * 48 + 12), cftime=True, 
start="2004-01-01" - ).convert_calendar("noleap") + da = tas_series(np.arange(365 * 48 + 12), cftime=True, start="2004-01-01") + + with pytest.raises(ValueError, match="unaligned day-of-year"): + stack_periods(da) + + da = da.convert_calendar("noleap") da_stck = stack_periods(da, dim="horizon") np.testing.assert_array_equal(da_stck.horizon_length, 10950) diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 838813e64..87c76e52c 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -46,8 +46,10 @@ "percentile_doy", "resample_doy", "select_time", + "stack_periods", "time_bnds", "uniform_calendars", + "unstack_periods", "within_bnds_doy", "yearly_interpolated_doy", "yearly_random_doy", @@ -1616,6 +1618,7 @@ def stack_periods( freq: str = "YS", dim: str = "period", start: str = "1970-01-01", + align_days: bool = True, pad_value=dtypes.NA, ): """Construct a multi-period array @@ -1654,6 +1657,11 @@ def stack_periods( start : str The `start` argument passed to :py:func:`xarray.date_range` to generate the new placeholder time coordinate. + align_days : bool + When True (default), an error is raised if the output would have unaligned days across periods. + If `freq = 'YS'`, day-of-year alignment is checked and if `freq` is "MS" or "QS", we check day-in-month. + Only uniform-calendar will pass the test for `freq='YS'`. For other frequencies, only the `360_day` calendar will work. + This check is ignored if the sampling rate of the data is coarser than "D". pad_value: Any When some periods are shorter than others, this value is used to pad them at the end. Passed directly as argument ``fill_value`` to :py:func:`xarray.concat`, the default is the same as on that function. @@ -1676,11 +1684,33 @@ def stack_periods( stride = stride or window min_length = min_length or window + if stride > window: + raise ValueError( + f"Stride must be less than or equal to window. Got {stride} > {window}." 
+ ) srcfreq = xr.infer_freq(da.time) cal = da.time.dt.calendar use_cftime = da.time.dtype == "O" + if ( + compare_offsets(srcfreq, "<=", "D") + and align_days + and ( + (freq.startswith(("Y", "A")) and cal not in uniform_calendars) + or (freq.startswith(("Q", "M")) and window > 1 and cal != "360_day") + ) + ): + if freq.startswith(("Y", "A")): + u = "year" + else: + u = "month" + raise ValueError( + f"Stacking {window}{freq} periods will result in unaligned day-of-{u}. " + f"Consider converting the calendar of your data to one with uniform {u} lengths, " + "or pass `align_days=False` to disable this check." + ) + # Convert integer inputs to freq strings mult, *args = parse_offset(freq) win_frq = construct_offset(mult * window, *args) @@ -1801,6 +1831,23 @@ def unstack_periods(da: xr.DataArray | xr.Dataset, dim: str = "period"): As constructed by :py:func:`stack_periods`, attributes of the period coordinates must have been perserved. dim : str The period dimension name. + + Notes + ----- + The following table shows which strides are included (``o``) in the unstacked output. + In this example, ``stride`` was a fifth of ``window`` and ``min_length`` was 4 times ``stride``. + The row index ``i`` is the period index in the stacked dataset, columns are the stride-long sections of the original timeseries. + + .. table:: Unstacking example with ``stride < window``. 
+ + === === === === === === === === + i 0 1 2 3 4 5 6 + === === === === === === === === + 3 x x o o + 2 x x o x x + 1 x x o x x + 0 o o o x x + === === === === === === === === """ from xclim.core.units import infer_sampling_units From ad7096550b8a8cdede3c1191cdde4169d865ef0f Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 14 Dec 2023 17:02:51 -0500 Subject: [PATCH 039/135] Read YML file with encoding for validation --- xclim/core/indicator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index df1d68b3c..6590ab9a4 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -1721,7 +1721,9 @@ def build_indicator_module_from_yaml( # noqa: C901 schema = yamale.make_schema(Path(__file__).parent.parent / "data" / "schema.yml") # Validate - a YamaleError will be raised if the module does not comply with the schema. - yamale.validate(schema, yamale.make_data(ymlpath)) + yamale.validate( + schema, yamale.make_data(content=ymlpath.read_text(encoding=encoding)) + ) # Load values from top-level in yml. # Priority of arguments differ. From 85a2f7435ecaa71ebcd94a5f02c03f0ed50baabc Mon Sep 17 00:00:00 2001 From: "bumpversion[bot]" Date: Fri, 15 Dec 2023 03:26:01 +0000 Subject: [PATCH 040/135] =?UTF-8?q?Bump=20version:=200.47.1-beta=20?= =?UTF-8?q?=E2=86=92=200.47.2-beta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.cfg | 2 +- xclim/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 56573dc96..d6128c362 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.47.1-beta +current_version = 0.47.2-beta commit = True tag = False parse = (?P\d+)\.(?P\d+).(?P\d+)(\-(?P[a-z]+))? 
diff --git a/xclim/__init__.py b/xclim/__init__.py index 2def75ca7..e983c9c25 100644 --- a/xclim/__init__.py +++ b/xclim/__init__.py @@ -15,7 +15,7 @@ __author__ = """Travis Logan""" __email__ = "logan.travis@ouranos.ca" -__version__ = "0.47.1-beta" +__version__ = "0.47.2-beta" _module_data = _files("xclim.data") From 846d3b50faa6714cb7fe1b9a39ef4bf0d64ce64c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Fri, 15 Dec 2023 09:50:27 -0500 Subject: [PATCH 041/135] CHANGES text under 0.48 instead of 0.47 --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 1344a0f4a..4efdab8f0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,9 @@ New features and enhancements * Added uncertainty partitioning method `lafferty_sriver` from Lafferty and Sriver (2023), which can partition uncertainty related to the downscaling method. (:issue:`1497`, :pull:`1529`). * Validate YAML indicators description before trying to build module. (:issue:`1523`, :pull:`1560`). +Bug fixes +^^^^^^^^^ +* Fix wrong `window` attributes in ``xclim.indices.standardized_precipitation_index``, ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1552` :pull:`1554`). v0.47.0 (2023-12-01) From 14fc9b46f489c1db7d4bc04c2347651de28bb0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Fri, 15 Dec 2023 09:52:07 -0500 Subject: [PATCH 042/135] remove text in 0.47 --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4efdab8f0..868a73d92 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -35,7 +35,7 @@ Bug fixes * Fixed a bug with ``n_escore=-1`` in ``xclim.sdba.adjustment.NpdfTransform``. (:issue:`1515`, :pull:`1516`). * In the documentation, fixed the tooltips in the indicator search results. (:issue:`1524`, :pull:`1527`). 
* If chunked inputs are passed to indicators ``mean_radiant_temperature`` and ``potential_evapotranspiration``, sub-calculations of the solar angle will also use the same chunks, instead of a single one of the same size as the data. (:issue:`1536`, :pull:`1542`). -* Fix wrong attributes in ``xclim.indices.standardized_precipitation_index``, ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1537`, :issue:`1552` , :pull:`1538`, :pull:`1554`). +* Fix wrong attributes in ``xclim.indices.standardized_precipitation_index``, ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1537`, :pull:`1538`). Internal changes ^^^^^^^^^^^^^^^^ From d50331d63c57bb1ce5f476f53187a3e9048f1a57 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 15 Dec 2023 13:47:09 -0500 Subject: [PATCH 043/135] Fixed missing=0 in convert cal --- CHANGES.rst | 3 +++ tests/test_calendar.py | 3 ++- xclim/core/calendar.py | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0b2f4705c..46fd3b9f2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,9 @@ New features and enhancements * Added uncertainty partitioning method `lafferty_sriver` from Lafferty and Sriver (2023), which can partition uncertainty related to the downscaling method. (:issue:`1497`, :pull:`1529`). * Validate YAML indicators description before trying to build module. (:issue:`1523`, :pull:`1560`). +Bug fixes +^^^^^^^^^ +* Fixed passing ``missing=0`` to ``xclim.core.calendar.convert_calendar`` (:issue:`1562`, :pull:`1563`). 
v0.47.0 (2023-12-01) diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 625e603e9..f9631cd5c 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -413,10 +413,11 @@ def test_convert_calendar_missing(source, target, freq): da_src = xr.DataArray( np.linspace(0, 1, src.size), dims=("time",), coords={"time": src} ) - out = convert_calendar(da_src, target, missing=np.nan, align_on="date") + out = convert_calendar(da_src, target, missing=0, align_on="date") assert xr.infer_freq(out.time) == freq if source == "360_day": assert out.time[-1].dt.day == 31 + assert out[-1] == 0 def test_convert_calendar_and_doy(): diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index dfbb3cb61..74c2b2b28 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -503,7 +503,9 @@ def convert_calendar( target = date_range_like(source[dim], cal_tgt) if isinstance(target, xr.DataArray): - out = out.reindex({dim: target}, fill_value=missing or np.nan) + out = out.reindex( + {dim: target}, fill_value=missing if missing is not None else np.nan + ) # Copy attrs but change remove `calendar` is still present. out[dim].attrs.update(source[dim].attrs) From 26057a03d80f56dd9f9db75df5c206656ef9af54 Mon Sep 17 00:00:00 2001 From: "bumpversion[bot]" Date: Fri, 15 Dec 2023 20:33:32 +0000 Subject: [PATCH 044/135] =?UTF-8?q?Bump=20version:=200.47.2-beta=20?= =?UTF-8?q?=E2=86=92=200.47.3-beta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.cfg | 2 +- xclim/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index d6128c362..dcf2ecc33 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.47.2-beta +current_version = 0.47.3-beta commit = True tag = False parse = (?P\d+)\.(?P\d+).(?P\d+)(\-(?P[a-z]+))? 
diff --git a/xclim/__init__.py b/xclim/__init__.py index e983c9c25..b0aa0c200 100644 --- a/xclim/__init__.py +++ b/xclim/__init__.py @@ -15,7 +15,7 @@ __author__ = """Travis Logan""" __email__ = "logan.travis@ouranos.ca" -__version__ = "0.47.2-beta" +__version__ = "0.47.3-beta" _module_data = _files("xclim.data") From e7d7d03c46a43b9b8bf171eefcb4bd01e1052f5a Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 15 Dec 2023 15:57:28 -0500 Subject: [PATCH 045/135] Make it work with xr <2023.5 --- environment.yml | 2 +- xclim/core/calendar.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index 3be9a6afe..586642f6d 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - python >=3.8 + - python ==3.8 - astroid - boltons >=20.1 - bottleneck >=1.3.1 diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 58346466b..b24fcdc84 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -1737,13 +1737,13 @@ def stack_periods( for begin, strd_slc in da.resample(time=strd_frq).groups.items(): win_resamp = time2.isel(time=slice(strd_slc.start, None)).resample(time=win_frq) # Get slice for first group - win_slc = win_resamp.groupers[0].group_indices[0] + win_slc = win_resamp._group_indices[0] if min_length < window: # If we ask for a min_length period instead is it complete ? 
min_resamp = time2.isel(time=slice(strd_slc.start, None)).resample( time=minl_frq ) - min_slc = min_resamp.groupers[0].group_indices[0] + min_slc = min_resamp._group_indices[0] open_ended = min_slc.stop is None else: # The end of the group slice is None if no outside-group value was found after the last element @@ -1921,7 +1921,7 @@ def _reconstruct_time(start): periods = [] for i, (start, length) in enumerate(zip(starts.values, lengths.values)): real_time = _reconstruct_time(start) - slices = real_time.resample(time=strd_frq).groupers[0].group_indices + slices = real_time.resample(time=strd_frq)._group_indices if i == 0: slc = slice(slices[0].start, min(slices[mid].stop, length)) elif i == da.period.size - 1: From b4c3028f74c9e152d418360ca8641a2bd6fbb6fe Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 15 Dec 2023 16:54:59 -0500 Subject: [PATCH 046/135] forgot test pin in env! --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 586642f6d..3be9a6afe 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - python ==3.8 + - python >=3.8 - astroid - boltons >=20.1 - bottleneck >=1.3.1 From e678308f2c5cde75a3c63cddcab7835dab1e422e Mon Sep 17 00:00:00 2001 From: "bumpversion[bot]" Date: Fri, 15 Dec 2023 22:41:06 +0000 Subject: [PATCH 047/135] =?UTF-8?q?Bump=20version:=200.47.3-beta=20?= =?UTF-8?q?=E2=86=92=200.47.4-beta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.cfg | 2 +- xclim/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index dcf2ecc33..bf840465d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.47.3-beta +current_version = 0.47.4-beta commit = True tag = False parse = (?P\d+)\.(?P\d+).(?P\d+)(\-(?P[a-z]+))? 
diff --git a/xclim/__init__.py b/xclim/__init__.py index b0aa0c200..d56960211 100644 --- a/xclim/__init__.py +++ b/xclim/__init__.py @@ -15,7 +15,7 @@ __author__ = """Travis Logan""" __email__ = "logan.travis@ouranos.ca" -__version__ = "0.47.3-beta" +__version__ = "0.47.4-beta" _module_data = _files("xclim.data") From 7632597fc617afc603b182461b3bf7dc11229c9a Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:27:12 -0500 Subject: [PATCH 048/135] use bump-my-version with semver v2 --- environment.yml | 2 +- pyproject.toml | 28 +++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 3be9a6afe..19d78a220 100644 --- a/environment.yml +++ b/environment.yml @@ -30,7 +30,7 @@ dependencies: # Testing and development dependencies - black >=22.12 - blackdoc - - bump2version + - bump-my-version - cairosvg - coverage - distributed >=2.0 diff --git a/pyproject.toml b/pyproject.toml index 15329f375..36bbbd64b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ dev = [ # Dev tools and testing "black >=23.3.0", "blackdoc", - "bump2version", + "bump-my-version", "coverage[toml]", "flake8", "flake8-alphabetize", @@ -113,6 +113,32 @@ xclim = "xclim.cli:cli" [tool] +[tool.bumpversion] +current_version = "0.47.4-beta" +commit = true +tag = false +parse = "(?P\\d+)\\.(?P\\d+).(?P\\d+)(\\-(?P[a-z]+)(\\-(?P\\d+))?" 
+serialize = [ + "{major}.{minor}.{patch}-{release}-{build}", + "{major}.{minor}.{patch}" +] + +[tool.bumpversion.parts.release] +optional_value = "gamma" +values = [ + "alpha", + "beta", + "gamma" +] + +[tool.bumpversion.parts.build] +independent = true + +[[tool.bumpversion.files]] +filename = "xclim/__init__.py" +search = "__version__ = \"{current_version}\"" +replace = "__version__ = \"{new_version}\"" + [tool.black] target-version = [ "py38", From 1c5bb5baf73b59312f26a2e702b43f5d365b84ba Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:32:24 -0500 Subject: [PATCH 049/135] remove setup.cfg, migrate to .flake8 --- .flake8 | 40 +++++++++++++++++++++++++++++++++++++ setup.cfg | 59 ------------------------------------------------------- 2 files changed, 40 insertions(+), 59 deletions(-) create mode 100644 .flake8 delete mode 100644 setup.cfg diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..522625f06 --- /dev/null +++ b/.flake8 @@ -0,0 +1,40 @@ +[flake8] +exclude = + .git, + docs, + build, + .eggs, + tests +ignore = + AZ100, + AZ200, + AZ300, + C, + D, + E, + F, + W503 +per-file-ignores = + xclim/core/locales.py:RST399 +rst-directives = + bibliography, + autolink-skip +rst-roles = + doc, + mod, + py:attr, + py:attribute, + py:class, + py:const, + py:data, + py:func, + py:indicator, + py:meth, + py:mod, + py:obj, + py:ref, + ref, + cite:cts, + cite:p, + cite:t, + cite:ts diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index bf840465d..000000000 --- a/setup.cfg +++ /dev/null @@ -1,59 +0,0 @@ -[bumpversion] -current_version = 0.47.4-beta -commit = True -tag = False -parse = (?P\d+)\.(?P\d+).(?P\d+)(\-(?P[a-z]+))? 
-serialize = - {major}.{minor}.{patch}-{release} - {major}.{minor}.{patch} - -[bumpversion:part:release] -optional_value = gamma -values = - beta - gamma - -[bumpversion:file:xclim/__init__.py] -search = __version__ = "{current_version}" -replace = __version__ = "{new_version}" - -[flake8] -exclude = - .git, - docs, - build, - .eggs, - tests -ignore = - AZ100, - AZ200, - AZ300, - C, - D, - E, - F, - W503 -per-file-ignores = - xclim/core/locales.py:RST399 -rst-directives = - bibliography, - autolink-skip -rst-roles = - doc, - mod, - py:attr, - py:attribute, - py:class, - py:const, - py:data, - py:func, - py:indicator, - py:meth, - py:mod, - py:obj, - py:ref, - ref, - cite:cts, - cite:p, - cite:t, - cite:ts From c5819a4a4eba625887823313d428e626c5089a52 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 21 Dec 2023 15:01:47 -0500 Subject: [PATCH 050/135] initial new version bumping approach --- pyproject.toml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 36bbbd64b..8ee91af97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,22 +117,22 @@ xclim = "xclim.cli:cli" current_version = "0.47.4-beta" commit = true tag = false -parse = "(?P\\d+)\\.(?P\\d+).(?P\\d+)(\\-(?P[a-z]+)(\\-(?P\\d+))?" +allow_dirty = true +parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)(\\-(?P[a-z]+)(\\.(?P\\d+)))?" 
serialize = [ - "{major}.{minor}.{patch}-{release}-{build}", + "{major}.{minor}.{patch}-{release}.{build}", "{major}.{minor}.{patch}" ] [tool.bumpversion.parts.release] -optional_value = "gamma" +optional_value = "release" values = [ - "alpha", - "beta", - "gamma" + "dev", + "release" ] [tool.bumpversion.parts.build] -independent = true +independent = false [[tool.bumpversion.files]] filename = "xclim/__init__.py" From d38f1613a7feb97d5146de97a774563cca6ea055 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 22 Dec 2023 15:14:45 -0500 Subject: [PATCH 051/135] pre-commit autoupdate, run hooks --- .pre-commit-config.yaml | 15 +++++++-------- pyproject.toml | 36 ++++++++++++++++++------------------ 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8d7e4c222..ff433e263 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,6 @@ repos: rev: v4.5.0 hooks: - id: trailing-whitespace - exclude: setup.cfg - id: end-of-file-fixer exclude: '.ipynb|.github/publish-mastodon.template.md' - id: check-json @@ -33,15 +32,15 @@ repos: - id: yamllint args: [ '--config-file=.yamllint.yaml' ] - repo: https://github.com/psf/black-pre-commit-mirror - rev: 23.11.0 + rev: 23.12.0 hooks: - id: black - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.6 + rev: v0.1.9 hooks: - id: ruff - repo: https://github.com/pycqa/flake8 @@ -49,12 +48,12 @@ repos: hooks: - id: flake8 additional_dependencies: [ 'flake8-alphabetize', 'flake8-rst-docstrings '] - args: [ '--config=setup.cfg' ] + args: [ '--config=.flake8' ] - repo: https://github.com/nbQA-dev/nbQA rev: 1.7.1 hooks: - id: nbqa-black - additional_dependencies: [ 'black==23.11.0' ] + additional_dependencies: [ 'black==23.12.0' ] - id: nbqa-pyupgrade args: [ '--py38-plus' ] - id: nbqa-isort @@ -69,9 +68,9 @@ 
repos: hooks: - id: blackdoc exclude: '(xclim/indices/__init__.py|docs/installation.rst)' - additional_dependencies: [ 'black==23.11.0' ] + additional_dependencies: [ 'black==23.12.0' ] - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.27.2 + rev: 0.27.3 hooks: - id: check-github-workflows - id: check-readthedocs diff --git a/pyproject.toml b/pyproject.toml index 8ee91af97..f5405fb3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,14 @@ xclim = "xclim.cli:cli" [tool] +[tool.black] +target-version = [ + "py38", + "py39", + "py310", + "py311" +] + [tool.bumpversion] current_version = "0.47.4-beta" commit = true @@ -120,31 +128,23 @@ tag = false allow_dirty = true parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)(\\-(?P[a-z]+)(\\.(?P\\d+)))?" serialize = [ - "{major}.{minor}.{patch}-{release}.{build}", - "{major}.{minor}.{patch}" + "{major}.{minor}.{patch}-{release}.{build}", + "{major}.{minor}.{patch}" ] -[tool.bumpversion.parts.release] -optional_value = "release" -values = [ - "dev", - "release" -] - -[tool.bumpversion.parts.build] -independent = false - [[tool.bumpversion.files]] filename = "xclim/__init__.py" search = "__version__ = \"{current_version}\"" replace = "__version__ = \"{new_version}\"" -[tool.black] -target-version = [ - "py38", - "py39", - "py310", - "py311" +[tool.bumpversion.parts.build] +independent = false + +[tool.bumpversion.parts.release] +optional_value = "release" +values = [ + "dev", + "release" ] [tool.coverage.run] From 64c51072ef284e269f8b30a481e5d9407a7132c3 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 22 Dec 2023 15:34:56 -0500 Subject: [PATCH 052/135] update bumpversion workflow to bump patch only if version is stable --- .github/workflows/bump-version.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml index c325fb40f..6f2bf3637 100644 --- 
a/.github/workflows/bump-version.yml +++ b/.github/workflows/bump-version.yml @@ -43,10 +43,22 @@ jobs: run: | CURRENT_VERSION="$(grep -E '__version__' xclim/__init__.py | cut -d ' ' -f3)" echo "current_version=${CURRENT_VERSION}" + echo "CURRENT_VERSION=${CURRENT_VERSION}" >> $GITHUB_ENV + - name: Install bump-my-version + run: | + python -m pip install bump-my-version - name: Bump Patch Version + if: ${{ !env.CURRENT_VERSION =~ \d+\.\d+\.\d+(-dev(\.\d+)?)?$ }} + run: | + echo "Version is stable, bumping 'patch' version" + bump-my-version bump patch + NEW_VERSION="$(grep -E '__version__' xclim/__init__.py | cut -d ' ' -f3)" + echo "new_version=${NEW_VERSION}" + - name: Bump Build Version + if: ${{ env.CURRENT_VERSION =~ \d+\.\d+\.\d+(-dev(\.\d+)?)?$ }} run: | - python -m pip install bump2version - echo "running `bump2version patch`" + echo "Development version (ends in 'dev(\.\d+)?'), bumping 'build' version" + bump-my-version bump build NEW_VERSION="$(grep -E '__version__' xclim/__init__.py | cut -d ' ' -f3)" echo "new_version=${NEW_VERSION}" - name: Push Changes From 5f570441f6c0da60494ed29c8e9c40cf4c5b2961 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 22 Dec 2023 15:51:50 -0500 Subject: [PATCH 053/135] update CHANGES.rst --- CHANGES.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 68bd98287..5566088b9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,16 +6,29 @@ v0.48 (unreleased) ------------------ Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`). +Announcements +^^^^^^^^^^^^^ +* `xclim` now adheres to the `Semantic Versioning 2.0.0 `_ specification. (:issue:`1556`). 
+ New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * Added uncertainty partitioning method `lafferty_sriver` from Lafferty and Sriver (2023), which can partition uncertainty related to the downscaling method. (:issue:`1497`, :pull:`1529`). * Validate YAML indicators description before trying to build module. (:issue:`1523`, :pull:`1560`). * New ``xclim.core.calendar.stack_periods`` and ``unstack_periods`` for performing ``rolling(time=...).construct(..., stride=...)`` but with non-uniform temporal periods like years or months. They replace ``xclim.sdba.processing.construct_moving_yearly_window`` and ``unpack_moving_yearly_window`` which are deprecated and will be removed in a future release. +Breaking changes +^^^^^^^^^^^^^^^^ +* `bump2version` has been replaced with `bump-my-version` to bump the version number using configurations set in the `pyproject.toml` file. (:pull:`1557`). + Bug fixes ^^^^^^^^^ * Fixed passing ``missing=0`` to ``xclim.core.calendar.convert_calendar`` (:issue:`1562`, :pull:`1563`). +Internal changes +^^^^^^^^^^^^^^^^ +* The `flake8` configuration has been migrated from `setup.cfg` to `.flake8`. +* The `bump-version.yml` workflow has been adjusted to bump the `patch` version when the last version is determined to have been a `release` version; otherwise, the `build` version is bumped. + v0.47.0 (2023-12-01) -------------------- Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`). 
From 626d4816dce475c76616c308e2f5a3e0566b2b3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Sat, 23 Dec 2023 07:24:18 -0500 Subject: [PATCH 054/135] Add reference for APP method of gamma distribution --- docs/references.bib | 15 +++++++++++++++ xclim/indices/stats.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/references.bib b/docs/references.bib index 9b11742f2..1c5f49339 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -2103,3 +2103,18 @@ @article{Lafferty2023 url = {https://www.nature.com/articles/s41612-023-00486-0}, year = {2023}, } + +@article{thom_1958, + title = {A Note on the Gamma Distribution}, + author = {Thom, H. C. S.}, + year = {1958}, + journal = {Monthly Weather Review}, + volume = {86}, + number = {4}, + pages = {117--122}, + publisher = {{American Meteorological Society}}, + issn = {1520-0493, 0027-0644}, + doi = {10.1175/1520-0493(1958)086<0117:ANOTGD>2.0.CO;2}, + abstract = {Abstract The general properties of the gamma distribution, which has several applications in meteorology, are discussed. A short review of the general properties of good statistical estimators is given. This is applied to the gamma distribution to show that the maximum likelihood estimators are jointly sufficient. 
A new, simple approximation of the likelihood solutions is given, and the efficiency of the fitting procedure is computed.}, + chapter = {Monthly Weather Review}, +} diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 8325ba623..7a91e89bc 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -476,7 +476,7 @@ def _fit_start(x, dist: str, **fitkwargs: Any) -> tuple[tuple, dict]: References ---------- - :cite:cts:`coles_introduction_2001,cohen_parameter_2019` + :cite:cts:`coles_introduction_2001,cohen_parameter_2019, thom_1958` """ x = np.asarray(x) From 044751acb56acb45df1fb4a03efff895ac717d23 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Sun, 24 Dec 2023 13:16:14 -0500 Subject: [PATCH 055/135] replace setup.cfg flake8 configurations with .flake8 file --- .github/workflows/bump-version.yml | 1 - .github/workflows/codeql-analysis.yml | 1 - .github/workflows/main.yml | 1 - .github/workflows/upstream.yml | 1 - CONTRIBUTING.rst | 2 +- Makefile | 2 +- pyproject.toml | 1 - tox.ini | 2 +- 8 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml index 6f2bf3637..17659bb83 100644 --- a/.github/workflows/bump-version.yml +++ b/.github/workflows/bump-version.yml @@ -20,7 +20,6 @@ on: - pyproject.toml - requirements_dev.txt - requirements_upstream.txt - - setup.cfg - tox.ini - xclim/__init__.py diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index c0d0b5468..d82707b86 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -9,7 +9,6 @@ on: - Makefile - pyproject.toml - requirements_upstream.txt - - setup.cfg - tox.ini - xclim/__init__.py - docs/**.ipynb diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b19408d70..1532c39d4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -8,7 +8,6 @@ on: - 
CHANGES.rst - README.rst - pyproject.toml - - setup.cfg - xclim/__init__.py pull_request: types: diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml index 59a587464..881d8c7c1 100644 --- a/.github/workflows/upstream.yml +++ b/.github/workflows/upstream.yml @@ -7,7 +7,6 @@ on: - CHANGES.rst - README.rst - pyproject.toml - - setup.cfg - xclim/__init__.py schedule: - cron: "0 0 * * *" # Daily “At 00:00” UTC diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 97068bc49..e9ad5486c 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -147,7 +147,7 @@ Ready to contribute? Here's how to set up `xclim` for local development. $ black --check xclim tests $ isort --check xclim tests $ ruff xclim tests - $ flake8 --config=setup.cfg xclim tests + $ flake8 --config=.flake8 xclim tests $ nbqa black --check docs $ nbqa isort --check docs $ blackdoc --check --exclude=xclim/indices/__init__.py xclim diff --git a/Makefile b/Makefile index d78c50063..7146a99ed 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ lint: ## check style with flake8 and black black --check xclim tests isort --check xclim tests ruff xclim tests - flake8 --config=setup.cfg xclim tests + flake8 --config=.flake8 xclim tests nbqa black --check docs blackdoc --check --exclude=xclim/indices/__init__.py xclim blackdoc --check docs diff --git a/pyproject.toml b/pyproject.toml index f5405fb3e..d9c54cbed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,7 +169,6 @@ include = [ "docs/make.bat", "docs/notebooks/_finder.py", "requirements_upstream.txt", - "setup.cfg", "tests/**/*.py", "tests/**/*.txt", "tox.ini", diff --git a/tox.ini b/tox.ini index 961765544..3017f8de9 100644 --- a/tox.ini +++ b/tox.ini @@ -34,7 +34,7 @@ commands = black --check xclim tests isort --check xclim tests ruff xclim tests - flake8 --config=setup.cfg xclim tests + flake8 --config=.flake8 xclim tests nbqa black --check docs blackdoc --check --exclude=xclim/indices/__init__.py xclim blackdoc --check docs 
From 2beb56751bb461bda65da05ed4e9d4e61bc32ae1 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Sun, 24 Dec 2023 13:20:55 -0500 Subject: [PATCH 056/135] update CHANGES.rst --- CHANGES.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5566088b9..6452c8b76 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,7 +8,7 @@ Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal B Announcements ^^^^^^^^^^^^^ -* `xclim` now adheres to the `Semantic Versioning 2.0.0 <https://semver.org/spec/v2.0.0.html>`_ specification. (:issue:`1556`). +* `xclim` now adheres to the `Semantic Versioning 2.0.0 <https://semver.org/spec/v2.0.0.html>`_ specification. (:issue:`1556`, :pull:`1569`). New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -18,7 +18,7 @@ New features and enhancements Breaking changes ^^^^^^^^^^^^^^^^ -* `bump2version` has been replaced with `bump-my-version` to bump the version number using configurations set in the `pyproject.toml` file. (:pull:`1557`). +* `bump2version` has been replaced with `bump-my-version` to bump the version number using configurations set in the `pyproject.toml` file. (:issue:`1557`, :pull:`1569`). Bug fixes ^^^^^^^^^ @@ -26,8 +26,8 @@ Bug fixes Internal changes ^^^^^^^^^^^^^^^^ -* The `flake8` configuration has been migrated from `setup.cfg` to `.flake8`. -* The `bump-version.yml` workflow has been adjusted to bump the `patch` version when the last version is determined to have been a `release` version; otherwise, the `build` version is bumped. +* The `flake8` configuration has been migrated from `setup.cfg` to `.flake8`; `setup.cfg` has been removed. (:pull:`1569`) +* The `bump-version.yml` workflow has been adjusted to bump the `patch` version when the last version is determined to have been a `release` version; otherwise, the `build` version is bumped. (:issue:`1557`, :pull:`1569`).
v0.47.0 (2023-12-01) -------------------- From f57f9fa582ef3dd160418474d125a1e9173718d6 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Sun, 24 Dec 2023 14:17:43 -0500 Subject: [PATCH 057/135] migrate questions/support to GitHub Discussions, create a translations requests discussion, add links to both from issues --- .github/DISCUSSION_TEMPLATE/questions.yml | 37 +++++++++++++++++++ .github/DISCUSSION_TEMPLATE/translations.yml | 33 +++++++++++++++++ .../ISSUE_TEMPLATE/0004-QUESTION-SUPPORT.yml | 31 ---------------- .github/ISSUE_TEMPLATE/config.yml | 10 +++++ 4 files changed, 80 insertions(+), 31 deletions(-) create mode 100644 .github/DISCUSSION_TEMPLATE/questions.yml create mode 100644 .github/DISCUSSION_TEMPLATE/translations.yml delete mode 100644 .github/ISSUE_TEMPLATE/0004-QUESTION-SUPPORT.yml diff --git a/.github/DISCUSSION_TEMPLATE/questions.yml b/.github/DISCUSSION_TEMPLATE/questions.yml new file mode 100644 index 000000000..ceb365ec5 --- /dev/null +++ b/.github/DISCUSSION_TEMPLATE/questions.yml @@ -0,0 +1,37 @@ +name: Question/Support +description: Ask for help from the developers +labels: [ "support" ] + +body: + - type: markdown + attributes: + value: | + Thanks for opening this discussion! + Before you submit, please make sure you have read our [Code of Conduct](https://github.com/Ouranosinc/xclim/blob/master/CODE_OF_CONDUCT.md). + - type: textarea + id: setup-information + attributes: + label: Setup Information + description: | + What software versions are you running? Example: + * Xclim version: 0.55.0-gamma + * Python version: 4.2 + * Operating System: Nutmeg Linux 12.34 | macOS 11.0 "Redmond" + value: | + * Xclim version: + * Python version: + * Operating System: + - type: textarea + id: description + attributes: + label: Context + description: Describe what you were trying to get done. Tell us what happened, what went wrong, and what you expected to happen. 
+ - type: textarea + id: steps-to-reproduce + attributes: + label: Steps To Reproduce + description: Paste the command(s) you ran and the output. If there was a crash, please include the traceback below. + value: | + ``` + $ pip install foo --bar + ``` diff --git a/.github/DISCUSSION_TEMPLATE/translations.yml b/.github/DISCUSSION_TEMPLATE/translations.yml new file mode 100644 index 000000000..be8a5db31 --- /dev/null +++ b/.github/DISCUSSION_TEMPLATE/translations.yml @@ -0,0 +1,33 @@ +name: Translations +description: Coordinate translations of the Xclim documentation +labels: [ "docs" ] + +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to help translate Xclim's documentation! + Before you submit, please make sure you have read our [Code of Conduct](https://github.com/Ouranosinc/xclim/blob/master/CODE_OF_CONDUCT.md). + - type: textarea + id: language + attributes: + label: Language + description: What language are you translating to? + - type: textarea + id: translation + attributes: + label: Translation + description: | + Please paste your translation here. + If you are translating a file, please paste the file contents here. + Remember that you can use Markdown formatting in this text box. + value: | + diff --git a/.github/ISSUE_TEMPLATE/0004-QUESTION-SUPPORT.yml b/.github/ISSUE_TEMPLATE/0004-QUESTION-SUPPORT.yml deleted file mode 100644 index 8b1b4852d..000000000 --- a/.github/ISSUE_TEMPLATE/0004-QUESTION-SUPPORT.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Question/Support -description: Ask for help from the developers -labels: [ "support" ] - -body: - - type: textarea - id: setup-information - attributes: - label: Setup Information - description: | - What software versions are you running? 
Example: - - Xclim version: 0.55.0-gamma - - Python version: 4.2 - - Operating System: Nutmeg Linux 12.34 | macOS 11.0 "Redmond" - value: | - - Xclim version: - - Python version: - - Operating System: - - type: textarea - id: description - attributes: - label: Context - description: Describe what you were trying to get done. Tell us what happened, what went wrong, and what you expected to happen. - - type: checkboxes - id: terms - attributes: - label: Code of Conduct - description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/Ouranosinc/xclim/blob/master/CODE_OF_CONDUCT.md) - options: - - label: I agree to follow this project's Code of Conduct - required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 0086358db..9f962cfcc 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1 +1,11 @@ blank_issues_enabled: true +contact_links: + - name: Questions and/or support + about: "For questions or support, please use the Discussions tab" + url: https://www.github.com/Ouranosinc/xclim/discussions/categories/questions + - name: Translation requests + about: "For coordinating translation requests, please use the Discussions tab" + url: https://www.github.com/Ouranosinc/xclim/discussions/categories/translations + - name: PAVICS-related questions + about: "For questions related to PAVICS, the Platform for the Analysis and Visualization of Climate Science, please use the PAVICS email: pavics@ouranos.ca" + url: https://pavics.ouranos.ca/index.html From bcaf72a1391be521bee2f070790169a93d3e9b66 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Sun, 24 Dec 2023 14:32:22 -0500 Subject: [PATCH 058/135] minor labeling adjustments --- .github/DISCUSSION_TEMPLATE/questions.yml | 2 +- .github/DISCUSSION_TEMPLATE/translations.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/.github/DISCUSSION_TEMPLATE/questions.yml b/.github/DISCUSSION_TEMPLATE/questions.yml index ceb365ec5..033db1e4e 100644 --- a/.github/DISCUSSION_TEMPLATE/questions.yml +++ b/.github/DISCUSSION_TEMPLATE/questions.yml @@ -1,4 +1,4 @@ -name: Question/Support +name: "[Questions] Questions and/or support" description: Ask for help from the developers labels: [ "support" ] diff --git a/.github/DISCUSSION_TEMPLATE/translations.yml b/.github/DISCUSSION_TEMPLATE/translations.yml index be8a5db31..10a712fca 100644 --- a/.github/DISCUSSION_TEMPLATE/translations.yml +++ b/.github/DISCUSSION_TEMPLATE/translations.yml @@ -1,4 +1,4 @@ -name: Translations +name: "[Translations] Translation request/coordination" description: Coordinate translations of the Xclim documentation labels: [ "docs" ] @@ -24,10 +24,10 @@ body: value: | From 34c6045ef94e19617dd1e1878f60c849127863ba Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Sun, 24 Dec 2023 14:37:24 -0500 Subject: [PATCH 059/135] update .gitignore to ignore autogenerated files --- .gitignore | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 6b0ee89bb..7b0401607 100644 --- a/.gitignore +++ b/.gitignore @@ -107,23 +107,10 @@ ENV/ .idea/ # autogenerated RestructuredText -/docs/modules.rst -/docs/xclim.core.rst -/docs/xclim.data.rst -/docs/xclim.ensembles.rst -/docs/xclim.indicators.atmos.rst -/docs/xclim.indicators.generic.rst -/docs/xclim.indicators.land.rst -/docs/xclim.indicators.rst -/docs/xclim.indicators.seaIce.rst -/docs/xclim.indices.fire.rst -/docs/xclim.indices.rst -/docs/xclim.locales.rst -/docs/xclim.rst -/docs/xclim.sdba.rst -/docs/xclim.testing.rst -/docs/xclim.testing.tests.rst -/docs/xclim.testing.tests.test_sdba.rst +docs/apidoc/modules.rst +docs/apidoc/xclim*.rst +docs/indicators.json +docs/variables.json .vscode From 7bec305b53467d1941f3a2b9300cbdcde1514ee4 Mon Sep 17 00:00:00 2001 From: Zeitsperre 
<10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:07:19 -0500 Subject: [PATCH 060/135] fix semver workflow to conform to regex conventions in GitHub --- .github/workflows/bump-version.yml | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml index 17659bb83..b9bd0d62d 100644 --- a/.github/workflows/bump-version.yml +++ b/.github/workflows/bump-version.yml @@ -6,19 +6,18 @@ on: - master paths-ignore: - .* - - .github/**.yml + - .github/*/*.yml - CHANGES.rst - Makefile - - docs/**.ipynb - - docs/**.py - - docs/**.rst + - docs/*/*.ipynb + - docs/*/*.py + - docs/*/*.rst - docs/Makefile - docs/make.bat - docs/references.bib - environment.yml - pylintrc - pyproject.toml - - requirements_dev.txt - requirements_upstream.txt - tox.ini - xclim/__init__.py @@ -46,20 +45,19 @@ jobs: - name: Install bump-my-version run: | python -m pip install bump-my-version - - name: Bump Patch Version - if: ${{ !env.CURRENT_VERSION =~ \d+\.\d+\.\d+(-dev(\.\d+)?)?$ }} + - name: Conditional Bump + id: bump run: | - echo "Version is stable, bumping 'patch' version" - bump-my-version bump patch - NEW_VERSION="$(grep -E '__version__' xclim/__init__.py | cut -d ' ' -f3)" - echo "new_version=${NEW_VERSION}" - - name: Bump Build Version - if: ${{ env.CURRENT_VERSION =~ \d+\.\d+\.\d+(-dev(\.\d+)?)?$ }} - run: | - echo "Development version (ends in 'dev(\.\d+)?'), bumping 'build' version" - bump-my-version bump build + if [[ $CURRENT_VERSION =~ \d+\.\d+\.\d+(-dev(\.\d+)?)?$ ]]; then + echo "Development version (ends in 'dev(\.\d+)?'), bumping 'build' version" + bump-my-version bump build + else + echo "Version is stable, bumping 'patch' version" + bump-my-version bump patch + fi NEW_VERSION="$(grep -E '__version__' xclim/__init__.py | cut -d ' ' -f3)" echo "new_version=${NEW_VERSION}" + echo "NEW_VERSION=${NEW_VERSION}" >> $GITHUB_ENV - name: Push Changes uses: 
ad-m/github-push-action@v0.8.0 with: From 398dc7aee814cd69b67e5606ed74997a3949bbd0 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:40:28 -0500 Subject: [PATCH 061/135] update ReadMe to remove Gitter integration --- README.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 57b7bf1f7..d44b07946 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ xclim: Climate services library |logo| +----------------------------+-----------------------------------------------------+ | Versions | |pypi| |conda| |versions| | +----------------------------+-----------------------------------------------------+ -| Documentation and Support | |docs| |gitter| | +| Documentation and Support | |docs| |discussions| | +----------------------------+-----------------------------------------------------+ | Open Source | |license| |fair| |zenodo| |pyOpenSci| |joss| | +----------------------------+-----------------------------------------------------+ @@ -81,7 +81,8 @@ Contributing to xclim --------------------- `xclim` is in active development and is being used in production by climate services specialists around the world. -* If you're interested in participating in the development of `xclim` by suggesting new features, new indices or report bugs, please leave us a message on the `issue tracker`_. There is also a chat room on gitter (|gitter|). +* If you're interested in participating in the development of `xclim` by suggesting new features, new indices or reporting bugs, please leave us a message on the `issue tracker`_. + * If you have a support/usage question or would like to translate `xclim` to a new language, be sure to check out the existing |discussions| first! * If you would like to contribute code or documentation (which is greatly appreciated!), check out the `Contributing Guidelines`_ before you begin!
@@ -127,6 +128,10 @@ This package was created with Cookiecutter_ and the `audreyfeldroy/cookiecutter- :target: https://anaconda.org/conda-forge/xclim :alt: Conda-forge Build Version +.. |discussions| image:: https://img.shields.io/badge/GitHub-Discussions-blue + :target: https://github.com/Ouranosinc/xclim/discussions + :alt: Static Badge + .. |gitter| image:: https://badges.gitter.im/Ouranosinc/xclim.svg :target: https://gitter.im/Ouranosinc/xclim?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge :alt: Gitter Chat From 5556a76b3ab738ab64a1222bd4cf2f1b5461ddb2 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:56:54 -0500 Subject: [PATCH 062/135] Update CONTRIBUTING.rst --- CONTRIBUTING.rst | 83 ++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index e9ad5486c..cae525a30 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -23,7 +23,7 @@ and "help wanted" is open to whoever wants to implement it. General to-do list for implementing a new Indicator: -1. Implement the indice +#. Implement the indice * Indices are function wrapped with :py:func:`~xclim.core.units.declare_units` * Their input arguments should have type annotations, as documented in :py:class:`~xclim.core.utils.InputKind` @@ -31,31 +31,34 @@ General to-do list for implementing a new Indicator: * They should set the units on their outputs, but no other metadata fields. * Their code should be found in the most relevant ``xclim/indices/_*.py`` file. Functions are explicitly added to the ``__all__`` at the top of the file. -2. Add unit tests +#. Add unit tests * Indices are best tested with made up, idealized data to explicitly test the edge cases. Many pytest fixtures are available to help this data generation. * Tests should be added as one or more functions in ``tests/test_indices.py``, see other tests for inspiration. -3. 
Add the indicator +#. Add the indicator * See :ref:`notebooks/extendxclim:Defining new indicators` for more info and look at the other indicators for inspiration. * They are added in the most relevant ``xclim/indicators/{realm}/_*.py`` file. * Indicator are instances of subclasses of :py:class:`xclim.core.indicator.Indicator`. They should use a class declared within the ``{realm}`` folder, creating a dummy one if needed. They are explicitly added to the file's ``__all__``. -4. Add unit tests +#. Add unit tests * Indicators are best tested with real data, also looking at missing value propagation and metadata formatting. In addition to the ``atmosds`` fixture, only datasets that can be accessed with :py:func:`xclim.testing.open_dataset` should be used. For convenience, this special function is accessible as the ``open_dataset`` pytest fixture. * Tests are added in the most relevant ``tests/test_{variable}.py`` file. -5. Add French translations +#. Add French translations xclim comes with an internationalization module and all "official" indicators (those in ``xclim.atmos.indicators``) must have a french translation added to ``xclim/data/fr.json``. This part can be done by the core team after you open a Pull Request. +.. note:: + If you are adding new translations to the library (for languages other than French), please begin by opening a discussion on the `xclim Discussions page`_ to coordinate the scope and implementation of these translations. + General notes for implementing new bias-adjustment methods: * Method are implemented as classes in ``xclim/sdba/adjustment.py``. @@ -113,36 +116,36 @@ Get Started! Ready to contribute? Here's how to set up `xclim` for local development. -1. Fork the `xclim` repo on GitHub. +#. Fork the `xclim` repo on GitHub. -2. Clone your fork locally:: +#. Clone your fork locally:: $ git clone git@github.com:{my_github_username}/xclim.git $ cd xclim/ -3. Create a development environment. We recommend using ``conda``:: +#. 
Create a development environment. We recommend using ``conda``:: $ conda create -n xclim python=3.8 --file=environment.yml $ pip install -e .[dev] -4. Create a branch for local development:: +#. Create a branch for local development:: $ git checkout -b name-of-your-bugfix-or-feature Now you can make your changes locally! -5. Before committing your changes, we ask that you install ``pre-commit`` in your development environment. Pre-commit runs git hooks that ensure that your code resembles that of the project and catches and corrects any small errors or inconsistencies when you ``git commit``:: +#. Before committing your changes, we ask that you install ``pre-commit`` in your development environment. Pre-commit runs git hooks that ensure that your code resembles that of the project and catches and corrects any small errors or inconsistencies when you ``git commit``:: # To install the necessary pre-commit hooks: $ pre-commit install # To run pre-commit hooks manually: $ pre-commit run --all-files - Instead of ``pre-commit``, you can also verify your changes using the `Make` recipe for code linting checks:: + Instead of ``pre-commit``, you can also verify your changes using the `Make` recipe for code linting checks:: $ make lint - Or, alternatively, you can check individual hooks manually with `black`, `isort`, `ruff`, `flake8`, `flake8-rst-docstrings`, `nbqa`, `blackdoc`, and `yamllint`:: + Or, alternatively, you can check individual hooks manually with `black`, `isort`, `ruff`, `flake8`, `flake8-rst-docstrings`, `nbqa`, `blackdoc`, and `yamllint`:: $ black --check xclim tests $ isort --check xclim tests @@ -154,14 +157,14 @@ Ready to contribute? Here's how to set up `xclim` for local development. $ blackdoc --check docs $ yamllint --config-file=.yamllint.yaml xclim -6. When features or bug fixes have been contributed, unit tests and doctests have been added, or notebooks have been updated, use ``$ pytest`` to test them:: +#. 
When features or bug fixes have been contributed, unit tests and doctests have been added, or notebooks have been updated, use ``$ pytest`` to test them:: $ pytest --no-cov --nbval --dist=loadscope --rootdir=tests/ docs/notebooks --ignore=docs/notebooks/example.ipynb # for notebooks, exclusively. $ pytest --no-cov --rootdir=tests/ --xdoctest xclim # for doctests, exclusively. $ pytest # for all unit tests, excluding doctests and notebooks. $ pytest -m "not slow" # for all unit tests, excluding doctests, notebooks, and "slow" marked tests. - Alternatively, one can use ``$ tox`` to run very specific testing configurations, as GitHub Workflows would do when a Pull Request is submitted and new commits are pushed:: + Alternatively, one can use ``$ tox`` to run very specific testing configurations, as GitHub Workflows would do when a Pull Request is submitted and new commits are pushed:: $ tox -e py38 # run tests on Python 3.8 $ tox -e py39-upstream-doctest # run tests on Python 3.9, including doctests, with upstream dependencies @@ -172,7 +175,7 @@ Ready to contribute? Here's how to set up `xclim` for local development. $ tox -m test # run all builds listed above -.. warning:: + .. warning:: Starting from `xclim` v0.46.0, when running tests with `tox`, any `pytest` markers passed to `pyXX` builds (e.g. `-m "not slow"`) must be passed to `tox` directly. This can be done as follows:: $ tox -e py38 -- -m "not slow" @@ -181,80 +184,77 @@ Ready to contribute? Here's how to set up `xclim` for local development. `notebooks_doctests`: this configuration does not pass test markers to its `pytest` call. `offline`: this configuration runs by default with the `-m "not requires_internet"` test marker. Be aware that running `tox` and manually setting a `pytest` marker will override this default. -.. note:: + .. note:: `xclim` tests are organized to support the `pytest-xdist`_ plugin for distributed testing across workers or CPUs. 
In order to benefit from multiple processes, add the flag `--numprocesses=auto` or `-n auto` to your `pytest` calls. When running tests via `tox`, `numprocesses` is set to the number of logical cores available (`numprocesses=logical`), with a maximum amount of `8`. -7. Docs should also be tested to ensure that the documentation will build correctly on ReadTheDocs. This can be performed in a number of ways:: +#. Docs should also be tested to ensure that the documentation will build correctly on ReadTheDocs. This can be performed in a number of ways:: # To run in a contained virtualenv environment $ tox -e docs # or, alternatively, to build the docs directly $ make docs -.. note:: + .. note:: When building the documentation, the default behaviour is to evaluate notebooks ('`nbsphinx_execute = "auto"`'), rather than simply parse the content ('`nbsphinx_execute = "never"`'). Due to their complexity, this is a very computationally demanding task and should only be performed when necessary (i.e.: when the notebooks have been modified). In order to speed up documentation builds, setting a value for the environment variable "`SKIP_NOTEBOOKS`" (e.g. "`$ export SKIP_NOTEBOOKS=1`") will prevent the notebooks from being evaluated on all subsequent "`$ tox -e docs`" or "`$ make docs`" invocations. -8. After clearing the previous checks, commit your changes and push your branch to GitHub:: +#. After clearing the previous checks, commit your changes and push your branch to GitHub:: $ git add * $ git commit -m "Your detailed description of your changes." -If installed, `pre-commit` will run checks at this point: + If installed, `pre-commit` will run checks at this point: -* If no errors are found, changes will be committed. -* If errors are found, modifications will be made and warnings will be raised if intervention is needed. -* After adding changes, simply `git commit` again:: + * If no errors are found, changes will be committed. 
+ * If errors are found, modifications will be made and warnings will be raised if intervention is needed. + * After adding changes, simply `git commit` again:: $ git push origin name-of-your-bugfix-or-feature -9. Submit a pull request through the GitHub website. +#. Submit a pull request through the GitHub website. Pull Request Guidelines ----------------------- Before you submit a pull request, please follow these guidelines: -1. Open an *issue* on our `GitHub repository`_ with your issue that you'd like to fix or feature that you'd like to implement. -2. Perform the changes, commit and push them either to new a branch within `Ouranosinc/xclim` or to your personal fork of xclim. +#. Open an *issue* on our `GitHub repository`_ with your issue that you'd like to fix or feature that you'd like to implement. -.. warning:: - Try to keep your contributions within the scope of the issue that you are addressing. - While it might be tempting to fix other aspects of the library as it comes up, it's better to - simply to flag the problems in case others are already working on it. +#. Perform the changes, commit and push them either to a new branch within `Ouranosinc/xclim` or to your personal fork of xclim. - Consider adding a "**# TODO:**" comment if the need arises. + .. warning:: + Try to keep your contributions within the scope of the issue that you are addressing. While it might be tempting to fix other aspects of the library as they come up, it's better to simply flag the problems in case others are already working on it. + + Consider adding a "**# TODO:**" comment if the need arises. + +#. Pull requests should raise test coverage for the xclim library. Code coverage is an indicator of how extensively tested the library is. -3. Pull requests should raise test coverage for the xclim library. Code coverage is an indicator of how extensively tested the library is.
If you are adding a new set of functions, they **must be tested** and **coverage percentage should not significantly decrease.** -4. If the pull request adds functionality, your functions should include docstring explanations. - So long as the docstrings are syntactically correct, sphinx-autodoc will be able to automatically parse the information. - Please ensure that the docstrings and documentation adhere to the following standards (badly formed docstrings will fail build tests): + +#. If the pull request adds functionality, your functions should include docstring explanations. So long as the docstrings are syntactically correct, sphinx-autodoc will be able to automatically parse the information. Please ensure that the docstrings and documentation adhere to the following standards (badly formed docstrings will fail build tests): * `numpydoc`_ * `reStructuredText (ReST)`_ -.. note:: + .. note:: If you aren't accustomed to writing documentation in reStructuredText (`.rst`), we encourage you to spend a few minutes going over the incredibly well-summarized `reStructuredText Primer`_ from the sphinx-doc maintainer community. -5. The pull request should work for Python 3.8, 3.9, 3.10, and 3.11 as well as raise test coverage. +#. The pull request should work for Python 3.8, 3.9, 3.10, and 3.11 as well as raise test coverage. Pull requests are also checked for documentation build status and for `PEP8`_ compliance. The build statuses and build errors for pull requests can be found at: https://github.com/Ouranosinc/xclim/actions -.. warning:: + .. warning:: PEP8, black, pytest (with xdoctest) and pydocstyle (for numpy docstrings) conventions are strongly enforced. Ensure that your changes pass all tests prior to pushing your final commits to your branch. Code formatting errors are treated as build errors and will block your pull request from being accepted. -6. The version changes (HISTORY.rst) should briefly describe changes introduced in the Pull request. 
Changes should be organized by type - (ie: `New indicators`, `New features and enhancements`, `Breaking changes`, `Bug fixes`, `Internal changes`) and the GitHub Pull Request, - GitHub Issue. Your name and/or GitHub handle should also be listed among the contributors to this version. This can be done as follows:: +#. The version changes (HISTORY.rst) should briefly describe changes introduced in the Pull request. Changes should be organized by type (ie: `New indicators`, `New features and enhancements`, `Breaking changes`, `Bug fixes`, `Internal changes`) and the GitHub Pull Request, GitHub Issue. Your name and/or GitHub handle should also be listed among the contributors to this version. This can be done as follows:: Contributors to this version: John Jacob Jingleheimer Schmidt (:user:`username`). @@ -454,4 +454,5 @@ Before updating the main conda-forge recipe, we *strongly* suggest performing th .. _`reStructuredText Primer`: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html .. _`sphinxcontrib-bibtex`: https://sphinxcontrib-bibtex.readthedocs.io .. _`xclim on TestPyPI`: https://test.pypi.org/project/xclim/ +.. _`xclim Discussions page`: https://github.com/Ouranosinc/xclim/discussions .. 
_`xclim-testdata repository`: https://github.com/Ouranosinc/xclim-testdata From 1ed9e3d3aeb37ceb5fd2ac5ec8c9783e86ff941a Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 29 Dec 2023 13:11:43 -0500 Subject: [PATCH 063/135] update deployment information --- CONTRIBUTING.rst | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index e9ad5486c..fe75f7c8a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -354,47 +354,41 @@ The method we use is as follows:: **Patch** should be used for bug fixes and optimizations; -**Release** is a keyword used to specify the degree of production readiness (`beta` [, and optionally, `gamma`]). *Only versions built from the main development branch will ever have this tag!* +**Release** is a keyword used to specify the degree of production readiness (`dev` [, and optionally, `release`]). *Only versions built from the main development branch will ever have this marker!* - An increment to the Major or Minor will reset the Release to `beta`. When a build is promoted above `beta` (ie: release-ready), it's a good idea to push this version towards PyPi. +**Build** is a keyword used to specify the build number. *Only versions built from the main development branch will ever have this number!* -Deploying ---------- +An increment to the Major or Minor will reset the Release to `beta`. When a build is promoted above `beta` (ie: the release/stable version), it's a good idea to push this version towards PyPI. -A reminder for the maintainers on how to prepare the library for a tagged version. +Packaging and Deployment +------------------------ -Make sure all your changes are committed (**including an entry in HISTORY.rst**). -Then run:: +This section serves as a reminder for the maintainers on how to prepare the library for a tagged version and how to deploy packages to TestPyPI and PyPI. - $ bump2version