Ouranosinc · RondeauG · Nov 6, 2023 · Nov 1, 2023 · Nov 1, 2023 · Nov 1, 2023
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -31,6 +31,7 @@ Bug fixes
 * Fixed a bug in the documentation build configuration that prevented stable/latest and tagged documentation builds from resolving on ReadTheDocs. (:pull:`256`).
 * Fixed ``get_warming_level`` to avoid incomplete matches. (:pull:`269`).
 * `search_data_catalogs` now eliminates anything that matches any entry in `exclusions`. (:issue:`275`, :pull:`280`).
+* Fixed a bug in ``xs.scripting.save_and_update`` where ``build_path_kwargs`` was ignored when trying to guess the file format. (:pull:`282`).
 
 Internal changes
 ^^^^^^^^^^^^^^^^
@@ -44,6 +45,7 @@ Internal changes
 * Added a new `xscen.testing` module with the `datablock_3d` function previously located in `/tests/conftest.py`. (:pull:`248`).
 * New function `xscen.testing.fake_data` to generate fake data for testing. (:pull:`248`).
 * xESMF 0.8 Regridder and SpatialAverager argument ``out_chunks`` is now accepted by ``xs.regrid_dataset``  and ``xs.spatial_mean``. (:pull:`260`).
+* Multiple improvements to the docstrings and type annotations. (:pull:`282`).
 
 v0.7.1 (2023-08-23)
 -------------------

diff --git a/xscen/aggregate.py b/xscen/aggregate.py
@@ -1,12 +1,13 @@
-# noqa: D100
+"""Functions to aggregate data over time and space."""
 import datetime
 import logging
+import os
 import warnings
 from collections.abc import Sequence
 from copy import deepcopy
 from pathlib import Path
 from types import ModuleType
-from typing import Union
+from typing import Optional, Union
 
 import geopandas as gpd
 import numpy as np
@@ -16,7 +17,6 @@
 import xclim as xc
 import xclim.core.calendar
 import xesmf as xe
-from shapely.geometry import Polygon
 from xclim.core.indicator import Indicator
 
 from .config import parse_config
@@ -47,26 +47,26 @@ def climatological_mean(
     window: int = None,
     min_periods: int = None,
     interval: int = 1,
-    periods: list = None,
-    to_level: str = "climatology",
+    periods: Union[list[str], list[list[str]]] = None,
+    to_level: Optional[str] = "climatology",
 ) -> xr.Dataset:
     """Compute the mean over 'year' for given time periods, respecting the temporal resolution of ds.
 
     Parameters
     ----------
     ds : xr.Dataset
         Dataset to use for the computation.
-    window : int
+    window : int, optional
         Number of years to use for the time periods.
         If left at None and periods is given, window will be the size of the first period.
         If left at None and periods is not given, the window will be the size of the input dataset.
-    min_periods : int
+    min_periods : int, optional
         For the rolling operation, minimum number of years required for a value to be computed.
         If left at None and the xrfreq is either QS or AS and doesn't start in January, min_periods will be one less than window.
         If left at None, it will be deemed the same as 'window'.
     interval : int
         Interval (in years) at which to provide an output.
-    periods : list
+    periods : list of str or list of lists of str, optional
         Either [start, end] or list of [start, end] of continuous periods to be considered. This is needed when the time axis of ds contains some jumps in time.
         If None, the dataset will be considered continuous.
     to_level : str, optional
@@ -208,7 +208,7 @@ def compute_deltas(
     *,
     kind: Union[str, dict] = "+",
     rename_variables: bool = True,
-    to_level: str = "deltas",
+    to_level: Optional[str] = "deltas",
 ) -> xr.Dataset:
     """Compute deltas in comparison to a reference time period, respecting the temporal resolution of ds.
 
@@ -218,7 +218,7 @@ def compute_deltas(
         Dataset to use for the computation.
     reference_horizon : str or xr.Dataset
         Either a YYYY-YYYY string corresponding to the 'horizon' coordinate of the reference period, or a xr.Dataset containing the climatological mean.
-    kind : str
+    kind : str or dict
         ['+', '/', '%'] Whether to provide absolute, relative, or percentage deltas.
         Can also be a dictionary separated per variable name.
     rename_variables : bool
@@ -391,18 +391,18 @@ def spatial_mean(
         'interp_centroid' will find the region's centroid (if coordinates are not fed through kwargs), then perform a .interp() over the spatial dimensions of the Dataset.
         The coordinate can also be directly fed to .interp() through the 'kwargs' argument below.
         'xesmf' will make use of xESMF's SpatialAverager. This will typically be more precise, especially for irregular regions, but can be much slower than other methods.
-    spatial_subset : bool
+    spatial_subset : bool, optional
         If True, xscen.spatial.subset will be called prior to the other operations. This requires the 'region' argument.
         If None, this will automatically become True if 'region' is provided and the subsetting method is either 'cos-lat' or 'mean'.
-    region : dict or str
+    region : dict or str, optional
         Description of the region and the subsetting method (required fields listed in the Notes).
         If method=='interp_centroid', this is used to find the region's centroid.
         If method=='xesmf', the bounding box or shapefile is given to SpatialAverager.
         Can also be "global", for global averages. This is simply a shortcut for `{'name': 'global', 'method': 'bbox', 'lon_bnds': [-180, 180], 'lat_bnds': [-90, 90]}`.
-    kwargs : dict
+    kwargs : dict, optional
         Arguments to send to either mean(), interp() or SpatialAverager().
         For SpatialAverager, one can give `skipna` or  `out_chunks` here, to be passed to the averager call itself.
-    simplify_tolerance : float
+    simplify_tolerance : float, optional
         Precision (in degree) used to simplify a shapefile before sending it to SpatialAverager().
         The simpler the polygons, the faster the averaging, but it will lose some precision.
     to_domain : str, optional
@@ -696,14 +696,18 @@ def spatial_mean(
 def produce_horizon(
     ds: xr.Dataset,
     indicators: Union[
-        str, Path, Sequence[Indicator], Sequence[tuple[str, Indicator]], ModuleType
+        str,
+        os.PathLike,
+        Sequence[Indicator],
+        Sequence[tuple[str, Indicator]],
+        ModuleType,
     ],
     *,
-    periods: list = None,
+    periods: Union[list[str], list[list[str]]] = None,
     warminglevels: dict = None,
-    to_level: str = "horizons",
+    to_level: Optional[str] = "horizons",
     period: list = None,
-):
+) -> xr.Dataset:
     """Compute indicators, then the climatological mean, and finally unstack dates in order to have a single dataset with all indicators of different frequencies.
 
     Once this is done, the function drops 'time' in favor of 'horizon'.
@@ -714,16 +718,16 @@ def produce_horizon(
     ----------
     ds: xr.Dataset
         Input dataset with a time dimension.
-    indicators:  Union[str, Path, Sequence[Indicator], Sequence[Tuple[str, Indicator]]]
+    indicators: Union[str, os.PathLike, Sequence[Indicator], Sequence[tuple[str, Indicator]], ModuleType]
         Indicators to compute. It will be passed to the `indicators` argument of `xs.compute_indicators`.
-    periods: list
+    periods: list of str or list of lists of str, optional
         Either [start, end] or list of [start_year, end_year] for the period(s) to be evaluated.
         If both periods and warminglevels are None, the full time series will be used.
-    warminglevels: dict
+    warminglevels: dict, optional
         Dictionary of arguments to pass to :py:func:`xscen.subset_warming_level`.
         If 'wl' is a list, the function will be called for each value and produce multiple horizons.
         If both periods and warminglevels are None, the full time series will be used.
-    to_level:
+    to_level: str, optional
         The processing level to assign to the output.
         If there is only one horizon, you can use "{wl}", "{period0}" and "{period1}" in the string to dynamically include
         that information in the processing level.

diff --git a/xscen/biasadjust.py b/xscen/biasadjust.py
@@ -1,6 +1,5 @@
-# noqa: D100
+"""Functions to train and adjust a dataset using a bias-adjustment algorithm."""
 import logging
-import warnings
 from copy import deepcopy
 from typing import Optional, Union
 
@@ -14,9 +13,6 @@
 from .config import parse_config
 from .utils import minimum_calendar, standardize_periods
 
-# TODO: Change all paths to PosixPath objects, including in the catalog?
-# TODO: Compute sometimes fails randomly (in debug, pretty much always). Also (detrend?) fails with pr. Investigate why.
-
 logger = logging.getLogger(__name__)
 
 
@@ -60,16 +56,16 @@ def _add_preprocessing_attr(scen, train_kwargs):
 def train(
     dref: xr.Dataset,
     dhist: xr.Dataset,
-    var: list,
-    period: list,
+    var: Union[str, list],
+    period: list[str],
     *,
     method: str = "DetrendedQuantileMapping",
-    group: Union[sdba.Grouper, str, dict] = {"group": "time.dayofyear", "window": 31},
+    group: Union[sdba.Grouper, str, dict] = None,
     xclim_train_args: dict = None,
     maximal_calendar: str = "noleap",
-    adapt_freq: Optional[dict] = None,
-    jitter_under: Optional[dict] = None,
-    jitter_over: Optional[dict] = None,
+    adapt_freq: dict = None,
+    jitter_under: dict = None,
+    jitter_over: dict = None,
     align_on: Optional[str] = "year",
 ) -> xr.Dataset:
     """
@@ -81,14 +77,15 @@ def train(
       The target timeseries, on the reference period.
     dhist : xr.Dataset
       The timeseries to adjust, on the reference period.
-    var : str
-      Variable on which to do the adjustment
-    period : list
+    var : str or list of str
+      Variable on which to do the adjustment. Currently only supports one variable.
+    period : list of str
       [start, end] of the reference period
     method : str
       Name of the `sdba.TrainAdjust` method of xclim.
-    group : str or sdba.Grouper
-      Grouping information
+    group : str or sdba.Grouper or dict, optional
+      Grouping information. If a string, it is interpreted as a grouper on the time dimension. If a dict, it is passed to `sdba.Grouper.from_kwargs`.
+      Defaults to {"group": "time.dayofyear", "window": 31}.
     xclim_train_args : dict
       Dict of arguments to pass to the `.train` of the adjustment object.
     maximal_calendar: str
@@ -101,7 +98,7 @@ def train(
     jitter_over: dict, optional
       If given, a dictionary of args to pass to `jitter_over_thresh`.
     align_on: str, optional
-      `align_on` argument for the fonction `xclim.core.calendar.convert_calendar`.
+      `align_on` argument for the function `xclim.core.calendar.convert_calendar`.
 
     Returns
     -------
@@ -114,6 +111,8 @@ def train(
 
     """
     # TODO: To be adequately fixed later when we add multivariate
+    if isinstance(var, str):
+        var = [var]
     if len(var) != 1:
         raise ValueError(
             "biasadjust currently does not support entries with multiple variables."
@@ -122,6 +121,7 @@ def train(
         ref = dref[var[0]]
         hist = dhist[var[0]]
 
+    group = group or {"group": "time.dayofyear", "window": 31}
     xclim_train_args = xclim_train_args or {}
     if method == "DetrendedQuantileMapping":
         xclim_train_args.setdefault("nquantiles", 15)
@@ -189,15 +189,15 @@ def train(
 def adjust(
     dtrain: xr.Dataset,
     dsim: xr.Dataset,
-    periods: list,
-    xclim_adjust_args: dict,
+    periods: Union[list[str], list[list[str]]],
     *,
+    xclim_adjust_args: dict = None,
     to_level: str = "biasadjusted",
     bias_adjust_institution: str = None,
     bias_adjust_project: str = None,
-    moving_yearly_window: Optional[dict] = None,
+    moving_yearly_window: dict = None,
     align_on: Optional[str] = "year",
-):
+) -> xr.Dataset:
     """
     Adjust a simulation.
 
@@ -207,11 +207,11 @@ def adjust(
       A trained algorithm's dataset, as returned by `train`.
     dsim : xr.Dataset
       Simulated timeseries, projected period.
-    periods : list
+    periods : list of str or list of lists of str
       Either [start, end] or list of [start, end] of the simulation periods to be adjusted (one at a time).
-    xclim_adjust_args : dict
+    xclim_adjust_args : dict, optional
       Dict of arguments to pass to the `.adjust` of the adjustment object.
-    to_level : str, optional
+    to_level : str
       The processing level to assign to the output.
       Defaults to 'biasadjusted'.
     bias_adjust_institution : str, optional
@@ -240,6 +240,7 @@ def adjust(
     # TODO: To be adequately fixed later
 
     xclim_adjust_args = deepcopy(xclim_adjust_args)
+    xclim_adjust_args = xclim_adjust_args or {}
 
     if moving_yearly_window:
         dsim = construct_moving_yearly_window(dsim, **moving_yearly_window)
@@ -267,7 +268,6 @@ def adjust(
     if simcal != mincal:
         sim = convert_calendar(sim, mincal, align_on=align_on)
 
-    xclim_adjust_args = xclim_adjust_args or {}
     # do the adjustment for all the simulation_period lists
     periods = standardize_periods(periods)
     slices = []