Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Multiple small fixes to docstrings and typing #282

Merged
merged 25 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Bug fixes
* Fixed a bug in the documentation build configuration that prevented stable/latest and tagged documentation builds from resolving on ReadTheDocs. (:pull:`256`).
* Fixed ``get_warming_level`` to avoid incomplete matches. (:pull:`269`).
* `search_data_catalogs` now eliminates anything that matches any entry in `exclusions`. (:issue:`275`, :pull:`280`).
* Fixed a bug in ``xs.scripting.save_and_update`` where ``build_path_kwargs`` was ignored when trying to guess the file format. (:pull:`282`).

Internal changes
^^^^^^^^^^^^^^^^
Expand All @@ -51,7 +52,7 @@ Internal changes
* Linters are now called by order of most common failures first, to speed up the CI.
* `Manifest.in` is much more specific about what is installed.
* Re-adds a dev recipe to the `setup.py`.

* Multiple improvements to the docstrings and type annotations. (:pull:`282`).

v0.7.1 (2023-08-23)
-------------------
Expand Down
70 changes: 37 additions & 33 deletions xscen/aggregate.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# noqa: D100
"""Functions to aggregate data over time and space."""
import datetime
import logging
import os
import warnings
from collections.abc import Sequence
from copy import deepcopy
from pathlib import Path
from types import ModuleType
from typing import Union
from typing import Optional, Union

import geopandas as gpd
import numpy as np
Expand All @@ -16,7 +17,6 @@
import xclim as xc
import xclim.core.calendar
import xesmf as xe
from shapely.geometry import Polygon
from xclim.core.indicator import Indicator

from .config import parse_config
Expand Down Expand Up @@ -44,29 +44,29 @@ def _(s):
def climatological_mean(
ds: xr.Dataset,
*,
window: int = None,
min_periods: int = None,
window: Optional[int] = None,
min_periods: Optional[int] = None,
interval: int = 1,
periods: list = None,
to_level: str = "climatology",
periods: Optional[Union[list[str], list[list[str]]]] = None,
to_level: Optional[str] = "climatology",
) -> xr.Dataset:
"""Compute the mean over 'year' for given time periods, respecting the temporal resolution of ds.

Parameters
----------
ds : xr.Dataset
Dataset to use for the computation.
window : int
window : int, optional
Number of years to use for the time periods.
If left at None and periods is given, window will be the size of the first period.
If left at None and periods is not given, the window will be the size of the input dataset.
min_periods : int
min_periods : int, optional
For the rolling operation, minimum number of years required for a value to be computed.
If left at None and the xrfreq is either QS or AS and doesn't start in January, min_periods will be one less than window.
If left at None, it will be deemed the same as 'window'.
interval : int
Interval (in years) at which to provide an output.
periods : list
periods : list of str or list of lists of str, optional
Either [start, end] or list of [start, end] of continuous periods to be considered. This is needed when the time axis of ds contains some jumps in time.
If None, the dataset will be considered continuous.
to_level : str, optional
Expand Down Expand Up @@ -208,7 +208,7 @@ def compute_deltas(
*,
kind: Union[str, dict] = "+",
rename_variables: bool = True,
to_level: str = "deltas",
to_level: Optional[str] = "deltas",
) -> xr.Dataset:
"""Compute deltas in comparison to a reference time period, respecting the temporal resolution of ds.

Expand All @@ -218,7 +218,7 @@ def compute_deltas(
Dataset to use for the computation.
reference_horizon : str or xr.Dataset
Either a YYYY-YYYY string corresponding to the 'horizon' coordinate of the reference period, or a xr.Dataset containing the climatological mean.
kind : str
kind : str or dict
One of ['+', '/', '%']. Whether to provide absolute, relative, or percentage deltas.
Can also be a dictionary separated per variable name.
rename_variables : bool
Expand Down Expand Up @@ -372,13 +372,13 @@ def spatial_mean(
ds: xr.Dataset,
method: str,
*,
spatial_subset: bool = None,
call_clisops: bool = False,
region: Union[dict, str] = None,
kwargs: dict = None,
simplify_tolerance: float = None,
to_domain: str = None,
to_level: str = None,
spatial_subset: Optional[bool] = None,
call_clisops: Optional[bool] = False,
RondeauG marked this conversation as resolved.
Show resolved Hide resolved
region: Optional[Union[dict, str]] = None,
kwargs: Optional[dict] = None,
simplify_tolerance: Optional[float] = None,
to_domain: Optional[str] = None,
to_level: Optional[str] = None,
) -> xr.Dataset:
"""Compute the spatial mean using a variety of available methods.

Expand All @@ -391,18 +391,18 @@ def spatial_mean(
'interp_centroid' will find the region's centroid (if coordinates are not fed through kwargs), then perform a .interp() over the spatial dimensions of the Dataset.
The coordinate can also be directly fed to .interp() through the 'kwargs' argument below.
'xesmf' will make use of xESMF's SpatialAverager. This will typically be more precise, especially for irregular regions, but can be much slower than other methods.
spatial_subset : bool
spatial_subset : bool, optional
If True, xscen.spatial.subset will be called prior to the other operations. This requires the 'region' argument.
If None, this will automatically become True if 'region' is provided and the subsetting method is either 'cos-lat' or 'mean'.
region : dict or str
region : dict or str, optional
Description of the region and the subsetting method (required fields listed in the Notes).
If method=='interp_centroid', this is used to find the region's centroid.
If method=='xesmf', the bounding box or shapefile is given to SpatialAverager.
Can also be "global", for global averages. This is simply a shortcut for `{'name': 'global', 'method': 'bbox', 'lon_bnds': [-180, 180], 'lat_bnds': [-90, 90]}`.
kwargs : dict
kwargs : dict, optional
Arguments to send to either mean(), interp() or SpatialAverager().
For SpatialAverager, one can give `skipna` or `out_chunks` here, to be passed to the averager call itself.
simplify_tolerance : float
simplify_tolerance : float, optional
Precision (in degree) used to simplify a shapefile before sending it to SpatialAverager().
The simpler the polygons, the faster the averaging, but it will lose some precision.
to_domain : str, optional
Expand Down Expand Up @@ -696,14 +696,18 @@ def spatial_mean(
def produce_horizon(
ds: xr.Dataset,
indicators: Union[
str, Path, Sequence[Indicator], Sequence[tuple[str, Indicator]], ModuleType
str,
os.PathLike,
RondeauG marked this conversation as resolved.
Show resolved Hide resolved
Sequence[Indicator],
Sequence[tuple[str, Indicator]],
ModuleType,
],
*,
periods: list = None,
warminglevels: dict = None,
to_level: str = "horizons",
period: list = None,
):
periods: Optional[Union[list[str], list[list[str]]]] = None,
warminglevels: Optional[dict] = None,
to_level: Optional[str] = "horizons",
period: Optional[list] = None,
) -> xr.Dataset:
"""Compute indicators, then the climatological mean, and finally unstack dates in order to have a single dataset with all indicators of different frequencies.

Once this is done, the function drops 'time' in favor of 'horizon'.
Expand All @@ -714,16 +718,16 @@ def produce_horizon(
----------
ds: xr.Dataset
Input dataset with a time dimension.
indicators: Union[str, Path, Sequence[Indicator], Sequence[Tuple[str, Indicator]]]
indicators: Union[str, os.PathLike, Sequence[Indicator], Sequence[Tuple[str, Indicator]], ModuleType]
Indicators to compute. It will be passed to the `indicators` argument of `xs.compute_indicators`.
periods: list
periods: list of str or list of lists of str, optional
Either [start, end] or list of [start_year, end_year] for the period(s) to be evaluated.
If both periods and warminglevels are None, the full time series will be used.
warminglevels: dict
warminglevels: dict, optional
Dictionary of arguments to pass to :py:func:`xscen.subset_warming_level`.
If 'wl' is a list, the function will be called for each value and produce multiple horizons.
If both periods and warminglevels are None, the full time series will be used.
to_level:
to_level: str, optional
The processing level to assign to the output.
If there is only one horizon, you can use "{wl}", "{period0}" and "{period1}" in the string to dynamically include
that information in the processing level.
Expand Down
48 changes: 24 additions & 24 deletions xscen/biasadjust.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# noqa: D100
"""Functions to train and adjust a dataset using a bias-adjustment algorithm."""
import logging
import warnings
from copy import deepcopy
from typing import Optional, Union

Expand All @@ -14,9 +13,6 @@
from .config import parse_config
from .utils import minimum_calendar, standardize_periods

# TODO: Change all paths to PosixPath objects, including in the catalog?
# TODO: Compute sometimes fails randomly (in debug, pretty much always). Also (detrend?) fails with pr. Investigate why.

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -60,12 +56,12 @@ def _add_preprocessing_attr(scen, train_kwargs):
def train(
dref: xr.Dataset,
dhist: xr.Dataset,
var: list,
period: list,
var: Union[str, list[str]],
period: list[str],
*,
method: str = "DetrendedQuantileMapping",
group: Union[sdba.Grouper, str, dict] = {"group": "time.dayofyear", "window": 31},
xclim_train_args: dict = None,
group: Optional[Union[sdba.Grouper, str, dict]] = None,
xclim_train_args: Optional[dict] = None,
maximal_calendar: str = "noleap",
adapt_freq: Optional[dict] = None,
jitter_under: Optional[dict] = None,
Expand All @@ -81,14 +77,15 @@ def train(
The target timeseries, on the reference period.
dhist : xr.Dataset
The timeseries to adjust, on the reference period.
var : str
Variable on which to do the adjustment
period : list
var : str or list of str
Variable on which to do the adjustment. Currently only supports one variable.
period : list of str
[start, end] of the reference period
method : str
Name of the `sdba.TrainAdjust` method of xclim.
group : str or sdba.Grouper
Grouping information
group : str or sdba.Grouper or dict, optional
Grouping information. If a string, it is interpreted as a grouper on the time dimension. If a dict, it is passed to `sdba.Grouper.from_kwargs`.
Defaults to {"group": "time.dayofyear", "window": 31}.
xclim_train_args : dict
Dict of arguments to pass to the `.train` of the adjustment object.
maximal_calendar: str
Expand All @@ -101,7 +98,7 @@ def train(
jitter_over: dict, optional
If given, a dictionary of args to pass to `jitter_over_thresh`.
align_on: str, optional
`align_on` argument for the fonction `xclim.core.calendar.convert_calendar`.
`align_on` argument for the function `xclim.core.calendar.convert_calendar`.

Returns
-------
Expand All @@ -114,6 +111,8 @@ def train(

"""
# TODO: To be adequately fixed later when we add multivariate
if isinstance(var, str):
var = [var]
if len(var) != 1:
raise ValueError(
"biasadjust currently does not support entries with multiple variables."
Expand All @@ -122,6 +121,7 @@ def train(
ref = dref[var[0]]
hist = dhist[var[0]]

group = group or {"group": "time.dayofyear", "window": 31}
xclim_train_args = xclim_train_args or {}
if method == "DetrendedQuantileMapping":
xclim_train_args.setdefault("nquantiles", 15)
Expand Down Expand Up @@ -189,15 +189,15 @@ def train(
def adjust(
dtrain: xr.Dataset,
dsim: xr.Dataset,
periods: list,
xclim_adjust_args: dict,
periods: Union[list[str], list[list[str]]],
*,
xclim_adjust_args: Optional[dict] = None,
to_level: str = "biasadjusted",
bias_adjust_institution: str = None,
bias_adjust_project: str = None,
bias_adjust_institution: Optional[str] = None,
bias_adjust_project: Optional[str] = None,
moving_yearly_window: Optional[dict] = None,
align_on: Optional[str] = "year",
):
) -> xr.Dataset:
"""
Adjust a simulation.

Expand All @@ -207,11 +207,11 @@ def adjust(
A trained algorithm's dataset, as returned by `train`.
dsim : xr.Dataset
Simulated timeseries, projected period.
periods : list
periods : list of str or list of lists of str
Either [start, end] or list of [start, end] of the simulation periods to be adjusted (one at a time).
xclim_adjust_args : dict
xclim_adjust_args : dict, optional
Dict of arguments to pass to the `.adjust` of the adjustment object.
to_level : str, optional
to_level : str
The processing level to assign to the output.
Defaults to 'biasadjusted'
bias_adjust_institution : str, optional
Expand Down Expand Up @@ -240,6 +240,7 @@ def adjust(
# TODO: To be adequately fixed later

xclim_adjust_args = deepcopy(xclim_adjust_args)
xclim_adjust_args = xclim_adjust_args or {}

if moving_yearly_window:
dsim = construct_moving_yearly_window(dsim, **moving_yearly_window)
Expand Down Expand Up @@ -267,7 +268,6 @@ def adjust(
if simcal != mincal:
sim = convert_calendar(sim, mincal, align_on=align_on)

xclim_adjust_args = xclim_adjust_args or {}
# do the adjustment for all the simulation_period lists
periods = standardize_periods(periods)
slices = []
Expand Down
Loading