From b41be28272c180c8812ff3355aef5c995950c82d Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Fri, 20 Sep 2024 18:29:44 +0200 Subject: [PATCH] Feat/metrics quantiles (#2530) --- CHANGELOG.md | 6 +- darts/ad/anomaly_model/forecasting_am.py | 5 + darts/metrics/__init__.py | 12 +- darts/metrics/metrics.py | 818 ++++++++++++++---- darts/models/forecasting/forecasting_model.py | 93 +- darts/models/forecasting/regression_model.py | 16 +- darts/tests/metrics/test_metrics.py | 600 ++++++++++++- .../models/forecasting/test_backtesting.py | 174 ++++ .../models/forecasting/test_residuals.py | 265 ++++++ darts/tests/utils/test_utils.py | 46 +- darts/timeseries.py | 16 +- darts/utils/likelihood_models.py | 15 +- darts/utils/utils.py | 62 +- 13 files changed, 1929 insertions(+), 199 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fa74e021d..66bf84b146 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,10 +11,14 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Improved** -- Added `IQRDetector`, that allows to detect anomalies using the interquartile range algorithm. [#2441] by [Igor Urbanik](https://github.com/u8-igor). +- Added `IQRDetector`, that allows to detect anomalies using the interquartile range algorithm. [#2441](https://github.com/unit8co/darts/issues/2441) by [Igor Urbanik](https://github.com/u8-igor). - Added hyperparameters controlling the hidden layer sizes for the feature encoders in `TiDEModel`. [#2408](https://github.com/unit8co/darts/issues/2408) by [eschibli](https://github.com/eschibli). - Added hyperparameter `activation` to `BlockRNNModel` to specify the activation function in case of a multi-layer output network. [#2408](https://github.com/unit8co/darts/issues/2408) by [eschibli](https://github.com/eschibli). - Added support for broadcasting to TimeSeries on component and sample level. [#2476](https://https://github.com/unit8co/darts/pull/2476) by [Joel L.](https://github.com/Joelius300). +- Added support for computing metrics, backtest, and residuals on one or multiple quantiles `q`, either from probabilistic predictions or predicted quantiles. [#2530](https://github.com/unit8co/darts/issues/2530) by [Dennis Bader](https://github.com/dennisbader). +- Added quantile interval metrics: `miw` (Mean Interval Width, time aggregated) and `iw` (Interval Width, per time step / non-aggregated) which compute the width of quantile intervals `q_intervals` (expected to be a tuple or sequence of tuples with (lower quantile, upper quantile). [#2530](https://github.com/unit8co/darts/issues/2530) by [Dennis Bader](https://github.com/dennisbader). +- Added property `TimeSeries.shape` to get the shape of the time series. [#2530](https://github.com/unit8co/darts/issues/2530) by [Dennis Bader](https://github.com/dennisbader). +- Added support for parameters `enable_optimization` and `predict_likelihood_parameters` to the forecasting models' `backtest()` and `residuals()` methods. [#2530](https://github.com/unit8co/darts/issues/2530) by [Dennis Bader](https://github.com/dennisbader). - Helper function `darts.utils.utils.generate_index()` now accepts datetime strings as `start` and `end` parameters to generate the pandas DatetimeIndex. [#2522](https://github.com/unit8co/darts/pull/2522) by [Dennis Bader](https://github.com/dennisbader). - Various improvements in the documentation: - Made README's forecasting model support table more colorblind-friendly. [#2433](https://github.com/unit8co/darts/pull/2433) diff --git a/darts/ad/anomaly_model/forecasting_am.py b/darts/ad/anomaly_model/forecasting_am.py index a70c5b21bb..fd3eb9a33a 100644 --- a/darts/ad/anomaly_model/forecasting_am.py +++ b/darts/ad/anomaly_model/forecasting_am.py @@ -132,6 +132,7 @@ def fit( `train_length`. enable_optimization Whether to use the optimized version of historical_forecasts when supported and available. + Default: ``True``. model_fit_kwargs Parameters to be passed on to the forecast model `fit()` method. @@ -212,6 +213,7 @@ def score( `train_length`. enable_optimization Whether to use the optimized version of historical_forecasts when supported and available. + Default: ``True``. return_model_prediction Whether to return the forecasting model prediction along with the anomaly scores. @@ -299,6 +301,7 @@ def predict_series( `train_length`. enable_optimization Whether to use the optimized version of historical_forecasts when supported and available. + Default: ``True``. Returns ------- @@ -394,6 +397,7 @@ def eval_metric( `train_length`. enable_optimization Whether to use the optimized version of historical_forecasts when supported and available. + Default: ``True``. metric The name of the metric function to use. Must be one of "AUC_ROC" (Area Under the Receiver Operating Characteristic Curve) and "AUC_PR" (Average Precision from scores). @@ -499,6 +503,7 @@ def show_anomalies( `train_length`. enable_optimization Whether to use the optimized version of historical_forecasts when supported and available. + Default: ``True``. anomalies The ground truth of the anomalies (1 if it is an anomaly and 0 if not). names_of_scorers diff --git a/darts/metrics/__init__.py b/darts/metrics/__init__.py index 7a8cb445da..2d0b5aaa2e 100644 --- a/darts/metrics/__init__.py +++ b/darts/metrics/__init__.py @@ -2,7 +2,10 @@ Metrics ------- -For deterministic forecasts (point predictions with `num_samples == 1`): +For deterministic forecasts (point predictions with `num_samples == 1`), probabilistic forecasts (`num_samples > 1`), +and quantile forecasts. For probablistic and quantile forecasts, use parameter `q` to define the quantile(s) to +compute the deterministic metrics on: + - Aggregated over time: Absolute metrics: - :func:`MERR `: Mean Error @@ -42,8 +45,10 @@ - Aggregated over time: - :func:`MQL `: Mean Quantile Loss - :func:`QR `: Quantile Risk + - :func:`MIW `: Mean Interval Width - Per time step: - :func:`QL `: Quantile Loss + - :func:`IW `: Interval Width For Dynamic Time Warping (DTW) (aggregated over time): - :func:`DTW `: Dynamic Time Warping Metric @@ -57,11 +62,13 @@ coefficient_of_variation, dtw_metric, err, + iw, mae, mape, marre, mase, merr, + miw, mql, mse, msse, @@ -90,6 +97,7 @@ se, sle, sse, + iw, } __all__ = [ @@ -120,4 +128,6 @@ "sle", "smape", "sse", + "iw", + "miw", ] diff --git a/darts/metrics/metrics.py b/darts/metrics/metrics.py index 40a7e6e15e..1c667a7eb3 100644 --- a/darts/metrics/metrics.py +++ b/darts/metrics/metrics.py @@ -8,19 +8,27 @@ import inspect from functools import wraps from inspect import signature -from typing import Callable, List, Optional, Sequence, Tuple, Union +from typing import Any, Callable, List, Optional, Sequence, Tuple, Union import numpy as np +import pandas as pd from darts import TimeSeries from darts.dataprocessing import dtw from darts.logging import get_logger, raise_log -from darts.utils import _build_tqdm_iterator, _parallel_apply, n_steps_between from darts.utils.ts_utils import SeriesType, get_series_seq_type, series2seq +from darts.utils.utils import ( + _build_tqdm_iterator, + _parallel_apply, + likelihood_component_names, + n_steps_between, + quantile_names, +) logger = get_logger(__name__) TIME_AX = 0 COMP_AX = 1 +SMPL_AX = 2 # Note: for new metrics added to this module to be able to leverage the two decorators, it is required both having # the `actual_series` and `pred_series` parameters, and not having other ``Sequence`` as args (since these decorators @@ -33,6 +41,51 @@ ] +def interval_support(func) -> Callable[..., METRIC_OUTPUT_TYPE]: + """ + This decorator adds support for quantile interval metrics with sanity checks, processing, and extraction of + quantiles from the intervals. + """ + + @wraps(func) + def wrapper_interval_support(*args, **kwargs): + q = kwargs.get("q") + if q is not None: + raise_log( + ValueError( + "`q` is not supported for quantile interval metrics; use `q_interval` instead." + ) + ) + q_interval = kwargs.get("q_interval") + if q_interval is None: + raise_log( + ValueError("Quantile interval metrics require setting `q_interval`.") + ) + if isinstance(q_interval, tuple): + q_interval = [q_interval] + q_interval = np.array(q_interval) + if not q_interval.ndim == 2 or q_interval.shape[1] != 2: + raise_log( + ValueError( + "`q_interval` must be a tuple (float, float) or a sequence of tuples (float, float)." + ), + logger=logger, + ) + if not np.all(q_interval[:, 1] - q_interval[:, 0] > 0): + raise_log( + ValueError( + "all intervals in `q_interval` must be tuples of (lower q, upper q) with `lower q > upper q`. " + f"Received `q_interval={q_interval}`" + ), + logger=logger, + ) + kwargs["q_interval"] = q_interval + kwargs["q"] = np.sort(np.unique(q_interval)) + return func(*args, **kwargs) + + return wrapper_interval_support + + def multi_ts_support(func) -> Callable[..., METRIC_OUTPUT_TYPE]: """ This decorator further adapts the metrics that took as input two (or three for scaled metrics with `insample`) @@ -60,12 +113,7 @@ def wrapper_multi_ts_support(*args, **kwargs): params = signature(func).parameters n_jobs = kwargs.pop("n_jobs", params["n_jobs"].default) - if not isinstance(n_jobs, int): - raise_log(ValueError("n_jobs must be an integer"), logger=logger) - verbose = kwargs.pop("verbose", params["verbose"].default) - if not isinstance(verbose, bool): - raise_log(ValueError("verbose must be a bool"), logger=logger) # sanity check reduction functions _ = _get_reduction( @@ -131,6 +179,38 @@ def wrapper_multi_ts_support(*args, **kwargs): num_series_in_args += int("insample" not in kwargs) kwargs.pop("insample", 0) + # handle `q` (quantile) parameter for probabilistic (or quantile) forecasts + if "q" in params: + # convert `q` to tuple of (quantile values, optional quantile component names) + q = kwargs.get("q", params["q"].default) + q_comp_names = None + if q is None: + kwargs["q"] = None + else: + if isinstance(q, tuple): + q, q_comp_names = q + if isinstance(q, float): + q = np.array([q]) + else: + q = np.array(q) + + if not np.all(q[1:] - q[:-1] > 0.0): + raise_log( + ValueError( + "`q` must be of type `float`, or a sequence of increasing order with unique values only. " + f"Received `q={q}`." + ), + logger=logger, + ) + if not np.all(q >= 0.0) & np.all(q <= 1.0): + raise_log( + ValueError( + f"All `q` values must be in the range `(>=0,<=1)`. Received `q={q}`." + ), + logger=logger, + ) + kwargs["q"] = (q, q_comp_names) + iterator = _build_tqdm_iterator( iterable=zip(*input_series), verbose=verbose, @@ -187,20 +267,61 @@ def wrapper_multivariate_support(*args, **kwargs) -> METRIC_OUTPUT_TYPE: pred_series = args[1] num_series_in_args = 2 - if actual_series.width != pred_series.width: - raise_log( - ValueError( - f"Mismatch between number of components in `actual_series` " - f"(n={actual_series.width}) and `pred_series` (n={pred_series.width}." - ), - logger=logger, - ) + q, q_comp_names = kwargs.get("q"), None + if q is None: + # without quantiles, the number of components must match + if actual_series.n_components != pred_series.n_components: + raise_log( + ValueError( + f"Mismatch between number of components in `actual_series` " + f"(n={actual_series.width}) and `pred_series` (n={pred_series.width})." + ), + logger=logger, + ) + # compute median for stochastic predictions + if pred_series.is_stochastic: + q = np.array([0.5]) + else: + # `q` is required to be a tuple (handled by `multi_ts_support` wrapper) + if not isinstance(q, tuple) or not len(q) == 2: + raise_log( + ValueError( + "`q` must be of tuple of `(np.ndarray, Optional[pd.Index])` " + "where the (quantile values, optioanl quantile component names). " + f"Received `q={q}`." + ), + logger=logger, + ) + q, q_comp_names = q + if not pred_series.is_stochastic: + # quantile component names are required if the predictions are not stochastic (as for stocahstic + # predictions, the quantiles can be retrieved from the sample dimension for each component) + if q_comp_names is None: + q_comp_names = pd.Index( + likelihood_component_names( + components=actual_series.components, + parameter_names=quantile_names(q=q), + ) + ) + if not q_comp_names.isin(pred_series.components).all(): + raise_log( + ValueError( + f"Computing a metric with quantile(s) `q={q}` is only supported for probabilistic " + f"`pred_series` (num samples > 1) or `pred_series` containing the predicted " + f"quantiles as columns / components. Either pass a probabilistic `pred_series` or " + f"a series containing the expected quantile components: {q_comp_names.tolist()} " + ), + logger=logger, + ) + + if "q" in params: + kwargs["q"] = (q, q_comp_names) # handle `insample` parameters for scaled metrics input_series = (actual_series, pred_series) if "insample" in params: insample = args[2] - if actual_series.width != insample.width: + if actual_series.n_components != insample.n_components: raise_log( ValueError( f"Mismatch between number of components in `actual_series` " @@ -212,15 +333,18 @@ def wrapper_multivariate_support(*args, **kwargs) -> METRIC_OUTPUT_TYPE: num_series_in_args += 1 vals = func(*input_series, *args[num_series_in_args:], **kwargs) - if not 1 <= len(vals.shape) <= 2: + # bring vals to shape (n_time, n_comp, n_quantile) + if not 2 <= len(vals.shape) <= 3: raise_log( ValueError( - "Metric output must have 1 dimension for aggregated metrics (e.g. `mae()`, ...), " - "or 2 dimension for time dependent metrics (e.g. `ae()`, ...)" + "Metric output must have 2 dimensions (n components, n quantiles) " + "for aggregated metrics (e.g. `mae()`, ...), " + "or 3 dimension (n times, n components, n quantiles) " + "for time dependent metrics (e.g. `ae()`, ...)" ), logger=logger, ) - elif len(vals.shape) == 1: + if len(vals.shape) == 2: vals = np.expand_dims(vals, TIME_AX) time_reduction = _get_reduction( @@ -231,6 +355,7 @@ def wrapper_multivariate_support(*args, **kwargs) -> METRIC_OUTPUT_TYPE: sanity_check=False, ) if time_reduction is not None: + # -> (1, n_comp, n_quantile) vals = np.expand_dims(time_reduction(vals, axis=TIME_AX), axis=TIME_AX) component_reduction = _get_reduction( @@ -241,40 +366,71 @@ def wrapper_multivariate_support(*args, **kwargs) -> METRIC_OUTPUT_TYPE: sanity_check=False, ) if component_reduction is not None: - vals = np.expand_dims(component_reduction(vals, axis=COMP_AX), axis=COMP_AX) + # -> (*, n_quantile) + vals = component_reduction(vals, axis=COMP_AX) + else: + # -> (*, n_comp * n_quantile), with order [c0_q0, c0_q1, ... c1_q0, c1_q1, ...] + vals = vals.reshape(vals.shape[0], -1) return vals return wrapper_multivariate_support def _get_values( - vals: np.ndarray, stochastic_quantile: Optional[float] = 0.5 + vals: np.ndarray, + vals_components: pd.Index, + actual_components: pd.Index, + q: Optional[Tuple[Sequence[float], Union[Optional[pd.Index]]]] = None, ) -> np.ndarray: """ - Returns a deterministic or probabilistic numpy array from the values of a time series. - For stochastic input values, return either all sample values with (stochastic_quantile=None) or the quantile sample - value with (stochastic_quantile {>=0,<=1}) + Returns a deterministic or probabilistic numpy array from the values of a time series of shape + (times, components, samples / quantiles). + To extract quantile (sample) values from quantile or stachastic `vals`, use `q`. + + Parameters + ---------- + vals + A numpy array with the values of a TimeSeries (actual values or predictions). + vals_components + The components of the `vals` TimeSeries. + actual_components + The components of the actual TimeSeries. + q + Optionally, for stochastic or quantile series/values, return deterministic quantile values. + If not `None`, must a tuple with (quantile values, + `None` if `pred_series` is stochastic else the quantile component names). """ - if vals.shape[2] == 1: # deterministic - out = vals[:, :, 0] - else: # stochastic - if stochastic_quantile is None: - out = vals - else: - out = np.quantile(vals, stochastic_quantile, axis=2) - return out + # return values as is (times, components, samples) + if q is None: + return vals + + q, q_names = q + if vals.shape[SMPL_AX] == 1: # deterministic (or quantile components) input + if q_names is not None: + # `q_names` are the component names of the predicted quantile parameters + # we extract the relevant quantile components with shape (times, components * quantiles) + vals = vals[:, vals_components.get_indexer(q_names)] + # rearrange into (times, components, quantiles) + vals = vals.reshape((len(vals), len(actual_components), -1)) + return vals + + # probabilistic input + # compute multiple quantiles for all times and components; with shape: (quantiles, times, components) + out = np.quantile(vals, q, axis=SMPL_AX) + # rearrange into (times, components, quantiles) + return out.transpose((1, 2, 0)) def _get_values_or_raise( series_a: TimeSeries, series_b: TimeSeries, intersect: bool, - stochastic_quantile: Optional[float] = 0.5, + q: Optional[Tuple[Sequence[float], Union[Optional[pd.Index]]]] = None, remove_nan_union: bool = False, is_insample: bool = False, ) -> Tuple[np.ndarray, np.ndarray]: """Returns the processed numpy values of two time series. Processing can be customized with arguments - `intersect, stochastic_quantile, remove_nan_union`. + `intersect, q, remove_nan_union`. Parameters ---------- @@ -285,9 +441,10 @@ def _get_values_or_raise( A deterministic or stochastic ``TimeSeries`` instance (the predictions `pred_series`). intersect A boolean for whether to only consider the time intersection between `series_a` and `series_b` - stochastic_quantile - Optionally, for stochastic predicted series, return either all sample values with (`stochastic_quantile=None`) - or any deterministic quantile sample values by setting `stochastic_quantile=quantile` {>=0,<=1}. + q + Optionally, for predicted stochastic or quantile series, return deterministic quantile values. + If not `None`, must a tuple with (quantile values, + `None` if `pred_series` is stochastic else the quantile component names). remove_nan_union By setting `remove_non_union` to True, sets all values from `series_a` and `series_b` to `np.nan` at indices where any of the two series contain a NaN value. Only effective when `is_insample=False`. @@ -299,16 +456,6 @@ def _get_values_or_raise( ValueError If `is_insample=False` and the two time series do not have at least a partially overlapping time index. """ - - if not series_a.width == series_b.width: - raise_log( - ValueError("The two time series must have the same number of components"), - logger=logger, - ) - - if not isinstance(intersect, bool): - raise_log(ValueError("The intersect parameter must be a bool"), logger=logger) - make_copy = False if not is_insample: # get the time intersection and values of the two series (corresponds to `actual_series` and `pred_series` @@ -319,15 +466,12 @@ def _get_values_or_raise( vals_a_common = series_a.slice_intersect_values(series_b, copy=make_copy) vals_b_common = series_b.slice_intersect_values(series_a, copy=make_copy) - if not len(vals_a_common) == len(vals_b_common): - raise_log( - ValueError( - "The two time series must have at least a partially overlapping time index." - ), - logger=logger, - ) - - vals_b_det = _get_values(vals_b_common, stochastic_quantile=stochastic_quantile) + vals_b = _get_values( + vals=vals_b_common, + vals_components=series_b.components, + actual_components=series_a.components, + q=q, + ) else: # for `insample` series we extract only values up until before start of `pred_series` # find how many steps `insample` overlaps into `series_b` @@ -347,36 +491,67 @@ def _get_values_or_raise( ) end = end or None vals_a_common = series_a.all_values(copy=make_copy)[:end] - vals_b_det = None - vals_a_det = _get_values(vals_a_common, stochastic_quantile=stochastic_quantile) + vals_b = None + vals_a = _get_values( + vals=vals_a_common, + vals_components=series_a.components, + actual_components=series_a.components, + q=([0.5], None), + ) if not remove_nan_union or is_insample: - return vals_a_det, vals_b_det + return vals_a, vals_b - b_is_deterministic = bool(len(vals_b_det.shape) == 2) - if b_is_deterministic: - isnan_mask = np.logical_or(np.isnan(vals_a_det), np.isnan(vals_b_det)) - isnan_mask_pred = isnan_mask - else: - isnan_mask = np.logical_or( - np.isnan(vals_a_det), np.isnan(vals_b_det).any(axis=2) - ) - isnan_mask_pred = np.repeat( - np.expand_dims(isnan_mask, axis=-1), vals_b_det.shape[2], axis=2 - ) - return np.where(isnan_mask, np.nan, vals_a_det), np.where( - isnan_mask_pred, np.nan, vals_b_det + isnan_mask = np.expand_dims( + np.logical_or(np.isnan(vals_a), np.isnan(vals_b)).any(axis=SMPL_AX), axis=-1 + ) + isnan_mask_pred = np.repeat(isnan_mask, vals_b.shape[SMPL_AX], axis=SMPL_AX) + return np.where(isnan_mask, np.nan, vals_a), np.where( + isnan_mask_pred, np.nan, vals_b ) +def _get_quantile_intervals( + vals: np.ndarray, + q: Tuple[Sequence[float], Any], + q_interval: np.ndarray = None, +) -> Tuple[np.ndarray, np.ndarray]: + """Returns the lower and upper bound values from `vals` for all quantile intervals in `q_interval`. + + Parameters + ---------- + vals + A numpy array with predicted quantile values of shape (n times, n components, n quantiles). + q + A tuple with (quantile values, any). + q_interval + A numpy array with the lower and upper quantile interval bound of shape (n intervals, 2). + """ + q, _ = q + # find index of every `q_interval` value in `q`; we have guarantees from support wrappers: + # - `q` has increasing order + # - `vals` has same order as `q` in dim 3 (quantile dim) + # - `q_interval` holds (lower q, upper q) in that order + q_idx = np.searchsorted(q, q_interval.flatten()).reshape(q_interval.shape) + return vals[:, :, q_idx[:, 0]], vals[:, :, q_idx[:, 1]] + + def _get_wrapped_metric( - func: Callable[..., METRIC_OUTPUT_TYPE], + func: Callable[..., METRIC_OUTPUT_TYPE], n_wrappers: int = 2 ) -> Callable[..., METRIC_OUTPUT_TYPE]: """Returns the inner metric function `func` which bypasses the decorators `multi_ts_support` and `multivariate_support`. It significantly decreases process time compared to calling `func` directly. Only use this to compute a pre-defined metric within the scope of another metric. """ - return func.__wrapped__.__wrapped__ + if not 2 <= n_wrappers <= 3: + raise_log( + NotImplementedError("Only 2-3 wrappers are currently supported"), + logger=logger, + ) + if n_wrappers == 2: + return func.__wrapped__.__wrapped__ + else: + return func.__wrapped__.__wrapped__.__wrapped__ def _get_reduction( @@ -471,6 +646,7 @@ def err( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -484,8 +660,9 @@ def err( .. math:: y_t - \\hat{y}_t - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -496,6 +673,8 @@ def err( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -543,7 +722,11 @@ def err( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=False + actual_series, + pred_series, + intersect, + remove_nan_union=False, + q=q, ) return y_true - y_pred @@ -555,6 +738,7 @@ def merr( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -567,8 +751,9 @@ def merr( .. math:: \\frac{1}{T}\\sum_{t=1}^T{(y_t - \\hat{y}_t)} - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -579,6 +764,8 @@ def merr( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -620,6 +807,7 @@ def merr( actual_series, pred_series, intersect, + q=q, ), axis=TIME_AX, ) @@ -632,6 +820,7 @@ def ae( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -645,8 +834,9 @@ def ae( .. math:: |y_t - \\hat{y}_t| - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -657,6 +847,8 @@ def ae( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -704,7 +896,11 @@ def ae( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=False + actual_series, + pred_series, + intersect, + remove_nan_union=False, + q=q, ) return np.abs(y_true - y_pred) @@ -716,6 +912,7 @@ def mae( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -728,8 +925,9 @@ def mae( .. math:: \\frac{1}{T}\\sum_{t=1}^T{|y_t - \\hat{y}_t|} - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -740,6 +938,8 @@ def mae( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -781,6 +981,7 @@ def mae( actual_series, pred_series, intersect, + q=q, ), axis=TIME_AX, ) @@ -795,6 +996,7 @@ def ase( m: int = 1, intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -816,8 +1018,9 @@ def ase( .. math:: E_m = MAE(y_{m:t_p}, y_{0:t_p - m}). - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -835,6 +1038,8 @@ def ase( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -895,6 +1100,7 @@ def ase( actual_series, pred_series, intersect, + q=q, ) return errors / error_scale @@ -908,6 +1114,7 @@ def mase( m: int = 1, intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -928,8 +1135,9 @@ def mase( .. math:: E_m = MAE(y_{m:t_p}, y_{0:t_p - m}). - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -947,6 +1155,8 @@ def mase( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -1000,6 +1210,7 @@ def mase( insample, m=m, intersect=intersect, + q=q, ), axis=TIME_AX, ) @@ -1012,6 +1223,7 @@ def se( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -1025,8 +1237,9 @@ def se( .. math:: (y_t - \\hat{y}_t)^2. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1037,6 +1250,8 @@ def se( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -1084,7 +1299,11 @@ def se( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=False + actual_series, + pred_series, + intersect, + remove_nan_union=False, + q=q, ) return (y_true - y_pred) ** 2 @@ -1096,6 +1315,7 @@ def mse( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -1108,8 +1328,9 @@ def mse( .. math:: \\frac{1}{T}\\sum_{t=1}^T{(y_t - \\hat{y}_t)^2}. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1120,6 +1341,8 @@ def mse( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -1161,6 +1384,7 @@ def mse( actual_series, pred_series, intersect, + q=q, ), axis=TIME_AX, ) @@ -1175,6 +1399,7 @@ def sse( m: int = 1, intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -1196,8 +1421,9 @@ def sse( .. math:: E_m = MSE(y_{m:t_p}, y_{0:t_p - m}). - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1215,6 +1441,8 @@ def sse( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -1275,6 +1503,7 @@ def sse( actual_series, pred_series, intersect, + q=q, ) return errors / error_scale @@ -1288,6 +1517,7 @@ def msse( m: int = 1, intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -1308,8 +1538,9 @@ def msse( .. math:: E_m = MSE(y_{m:t_p}, y_{0:t_p - m}). - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1327,6 +1558,8 @@ def msse( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -1380,6 +1613,7 @@ def msse( insample, m=m, intersect=intersect, + q=q, ), axis=TIME_AX, ) @@ -1392,6 +1626,7 @@ def rmse( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -1404,8 +1639,9 @@ def rmse( .. math:: \\sqrt{\\frac{1}{T}\\sum_{t=1}^T{(y_t - \\hat{y}_t)^2}} - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1416,6 +1652,8 @@ def rmse( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -1457,6 +1695,7 @@ def rmse( actual_series, pred_series, intersect, + q=q, ) ) @@ -1470,6 +1709,7 @@ def rmsse( m: int = 1, intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -1490,8 +1730,9 @@ def rmsse( .. math:: E_m = RMSE(y_{m:t_p}, y_{0:t_p - m}). - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1509,6 +1750,8 @@ def rmsse( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -1560,6 +1803,7 @@ def rmsse( actual_series, pred_series, intersect, + q=q, ) return errors / error_scale @@ -1571,6 +1815,7 @@ def sle( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -1586,8 +1831,9 @@ def sle( using the natural logarithm. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1598,6 +1844,8 @@ def sle( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -1645,7 +1893,11 @@ def sle( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=False + actual_series, + pred_series, + intersect, + remove_nan_union=False, + q=q, ) y_true, y_pred = np.log(y_true + 1), np.log(y_pred + 1) return (y_true - y_pred) ** 2 @@ -1658,6 +1910,7 @@ def rmsle( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -1672,8 +1925,9 @@ def rmsle( using the natural logarithm. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1684,6 +1938,8 @@ def rmsle( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -1726,6 +1982,7 @@ def rmsle( actual_series, pred_series, intersect, + q=q, ), axis=TIME_AX, ) @@ -1739,6 +1996,7 @@ def ape( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -1755,8 +2013,9 @@ def ape( Note that it will raise a `ValueError` if :math:`y_t = 0` for some :math:`t`. Consider using the Absolute Scaled Error (:func:`~darts.metrics.metrics.ase`) in these cases. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1767,6 +2026,8 @@ def ape( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -1819,7 +2080,11 @@ def ape( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=False + actual_series, + pred_series, + intersect, + remove_nan_union=False, + q=q, ) if not (y_true != 0).all(): raise_log( @@ -1838,6 +2103,7 @@ def mape( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -1853,8 +2119,9 @@ def mape( Note that it will raise a `ValueError` if :math:`y_t = 0` for some :math:`t`. Consider using the Mean Absolute Scaled Error (:func:`~darts.metrics.metrics.mase`) in these cases. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1865,6 +2132,8 @@ def mape( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -1912,6 +2181,7 @@ def mape( actual_series, pred_series, intersect, + q=q, ), axis=TIME_AX, ) @@ -1924,6 +2194,7 @@ def sape( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -1941,8 +2212,9 @@ def sape( Note that it will raise a `ValueError` if :math:`\\left| y_t \\right| + \\left| \\hat{y}_t \\right| = 0` for some :math:`t`. Consider using the Absolute Scaled Error (:func:`~darts.metrics.metrics.ase`) in these cases. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -1953,6 +2225,8 @@ def sape( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -2005,7 +2279,11 @@ def sape( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=True + actual_series, + pred_series, + intersect, + remove_nan_union=True, + q=q, ) if not np.logical_or(y_true != 0, y_pred != 0).all(): raise_log( @@ -2024,6 +2302,7 @@ def smape( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -2042,8 +2321,9 @@ def smape( for some :math:`t`. Consider using the Mean Absolute Scaled Error (:func:`~darts.metrics.metrics.mase`) in these cases. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -2054,6 +2334,8 @@ def smape( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -2101,6 +2383,7 @@ def smape( actual_series, pred_series, intersect, + q=q, ), axis=TIME_AX, ) @@ -2113,6 +2396,7 @@ def ope( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -2126,8 +2410,9 @@ def ope( .. math:: 100 \\cdot \\left| \\frac{\\sum_{t=1}^{T}{y_t} - \\sum_{t=1}^{T}{\\hat{y}_t}}{\\sum_{t=1}^{T}{y_t}} \\right|. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -2138,6 +2423,8 @@ def ope( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -2181,7 +2468,11 @@ def ope( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=True + actual_series, + pred_series, + intersect, + remove_nan_union=True, + q=q, ) y_true_sum, y_pred_sum = ( np.nansum(y_true, axis=TIME_AX), @@ -2204,6 +2495,7 @@ def arre( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -2217,8 +2509,9 @@ def arre( .. math:: 100 \\cdot \\left| \\frac{y_t - \\hat{y}_t} {\\max_t{y_t} - \\min_t{y_t}} \\right| - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -2229,6 +2522,8 @@ def arre( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a @@ -2281,7 +2576,11 @@ def arre( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=True + actual_series, + pred_series, + intersect, + remove_nan_union=True, + q=q, ) y_max, y_min = np.nanmax(y_true, axis=TIME_AX), np.nanmin(y_true, axis=TIME_AX) if not (y_max > y_min).all(): @@ -2303,6 +2602,7 @@ def marre( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -2316,8 +2616,9 @@ def marre( .. math:: 100 \\cdot \\frac{1}{T} \\sum_{t=1}^{T} {\\left| \\frac{y_t - \\hat{y}_t} {\\max_t{y_t} - \\min_t{y_t}} \\right|} - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -2328,6 +2629,8 @@ def marre( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -2372,6 +2675,7 @@ def marre( actual_series, pred_series, intersect, + q=q, ), axis=TIME_AX, ) @@ -2384,6 +2688,7 @@ def r2_score( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -2400,8 +2705,9 @@ def r2_score( This metric is not symmetric. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -2412,6 +2718,8 @@ def r2_score( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -2453,7 +2761,11 @@ def r2_score( .. [1] https://en.wikipedia.org/wiki/Coefficient_of_determination """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=True + actual_series, + pred_series, + intersect, + remove_nan_union=True, + q=q, ) ss_errors = np.nansum((y_true - y_pred) ** 2, axis=TIME_AX) y_hat = np.nanmean(y_true, axis=TIME_AX) @@ -2468,6 +2780,7 @@ def coefficient_of_variation( pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -2483,8 +2796,9 @@ def coefficient_of_variation( where :math:`RMSE` is the Root Mean Squared Error (:func:`~darts.metrics.metrics.rmse`), and :math:`\\bar{y}` is the average of :math:`y` over all time steps. - If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples - for time step :math:`t`. + If :math:`\\hat{y}_t` are stochastic (contains several samples) or quantile predictions, use parameter `q` to + specify on which quantile(s) to compute the metric on. By default, it uses the median 0.5 quantile + (over all samples, or, if given, the quantile prediction itself). Parameters ---------- @@ -2495,6 +2809,8 @@ def coefficient_of_variation( intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -2533,7 +2849,11 @@ def coefficient_of_variation( """ y_true, y_pred = _get_values_or_raise( - actual_series, pred_series, intersect, remove_nan_union=True + actual_series, + pred_series, + intersect, + remove_nan_union=True, + q=q, ) # not calling rmse as y_true and y_pred are np.ndarray return ( @@ -2629,9 +2949,9 @@ def dtw_metric( def qr( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], - q: float = 0.5, intersect: bool = True, *, + q: Union[float, List[float], Tuple[np.ndarray, pd.Index]] = 0.5, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -2660,11 +2980,11 @@ def qr( The (sequence of) actual series. pred_series The (sequence of) predicted series. - q - The quantile (float [0, 1]) of interest for the risk evaluation. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + The quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -2713,18 +3033,19 @@ def qr( actual_series, pred_series, intersect, - stochastic_quantile=None, + q=None, remove_nan_union=True, ) z_true = np.nansum(z_true, axis=TIME_AX) z_hat = np.nansum( z_hat, axis=TIME_AX ) # aggregate all individual sample realizations + # quantile loss + q, _ = q z_hat_rho = np.quantile( z_hat, q=q, axis=1 - ) # get the quantile from aggregated samples + ).T # get the quantile from aggregated samples - # quantile loss errors = z_true - z_hat_rho losses = 2 * np.maximum((q - 1) * errors, q * errors) return losses / z_true @@ -2735,9 +3056,9 @@ def qr( def ql( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], - q: float = 0.5, intersect: bool = True, *, + q: Union[float, List[float], Tuple[np.ndarray, pd.Index]] = 0.5, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, @@ -2747,7 +3068,8 @@ def ql( """Quantile Loss (QL). Also known as Pinball Loss. QL is a metric that quantifies the accuracy of a specific quantile :math:`q` from the - predicted value distribution of a stochastic/probabilistic `pred_series` containing N samples. + predicted deterministic quantiles or value distribution of a stochastic/probabilistic `pred_series` containing N + samples. QL computes the quantile of all sample values and the loss per time step. @@ -2756,7 +3078,7 @@ def ql( .. math:: 2 \\max((q - 1) (y_t - \\hat{y}_{t,q}), q (y_t - \\hat{y}_{t,q})), - where :math:`\\hat{y}_{t,q}` is the quantile :math:`q` of all predicted sample values at time :math:`t`. + where :math:`\\hat{y}_{t,q}` is quantile value :math:`q` (of all predicted quantiles or samples) at time :math:`t`. The factor `2` makes the loss more interpretable, as for `q=0.5` the loss is identical to the Absolute Error (:func:`~darts.metrics.metrics.ae`). @@ -2766,11 +3088,11 @@ def ql( The (sequence of) actual series. pred_series The (sequence of) predicted series. - q - The quantile (float [0, 1]) of interest for the loss. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + The quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -2816,22 +3138,14 @@ def ql( List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ - if not pred_series.is_stochastic: - raise_log( - ValueError( - "quantile/pinball loss (ql) should only be computed for " - "stochastic predicted TimeSeries." - ), - logger=logger, - ) - y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, - stochastic_quantile=q, + q=q, remove_nan_union=True, ) + q, _ = q errors = y_true - y_pred losses = 2.0 * np.maximum((q - 1) * errors, q * errors) return losses @@ -2842,9 +3156,9 @@ def ql( def mql( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], - q: float = 0.5, intersect: bool = True, *, + q: Union[float, List[float], Tuple[np.ndarray, pd.Index]] = 0.5, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, @@ -2853,7 +3167,8 @@ def mql( """Mean Quantile Loss (MQL). Also known as Pinball Loss. QL is a metric that quantifies the accuracy of a specific quantile :math:`q` from the - predicted value distribution of a stochastic/probabilistic `pred_series` containing N samples. + predicted deterministic quantiles or value distribution of a stochastic/probabilistic `pred_series` containing N + samples. MQL first computes the quantile of all sample values and the loss per time step, and then takes the mean over the time axis. @@ -2863,7 +3178,7 @@ def mql( .. math:: 2 \\frac{1}{T}\\sum_{t=1}^T{\\max((q - 1) (y_t - \\hat{y}_{t,q}), q (y_t - \\hat{y}_{t,q}))}, - where :math:`\\hat{y}_{t,q}` is the quantile :math:`q` of all predicted sample values at time :math:`t`. + where :math:`\\hat{y}_{t,q}` is quantile value :math:`q` (of all predicted quantiles or samples) at time :math:`t`. The factor `2` makes the loss more interpretable, as for `q=0.5` the loss is identical to the Mean Absolute Error (:func:`~darts.metrics.metrics.mae`). @@ -2873,11 +3188,11 @@ def mql( The (sequence of) actual series. pred_series The (sequence of) predicted series. - q - The quantile (float [0, 1]) of interest for the loss. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). + q + The quantile (float [0, 1]) or list of quantiles of interest to compute the metric on. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a @@ -2923,3 +3238,192 @@ def mql( ), axis=TIME_AX, ) + + +@interval_support +@multi_ts_support +@multivariate_support +def iw( + actual_series: Union[TimeSeries, Sequence[TimeSeries]], + pred_series: Union[TimeSeries, Sequence[TimeSeries]], + intersect: bool = True, + *, + q_interval: Union[Tuple[float, float], Sequence[Tuple[float, float]]] = None, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, + time_reduction: Optional[Callable[..., np.ndarray]] = None, + component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, + series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, + n_jobs: int = 1, + verbose: bool = False, +) -> METRIC_OUTPUT_TYPE: + """Interval Width (IL). + + IL gives the width of predicted quantile intervals. + + For the true series :math:`y` and predicted stochastic or quantile series :math:`\\hat{y}` of length :math:`T`, + it is computed per component/column, quantile interval, and time step + :math:`t` as: + + .. math:: \\hat{y}_{t,qh} - \\hat{y}_{t,ql} + + where :math:`\\hat{y}_{t,qh}` are the upper bound quantile values (of all predicted quantiles or samples) at time + :math:`t`, and :math:`\\hat{y}_{t,ql}` are the lower bound quantile values. + + Parameters + ---------- + actual_series + The (sequence of) actual series. + pred_series + The (sequence of) predicted series. + intersect + For time series that are overlapping in time without having the same time index, setting `True` + will consider the values only over their common time interval (intersection in time). + q_interval + The quantile interval(s) to compute the metric on. Must be a tuple (single interval) or sequence tuples + (multiple intervals) with elements (low quantile, high quantile). + q + Quantiles `q` not supported by this metric; use `q_interval` instead. + component_reduction + Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` + of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a + parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the + component axis. If `None`, will return a metric per component. + time_reduction + Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` + of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a + parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the + time axis. If `None`, will return a metric per time step. + series_reduction + Optionally, a function to aggregate the metrics over multiple series. It must reduce a `np.ndarray` + of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a + parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the + series axis. For example with `np.nanmean`, will return the average over all series metrics. If `None`, will + return a metric per component. + n_jobs + The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is + passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` + (sequential). Setting the parameter to `-1` means using all the available processors. + verbose + Optionally, whether to print operations progress + + Returns + ------- + float + A single metric score for: + + - single univariate series. + - single multivariate series with `component_reduction`. + - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and + `time_reduction`. + np.ndarray + A numpy array of metric scores. The array has shape (n time steps, n components) without time + and component reductions. For: + + - single multivariate series and at least `component_reduction=None`. + - single uni/multivariate series and at least `time_reduction=None`. + - a sequence of uni/multivariate series including `series_reduction` and at least one of + `component_reduction=None` or `time_reduction=None`. + List[float] + Same as for type `float` but for a sequence of series. + List[np.ndarray] + Same as for type `np.ndarray` but for a sequence of series. + """ + y_true, y_pred = _get_values_or_raise( + actual_series, + pred_series, + intersect, + remove_nan_union=True, + q=q, + ) + y_pred_lo, y_pred_hi = _get_quantile_intervals(y_pred, q=q, q_interval=q_interval) + return y_pred_hi - y_pred_lo + + +@interval_support +@multi_ts_support +@multivariate_support +def miw( + actual_series: Union[TimeSeries, Sequence[TimeSeries]], + pred_series: Union[TimeSeries, Sequence[TimeSeries]], + intersect: bool = True, + *, + q_interval: Union[Tuple[float, float], Sequence[Tuple[float, float]]] = None, + q: Optional[Union[float, List[float], Tuple[np.ndarray, pd.Index]]] = None, + component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, + series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, + n_jobs: int = 1, + verbose: bool = False, +) -> METRIC_OUTPUT_TYPE: + """Mean Interval Width (IL). + + IL gives the width of predicted quantile intervals aggregated over time. + + For the true series :math:`y` and predicted stochastic or quantile series :math:`\\hat{y}` of length :math:`T`, + it is computed per component/column, quantile interval, and time step + :math:`t` as: + + .. math:: \\frac{1}{T}\\sum_{t=1}^T{\\hat{y}_{t,qh} - \\hat{y}_{t,ql}} + + where :math:`\\hat{y}_{t,qh}` are the upper bound quantile values (of all predicted quantiles or samples) at time + :math:`t`, and :math:`\\hat{y}_{t,ql}` are the lower bound quantile values. + + Parameters + ---------- + actual_series + The (sequence of) actual series. + pred_series + The (sequence of) predicted series. + intersect + For time series that are overlapping in time without having the same time index, setting `True` + will consider the values only over their common time interval (intersection in time). + q_interval + The quantile interval(s) to compute the metric on. Must be a tuple (single interval) or sequence tuples + (multiple intervals) with elements (low quantile, high quantile). + q + Quantiles `q` not supported by this metric; use `q_interval` instead. + component_reduction + Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` + of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a + parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the + component axis. If `None`, will return a metric per component. + series_reduction + Optionally, a function to aggregate the metrics over multiple series. It must reduce a `np.ndarray` + of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a + parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the + series axis. For example with `np.nanmean`, will return the average over all series metrics. If `None`, will + return a metric per component. + n_jobs + The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is + passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` + (sequential). Setting the parameter to `-1` means using all the available processors. + verbose + Optionally, whether to print operations progress + + Returns + ------- + float + A single metric score for: + + - single univariate series. + - single multivariate series with `component_reduction`. + - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. + np.ndarray + A numpy array of metric scores. The array has shape (n components,) without component reduction. For: + + - single multivariate series and at least `component_reduction=None`. + - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. + List[float] + Same as for type `float` but for a sequence of series. + List[np.ndarray] + Same as for type `np.ndarray` but for a sequence of series. + """ + return np.nanmean( + _get_wrapped_metric(iw, n_wrappers=3)( + actual_series, + pred_series, + intersect, + q=q, + q_interval=q_interval, + ), + axis=TIME_AX, + ) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 5a5dc7a738..ee271ceed6 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -57,7 +57,12 @@ get_single_series, series2seq, ) -from darts.utils.utils import generate_index +from darts.utils.utils import ( + generate_index, + likelihood_component_names, + quantile_interval_names, + quantile_names, +) logger = get_logger(__name__) @@ -754,6 +759,7 @@ def historical_forecasts( Default: ``False`` enable_optimization Whether to use the optimized version of historical_forecasts when supported and available. + Default: ``True``. fit_kwargs Additional arguments passed to the model `fit()` method. predict_kwargs @@ -1193,6 +1199,8 @@ def backtest( reduction: Union[Callable[..., float], None] = np.mean, verbose: bool = False, show_warnings: bool = True, + predict_likelihood_parameters: bool = False, + enable_optimization: bool = True, metric_kwargs: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None, fit_kwargs: Optional[Dict[str, Any]] = None, predict_kwargs: Optional[Dict[str, Any]] = None, @@ -1312,6 +1320,13 @@ def backtest( Whether to print progress. show_warnings Whether to show warnings related to parameters `start`, and `train_length`. + predict_likelihood_parameters + If set to `True`, the model predict the parameters of its Likelihood parameters instead of the target. Only + supported for probabilistic models with `likelihood="quantile"`, `num_samples = 1` and + `n<=output_chunk_length`. Default: ``False``. + enable_optimization + Whether to use the optimized version of historical_forecasts when supported and available. + Default: ``True``. metric_kwargs Additional arguments passed to `metric()`, such as `'n_jobs'` for parallelization, `'component_reduction'` for reducing the component wise metrics, seasonality `'m'` for scaled metrics, etc. Will pass arguments to @@ -1343,9 +1358,9 @@ def backtest( An numpy array of backtest scores. For single series and one of: - a single `metric` function, `historical_forecasts` generated with `last_points_only=False` - and backtest `reduction=None`. The output has shape (n forecasts,). + and backtest `reduction=None`. The output has shape (n forecasts, *). - multiple `metric` functions and `historical_forecasts` generated with `last_points_only=False`. - The output has shape (n metrics,) when using a backtest `reduction`, and (n metrics, n forecasts) + The output has shape (*, n metrics) when using a backtest `reduction`, and (n forecasts, *, n metrics) when `reduction=None` - multiple uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None` for "per time step metrics" @@ -1391,6 +1406,8 @@ def backtest( last_points_only=last_points_only, verbose=verbose, show_warnings=show_warnings, + predict_likelihood_parameters=predict_likelihood_parameters, + enable_optimization=enable_optimization, fit_kwargs=fit_kwargs, predict_kwargs=predict_kwargs, sample_weight=sample_weight, @@ -1491,22 +1508,24 @@ def __getitem__(self, index) -> TimeSeries: # get errors for each input `series` backtest_list = [] for i in range(len(cum_len) - 1): - # errors_series with shape `(n metrics, n series specific historical forecasts)` + # errors_series with shape `(n metrics, n series specific historical forecasts, *)` errors_series = errors[:, cum_len[i] : cum_len[i + 1]] if reduction is not None: - # shape `(n metrics, n forecasts)` -> `(n metrics,)` + # shape `(n metrics, n forecasts, *)` -> `(n metrics, *)` errors_series = reduction(errors_series, axis=1) elif last_points_only: - # shape `(n metrics, n forecasts = 1)` -> `(n metrics,)` + # shape `(n metrics, n forecasts = 1, *)` -> `(n metrics, *)` errors_series = errors_series[:, 0] if len(metric) == 1: # shape `(n metrics, *)` -> `(*,)` errors_series = errors_series[0] - elif not last_points_only and reduction is None: + else: # shape `(n metrics, *)` -> `(*, n metrics)` - errors_series = errors_series.T + errors_series = errors_series.transpose( + tuple(i for i in range(1, errors_series.ndim)) + (0,) + ) backtest_list.append(errors_series) return ( @@ -1831,6 +1850,8 @@ def residuals( metric: METRIC_TYPE = metrics.err, verbose: bool = False, show_warnings: bool = True, + predict_likelihood_parameters: bool = False, + enable_optimization: bool = True, metric_kwargs: Optional[Dict[str, Any]] = None, fit_kwargs: Optional[Dict[str, Any]] = None, predict_kwargs: Optional[Dict[str, Any]] = None, @@ -1949,6 +1970,13 @@ def residuals( Whether to print progress. show_warnings Whether to show warnings related to parameters `start`, and `train_length`. + predict_likelihood_parameters + If set to `True`, the model predict the parameters of its Likelihood parameters instead of the target. Only + supported for probabilistic models with `likelihood="quantile"`, `num_samples = 1` and + `n<=output_chunk_length`. Default: ``False``. + enable_optimization + Whether to use the optimized version of historical_forecasts when supported and available. + Default: ``True``. metric_kwargs Additional arguments passed to `metric()`, such as `'n_jobs'` for parallelization, `'m'` for scaled metrics, etc. Will pass arguments only if they are present in the corresponding metric signature. Ignores @@ -2004,6 +2032,8 @@ def residuals( last_points_only=last_points_only, verbose=verbose, show_warnings=show_warnings, + predict_likelihood_parameters=predict_likelihood_parameters, + enable_optimization=enable_optimization, fit_kwargs=fit_kwargs, predict_kwargs=predict_kwargs, overlap_end=False, @@ -2035,9 +2065,10 @@ def residuals( residuals = [[res] for res in residuals] # sanity check residual output + q, q_interval = metric_kwargs.get("q"), metric_kwargs.get("q_interval") try: - res, fc = residuals[0][0], historical_forecasts[0][0] - _ = np.reshape(res, (len(fc), fc.n_components, 1)) + series_, res, fc = series[0], residuals[0][0], historical_forecasts[0][0] + _ = np.reshape(res, (len(fc), -1, 1)) except Exception as err: raise_log( ValueError( @@ -2051,13 +2082,47 @@ def residuals( # process residuals residuals_out = [] - for fc_list, res_list in zip(historical_forecasts, residuals): + for series_, fc_list, res_list in zip(series, historical_forecasts, residuals): res_list_out = [] + if q is not None: + q = [q] if isinstance(q, float) else q + # multi-quantile metrics yield more components + comp_names = likelihood_component_names( + components=series_.components, + parameter_names=quantile_names(q), + ) + # `q` and `q_interval` are mutually exclusive + elif q_interval is not None: + # multi-quantile metrics yield more components + q_interval = ( + [q_interval] if isinstance(q_interval, tuple) else q_interval + ) + comp_names = likelihood_component_names( + components=series_.components, + parameter_names=quantile_interval_names(q_interval), + ) + else: + comp_names = None for fc, res in zip(fc_list, res_list): - # make sure all residuals have shape (n time steps, n components, n samples=1) + # make sure all residuals have shape (n time steps, n components * n quantiles, n samples=1) if len(res.shape) != 3: - res = np.reshape(res, (len(fc), fc.n_components, 1)) - res_list_out.append(res if values_only else fc.with_values(res)) + res = np.reshape(res, (len(fc), -1, 1)) + if values_only: + res = res + elif (q is None and q_interval is None) and res.shape[ + 1 + ] == fc.n_components: + res = fc.with_values(res) + else: + # quantile (interval) metrics created different number of components; + # create new series with unknown components + res = TimeSeries.from_times_and_values( + times=fc._time_index, + values=res, + columns=comp_names, + ) + res_list_out.append(res) + residuals_out.append(res_list_out) # if required, reduce to `series` input type diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index d22a23329e..4ca5b68ffe 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -56,7 +56,11 @@ ) from darts.utils.multioutput import MultiOutputRegressor from darts.utils.ts_utils import get_single_series, seq2series, series2seq -from darts.utils.utils import _check_quantiles +from darts.utils.utils import ( + _check_quantiles, + likelihood_component_names, + quantile_names, +) logger = get_logger(__name__) @@ -1668,17 +1672,15 @@ def _quantiles_generate_components_names( ) -> List[str]: return self._likelihood_generate_components_names( input_series, - [f"q{quantile:.2f}" for quantile in self._model_container.keys()], + quantile_names(q=self._model_container.keys()), ) def _likelihood_generate_components_names( self, input_series: TimeSeries, parameter_names: List[str] ) -> List[str]: - return [ - f"{tgt_name}_{param_n}" - for tgt_name in input_series.components - for param_n in parameter_names - ] + return likelihood_component_names( + components=input_series.components, parameter_names=parameter_names + ) class _QuantileModelContainer(OrderedDict): diff --git a/darts/tests/metrics/test_metrics.py b/darts/tests/metrics/test_metrics.py index 40754c0bfe..df7a820bf5 100644 --- a/darts/tests/metrics/test_metrics.py +++ b/darts/tests/metrics/test_metrics.py @@ -7,8 +7,9 @@ import pytest import sklearn.metrics -from darts import TimeSeries +from darts import TimeSeries, concatenate from darts.metrics import metrics +from darts.utils.utils import likelihood_component_names, quantile_names def sklearn_mape(*args, **kwargs): @@ -65,6 +66,19 @@ def metric_rmsle(y_true, y_pred, **kwargs): ) +def metric_iw(y_true, y_pred, q_interval=None, **kwargs): + # this tests assumes `y_pred` are stochastic values + if isinstance(q_interval, tuple): + q_interval = [q_interval] + q_interval = np.array(q_interval) + q_lo = q_interval[:, 0] + q_hi = q_interval[:, 1] + y_pred_lo = np.quantile(y_pred, q_lo, axis=2).transpose(1, 2, 0) + y_pred_hi = np.quantile(y_pred, q_hi, axis=2).transpose(1, 2, 0) + res = y_pred_hi - y_pred_lo + return res.reshape(len(y_pred), -1) + + class TestMetrics: np.random.seed(42) pd_train = pd.Series( @@ -963,6 +977,15 @@ def test_arre(self, config): self.helper_test_nan(metric, **kwargs) self.helper_test_non_aggregate(metric, is_aggregate) + with pytest.raises(ValueError) as exc: + _ = metric( + TimeSeries.from_values(np.ones((3, 1, 1))), + TimeSeries.from_values(np.ones((3, 1, 1))), + ) + assert str(exc.value).startswith( + "The difference between the max and min values must " + ) + @pytest.mark.parametrize( "metric", [ @@ -1034,10 +1057,10 @@ def test_sape(self, config): "config", [ (metrics.ase, False, {"time_reduction": np.nanmean}), - # (metrics.sse, False, {"time_reduction": np.nanmean}), - # (metrics.mase, True, {}), - # (metrics.msse, True, {}), - # (metrics.rmsse, True, {}), + (metrics.sse, False, {"time_reduction": np.nanmean}), + (metrics.mase, True, {}), + (metrics.msse, True, {}), + (metrics.rmsse, True, {}), ], ) def test_scaled_errors(self, config): @@ -1110,6 +1133,9 @@ def test_scaled_errors(self, config): assert str(err.value).startswith( "The `insample` series must start before the `pred_series`" ) + # wrong number of components + with pytest.raises(ValueError): + metric(self.series1, self.series2, insample.stack(insample)) # multi-ts, second series is not a TimeSeries with pytest.raises(ValueError): metric([self.series1] * 2, self.series2, [insample] * 2) @@ -1157,6 +1183,22 @@ def test_rho_risk(self): np.testing.assert_array_almost_equal(metrics.qr(s1, s12_stochastic, q=0.0), 0.0) np.testing.assert_array_almost_equal(metrics.qr(s2, s12_stochastic, q=1.0), 0.0) + # preds must be probabilistic + q_names = likelihood_component_names( + self.series1.components, + quantile_names([0.5]), + ) + with pytest.raises(ValueError) as exc: + metrics.qr( + self.series1, + self.series1.with_columns_renamed(self.series1.components, q_names), + q=0.5, + ) + assert ( + str(exc.value) + == "quantile risk (qr) should only be computed for stochastic predicted TimeSeries." + ) + @pytest.mark.parametrize( "config", [ @@ -1241,7 +1283,7 @@ def test_metrics_arguments(self): # should fail if kwargs are passed as args, because of the "*" with pytest.raises(TypeError): - metrics.r2_score(series00, series11, False, np.mean) + metrics.r2_score(series00, series11, False, 0.5, np.mean) def test_multiple_ts_rmse(self): # simple test @@ -1539,3 +1581,549 @@ def helper_test_non_aggregate(self, metric, is_aggregate, val_exp=None): if val_exp is not None: assert (res == -1.0).all() + + @pytest.mark.parametrize( + "config", + list( + itertools.product( + [ + # time dependent but with time reduction + metrics.err, + metrics.ae, + metrics.se, + metrics.sle, + metrics.ase, + metrics.sse, + metrics.ape, + metrics.sape, + metrics.arre, + metrics.ql, + # time aggregates + metrics.merr, + metrics.mae, + metrics.mse, + metrics.rmse, + metrics.rmsle, + metrics.mase, + metrics.msse, + metrics.rmsse, + metrics.mape, + metrics.smape, + metrics.ope, + metrics.marre, + metrics.r2_score, + metrics.coefficient_of_variation, + metrics.mql, + ], + [True, False], # univariate series + [True, False], # single series + ) + ), + ) + def test_metric_quantiles(self, config): + """Test output types and shapes for time aggregated metrics with quantiles: + for single and multiple univariate or multivariate series, in combination + with different component and series reduction functions.""" + np.random.seed(42) + metric, is_univar, is_single = config + params = inspect.signature(metric).parameters + + n_comp = 1 if is_univar else 2 + + qs_all = [0.1, 0.5, 0.8] + components = [str(i) for i in range(n_comp)] + + series_vals = np.random.random((10, n_comp, 1)) + + pred_prob_vals = np.random.random((10, n_comp, 100)) + + pred_vals_qs = [] + for i in range(n_comp): + pred_vals_qs.append( + np.quantile(pred_prob_vals[:, [i]], qs_all, axis=2).transpose(1, 0, 2) + ) + pred_vals_qs = np.concatenate(pred_vals_qs, axis=1) + pred_components = likelihood_component_names( + components=components, parameter_names=quantile_names(q=qs_all) + ) + + series = TimeSeries.from_values(series_vals, columns=components) + series_q_exp = concatenate( + [series[comp] for comp in components for _ in qs_all], axis=1 + ) + pred_prob = TimeSeries.from_values(pred_prob_vals, columns=components) + pred_qs = TimeSeries.from_values(pred_vals_qs, columns=pred_components) + insample = series.shift(-len(series)) + insample_q_exp = concatenate( + [insample[comp] for comp in components for _ in qs_all], axis=1 + ) + shape_time = (len(pred_qs),) if "time_reduction" in params else tuple() + + if not is_single: + series = [series] * 2 + series_q_exp = [series_q_exp] * 2 + pred_prob = [pred_prob] * 2 + pred_qs = [pred_qs] * 2 + insample = [insample] * 2 + insample_q_exp = [insample_q_exp] * 2 + + kwargs = {"actual_series": series} + if "insample" in params: + kwargs["insample"] = insample + + def check_res( + pred_prob_, pred_qs_, shape_exp, series_reduction=None, **test_kwargs + ): + res_prob = metric( + pred_series=pred_prob_, + series_reduction=series_reduction, + **kwargs, + **test_kwargs, + ) + res_qs = metric( + pred_series=pred_qs_, + series_reduction=series_reduction, + **kwargs, + **test_kwargs, + ) + if is_single or series_reduction is not None: + res_prob = [res_prob] + res_qs = [res_qs] + if series_reduction is None and not is_single: + assert len(res_prob) == len(res_qs) == len(pred_prob_) + + for res_p, res_q in zip(res_prob, res_qs): + assert res_p.shape == res_q.shape == shape_exp + np.testing.assert_array_almost_equal(res_p, res_q) + + check_res(pred_prob, pred_qs, shape_time, q=0.1) + # one quantile as list + check_res(pred_prob, pred_qs, shape_time, q=[0.1]) + # multiple quantiles + check_res(pred_prob, pred_qs, shape_time + (2,), q=[0.1, 0.8]) + # all quantiles + check_res(pred_prob, pred_qs, shape_time + (3,), q=[0.1, 0.5, 0.8]) + qs = [0.1, 0.8] + # component and series reduction + check_res( + pred_prob, + pred_qs, + shape_time + (len(qs),), + q=qs, + component_reduction=np.mean, + series_reduction=np.mean, + ) + # no component reduction + check_res( + pred_prob, + pred_qs, + shape_time + (len(qs) * n_comp,), + q=qs, + component_reduction=None, + series_reduction=np.mean, + ) + # no series reduction + check_res( + pred_prob, + pred_qs, + shape_time + (len(qs),), + q=qs, + component_reduction=np.mean, + series_reduction=None, + ) + # no series and component reduction + check_res( + pred_prob, + pred_qs, + shape_time + (len(qs) * n_comp,), + q=qs, + component_reduction=None, + series_reduction=None, + ) + + # check that we get identical results as when computing each quantile component against the actual + # target component directly + kwargs_direct = copy.deepcopy(kwargs) + q_direct = {} + if metric.__name__ not in ["ql", "mql"]: + kwargs_direct["actual_series"] = series_q_exp + if "insample" in params: + kwargs_direct["insample"] = insample_q_exp + else: + q_direct["q"] = qs_all + kwargs_direct["actual_series"] = series + + res_direct = metric( + pred_series=pred_qs, component_reduction=None, **kwargs_direct, **q_direct + ) + res_qs = metric( + pred_series=pred_qs, + component_reduction=None, + q=qs_all, + **kwargs, + ) + np.testing.assert_array_almost_equal(res_direct, res_qs) + + def test_invalid_quantiles(self): + np.random.seed(42) + series_a = TimeSeries.from_values(np.random.random((10, 2, 1))) + series_b = TimeSeries.from_values(np.random.random((10, 2, 10))) + + # unsorted quantiles + with pytest.raises(ValueError) as exc: + _ = metrics.mae(series_a, series_b, q=[0.2, 0.1]) + assert "a sequence of increasing order" in str(exc.value) + + # non-unique values metrics + with pytest.raises(ValueError) as exc: + _ = metrics.mae(series_a, series_b, q=[0.2, 0.2]) + assert "with unique values only" in str(exc.value) + + # q > 1 + with pytest.raises(ValueError) as exc: + _ = metrics.mae(series_a, series_b, q=[0.2, 1.01]) + assert "must be in the range `(>=0,<=1)`" in str(exc.value) + + # q < 0 + with pytest.raises(ValueError) as exc: + _ = metrics.mae(series_a, series_b, q=[-0.01, 0.2]) + assert "must be in the range `(>=0,<=1)`" in str(exc.value) + + # but sorted, unique, and valid quantiles work + _ = metrics.mae(series_a, series_b, q=[0.0, 0.5, 1.0]) + + def test_quantile_as_tuple(self): + """Test that `q` as tuple (list of quantiles, quantile component names) gives same results as `q` + as quantile values list.""" + np.random.seed(42) + q = [0.25, 0.75] + + series_a = TimeSeries.from_values(np.random.random((10, 2, 1))) + q_names = pd.Index( + likelihood_component_names(series_a.components, quantile_names(q)) + ) + series_b = TimeSeries.from_values(np.random.random((10, 4, 1)), columns=q_names) + + np.testing.assert_array_almost_equal( + metrics.mae(series_a, series_b, q=(q, q_names)), + metrics.mae(series_a, series_b, q=q), + ) + + def test_custom_metric_wrong_output_shape(self): + """Test that custom metrics must have correct output dim.""" + + @metrics.multi_ts_support + @metrics.multivariate_support + def custom_metric( + actual_series, + pred_series, + intersect=True, + *, + q=None, + time_reduction=None, + component_reduction=np.nanmean, + series_reduction=None, + n_jobs=1, + verbose=False, + out_ndim=1, + ): + return np.ones(tuple(1 for _ in range(out_ndim))) + + for ndim in [1, 4]: + with pytest.raises(ValueError) as exc: + custom_metric(self.series1, self.series2, out_ndim=ndim) + assert str(exc.value).startswith( + "Metric output must have 2 dimensions (n components, n quantiles) for aggregated metrics" + ) + for ndim in [2, 3]: + _ = custom_metric(self.series1, self.series2, out_ndim=ndim) + + def test_wrong_error_scale(self): + with pytest.raises(ValueError) as exc: + _ = metrics._get_error_scale( + self.series1.shift(-len(self.series1)), + self.series1, + m=1, + metric="wrong_metric", + ) + assert str(exc.value).startswith("unknown `metric=wrong_metric`") + + @pytest.mark.parametrize( + "config", + [ + # only time dependent quantile interval metrics + (metrics.iw, metric_iw), + ], + ) + def test_metric_quantile_interval_accuracy(self, config): + """Test output types and shapes for time dependent metrics with quantile intervals: + for single and multiple univariate or multivariate series, in combination + with different component and series reduction functions.""" + np.random.seed(42) + metric, metric_ref = config + n_comp = 2 + components = [str(i) for i in range(n_comp)] + series_vals = np.random.random((10, n_comp, 1)) + pred_prob_vals = np.random.random((10, n_comp, 100)) + series = TimeSeries.from_values(series_vals, columns=components) + pred_prob = TimeSeries.from_values(pred_prob_vals, columns=components) + + def check_ref(**test_kwargs): + res_prob = metric( + actual_series=series, + pred_series=pred_prob, + series_reduction=None, + component_reduction=None, + time_reduction=None, + **test_kwargs, + ) + res_ref = metric_ref( + y_true=series.all_values(), + y_pred=pred_prob.all_values(), + **test_kwargs, + ) + np.testing.assert_array_almost_equal(res_prob, res_ref) + + # one interval as tuple + check_ref(q_interval=(0.1, 0.5)) + # one interval in list + check_ref(q_interval=[(0.1, 0.5)]) + # multiple intervals + check_ref(q_interval=[(0.1, 0.5), (0.5, 0.8)]) + + @pytest.mark.parametrize( + "config", + list( + itertools.product( + [ + # time dependent but with time reduction + metrics.iw, + metrics.miw, + ], + [True, False], # univariate series + [True, False], # single series + ) + ), + ) + def test_metric_quantile_interval(self, config): + """Test output types and shapes for time aggregated metrics with quantile intervals: + for single and multiple univariate or multivariate series, in combination + with different component and series reduction functions.""" + np.random.seed(42) + metric, is_univar, is_single = config + params = inspect.signature(metric).parameters + + n_comp = 1 if is_univar else 2 + + qs_all = [0.1, 0.5, 0.8] + components = [str(i) for i in range(n_comp)] + + series_vals = np.random.random((10, n_comp, 1)) + pred_prob_vals = np.random.random((10, n_comp, 100)) + + pred_vals_qs = [] + for i in range(n_comp): + pred_vals_qs.append( + np.quantile(pred_prob_vals[:, [i]], qs_all, axis=2).transpose(1, 0, 2) + ) + pred_vals_qs = np.concatenate(pred_vals_qs, axis=1) + pred_components = likelihood_component_names( + components=components, parameter_names=quantile_names(q=qs_all) + ) + + series = TimeSeries.from_values(series_vals, columns=components) + pred_prob = TimeSeries.from_values(pred_prob_vals, columns=components) + pred_qs = TimeSeries.from_values(pred_vals_qs, columns=pred_components) + shape_time = (len(pred_qs),) if "time_reduction" in params else tuple() + + if not is_single: + series = [series] * 2 + pred_prob = [pred_prob] * 2 + pred_qs = [pred_qs] * 2 + + kwargs = {"actual_series": series} + + def check_res( + pred_prob_, pred_qs_, shape_exp, series_reduction=None, **test_kwargs + ): + res_prob = metric( + actual_series=series, + pred_series=pred_prob_, + series_reduction=series_reduction, + **test_kwargs, + ) + res_qs = metric( + actual_series=series, + pred_series=pred_qs_, + series_reduction=series_reduction, + **test_kwargs, + ) + if is_single or series_reduction is not None: + res_prob = [res_prob] + res_qs = [res_qs] + if series_reduction is None and not is_single: + assert len(res_prob) == len(res_qs) == len(pred_prob_) + + for res_p, res_q in zip(res_prob, res_qs): + assert res_p.shape == res_q.shape == shape_exp + np.testing.assert_array_almost_equal(res_p, res_q) + return res_qs + + # one interval as tuple + res = check_res(pred_prob, pred_qs, shape_time, q_interval=(0.1, 0.5)) + # one interval in list + res2 = check_res(pred_prob, pred_qs, shape_time, q_interval=[(0.1, 0.5)]) + np.testing.assert_array_almost_equal(res, res2) + # multiple intervals + check_res( + pred_prob, pred_qs, shape_time + (2,), q_interval=[(0.1, 0.5), (0.5, 0.8)] + ) + # all intervals + check_res( + pred_prob, + pred_qs, + shape_time + (3,), + q_interval=[(0.1, 0.5), (0.5, 0.8), (0.1, 0.8)], + ) + q_intervals = [(0.1, 0.5), (0.5, 0.8)] + # component and series reduction + check_res( + pred_prob, + pred_qs, + shape_time + (len(q_intervals),), + q_interval=q_intervals, + component_reduction=np.mean, + series_reduction=np.mean, + ) + # no component reduction + check_res( + pred_prob, + pred_qs, + shape_time + (len(q_intervals) * n_comp,), + q_interval=q_intervals, + component_reduction=None, + series_reduction=np.mean, + ) + # no series reduction + check_res( + pred_prob, + pred_qs, + shape_time + (len(q_intervals),), + q_interval=q_intervals, + component_reduction=np.mean, + series_reduction=None, + ) + # no series and component reduction + check_res( + pred_prob, + pred_qs, + shape_time + (len(q_intervals) * n_comp,), + q_interval=q_intervals, + component_reduction=None, + series_reduction=None, + ) + + # check that we get identical results as when computing intervals separately (on the time aggregated case) + if "time_reduction" in params: + kwargs["time_reduction"] = np.mean + res_lo = metric( + pred_series=pred_qs, + component_reduction=None, + q_interval=(0.1, 0.5), + **kwargs, + ) + res_hi = metric( + pred_series=pred_qs, + component_reduction=None, + q_interval=(0.5, 0.8), + **kwargs, + ) + res_multi = metric( + pred_series=pred_qs, + component_reduction=None, + q_interval=[(0.1, 0.5), (0.5, 0.8)], + **kwargs, + ) + if is_single: + res_lo = [res_lo] + res_hi = [res_hi] + res_multi = [res_multi] + res_lo_hi = [] + for res_lo_, res_hi_ in zip(res_lo, res_hi): + if res_lo_.ndim == 0: + res_lo_ = np.expand_dims(res_lo_, -1) + res_hi_ = np.expand_dims(res_hi_, -1) + res_lo_hi_ = np.concatenate([res_lo_, res_hi_]) + else: + res_lo_hi_ = np.concatenate( + [(res_lo_[i], res_hi_[i]) for i in range(n_comp)], + ) + res_lo_hi.append(res_lo_hi_) + np.testing.assert_array_almost_equal(res_lo_hi, res_multi) + + def test_invalid_quantile_intervals(self): + np.random.seed(42) + series_a = TimeSeries.from_values(np.random.random((10, 2, 1))) + series_b = TimeSeries.from_values(np.random.random((10, 2, 10))) + + # q not supported + with pytest.raises(ValueError) as exc: + _ = metrics.iw(series_a, series_b, q=[0.2]) + assert str(exc.value).startswith( + "`q` is not supported for quantile interval metrics" + ) + + # no quantile interval + with pytest.raises(ValueError) as exc: + _ = metrics.iw(series_a, series_b, q_interval=None) + assert str(exc.value).startswith( + "Quantile interval metrics require setting `q_interval`." + ) + + # invalid interval type + with pytest.raises(ValueError) as exc: + _ = metrics.iw(series_a, series_b, q_interval=0.6) + assert ( + str(exc.value) + == "`q_interval` must be a tuple (float, float) or a sequence of tuples (float, float)." + ) + + # invalid tuple length + with pytest.raises(ValueError) as exc: + _ = metrics.iw(series_a, series_b, q_interval=(0.1, 0.2, 0.3)) + assert ( + str(exc.value) + == "`q_interval` must be a tuple (float, float) or a sequence of tuples (float, float)." + ) + + # one tuple has invalid length invalid tuple length (raises a numpy error) + with pytest.raises(ValueError): + _ = metrics.iw(series_a, series_b, q_interval=[(0.1, 0.2), (0.2, 0.3, 0.4)]) + + # interval upper bound too high + with pytest.raises(ValueError) as exc: + _ = metrics.iw(series_a, series_b, q_interval=(0.1, 1.1)) + assert str(exc.value).startswith( + "All `q` values must be in the range `(>=0,<=1)`." + ) + + # interval lower bound too low + with pytest.raises(ValueError) as exc: + _ = metrics.iw(series_a, series_b, q_interval=(-0.01, 0.1)) + assert str(exc.value).startswith( + "All `q` values must be in the range `(>=0,<=1)`." + ) + + # lower interval equal to higher interval + with pytest.raises(ValueError) as exc: + _ = metrics.iw(series_a, series_b, q_interval=(0.2, 0.2)) + assert str(exc.value).startswith( + "all intervals in `q_interval` must be tuples of (lower q, upper q)" + ) + + # lower interval higher than higher interval + with pytest.raises(ValueError) as exc: + _ = metrics.iw(series_a, series_b, q_interval=(0.3, 0.2)) + assert str(exc.value).startswith( + "all intervals in `q_interval` must be tuples of (lower q, upper q)" + ) diff --git a/darts/tests/models/forecasting/test_backtesting.py b/darts/tests/models/forecasting/test_backtesting.py index 0f6336043a..60f1b3e525 100644 --- a/darts/tests/models/forecasting/test_backtesting.py +++ b/darts/tests/models/forecasting/test_backtesting.py @@ -26,6 +26,7 @@ from darts.utils.timeseries_generation import linear_timeseries as lt from darts.utils.timeseries_generation import random_walk_timeseries as rt from darts.utils.timeseries_generation import sine_timeseries as st +from darts.utils.utils import generate_index logger = get_logger(__name__) @@ -1252,6 +1253,179 @@ def time_reduced_metric(*args, **kwargs): for bt in bt_list: np.testing.assert_array_almost_equal(bt, bt_expected) + @pytest.mark.parametrize( + "config", + itertools.product( + [ + [metrics.mae], # mae does not support time_reduction + [metrics.mae, metrics.ae], # ae supports time_reduction + [metrics.miw], # quantile interval metric + [metrics.miw, metrics.iw], + ], + [True, False], # last_points_only + ), + ) + def test_metric_quantiles_lpo(self, config): + """Tests backtest with quantile and quantile interval metrics from expected probabilistic or quantile + historical forecasts.""" + metric, lpo = config + is_interval_metric = metric[0].__name__ == "miw" + + q = [0.05, 0.5, 0.60, 0.95] + q_interval = [(0.05, 0.50), (0.50, 0.60), (0.60, 0.95), (0.05, 0.60)] + + y = lt(length=20) + y = y.stack(y + 1.0) + hfc = TimeSeries.from_times_and_values( + times=generate_index(start=y.start_time() + 10 * y.freq, length=10), + values=np.random.random((10, 1, 100)), + ) + hfc = hfc.stack(hfc + 1.0) + y = [y, y] + if lpo: + hfc = [hfc, hfc] + else: + hfc = [[hfc, hfc], [hfc]] + + metric_kwargs = [{"component_reduction": np.median}] + if not is_interval_metric: + metric_kwargs[0]["q"] = q + else: + metric_kwargs[0]["q_interval"] = q_interval + if len(metric) > 1: + # give metric specific kwargs + metric_kwargs2 = { + "component_reduction": np.median, + "time_reduction": np.mean, + } + if not is_interval_metric: + metric_kwargs2["q"] = q + else: + metric_kwargs2["q_interval"] = q_interval + metric_kwargs.append(metric_kwargs2) + + model = NaiveDrift() + + bts = model.backtest( + series=y, + historical_forecasts=hfc, + metric=metric, + last_points_only=lpo, + reduction=None, + metric_kwargs=metric_kwargs, + ) + assert isinstance(bts, list) and len(bts) == 2 + if lpo: + bts = [[bt] for bt in bts] + # `ae` with time and component reduction is equal to `mae` with component reduction + hfc_single = hfc[0][0] if not lpo else hfc[0] + q_kwargs = {"q": q} if not is_interval_metric else {"q_interval": q_interval} + bt_expected = metric[0]( + y[0], hfc_single, component_reduction=np.median, **q_kwargs + ) + shape_expected = (len(q),) + if len(metric) > 1: + bt_expected = np.concatenate([bt_expected[:, None]] * 2, axis=1) + shape_expected += (len(metric),) + for bt_list in bts: + for bt in bt_list: + assert bt.shape == shape_expected + np.testing.assert_array_almost_equal(bt, bt_expected) + + bts = model.backtest( + series=y, + historical_forecasts=hfc, + metric=metric, + last_points_only=lpo, + reduction=np.mean, + metric_kwargs=metric_kwargs, + ) + assert isinstance(bts, list) and len(bts) == 2 + for bt in bts: + assert bt.shape == shape_expected + np.testing.assert_array_almost_equal(bt, bt_expected) + + def time_reduced_metric(*args, **kwargs): + metric_f = metrics.iw if is_interval_metric else metrics.ae + return metric_f(*args, **kwargs, time_reduction=np.mean) + + # check that single kwargs can be used for all metrics if params are supported + metric = [metric[0], time_reduced_metric] + bts = model.backtest( + series=y, + historical_forecasts=hfc, + metric=metric, + last_points_only=lpo, + reduction=None, + metric_kwargs=metric_kwargs[0], + ) + assert isinstance(bts, list) and len(bts) == 2 + if lpo: + bts = [[bt] for bt in bts] + # `ae` / `miw` with time and component reduction is equal to `mae` / `miw` with component reduction + bt_expected = metric[0]( + y[0], hfc_single, component_reduction=np.median, **q_kwargs + ) + bt_expected = np.concatenate([bt_expected[:, None]] * 2, axis=1) + shape_expected = (len(q), len(metric)) + for bt_list in bts: + for bt in bt_list: + assert bt.shape == shape_expected + np.testing.assert_array_almost_equal(bt, bt_expected) + + bts = model.backtest( + series=y, + historical_forecasts=hfc, + metric=metric, + last_points_only=lpo, + reduction=np.mean, + metric_kwargs=metric_kwargs[0], + ) + assert isinstance(bts, list) and len(bts) == 2 + for bt in bts: + assert bt.shape == shape_expected + np.testing.assert_array_almost_equal(bt, bt_expected) + + # without component reduction + metric_kwargs = {"component_reduction": None} + if not is_interval_metric: + metric_kwargs["q"] = q + else: + metric_kwargs["q_interval"] = q_interval + bts = model.backtest( + series=y, + historical_forecasts=hfc, + metric=metric, + last_points_only=lpo, + reduction=None, + metric_kwargs=metric_kwargs, + ) + assert isinstance(bts, list) and len(bts) == 2 + if lpo: + bts = [[bt] for bt in bts] + + # `ae` / `iw` with time and no component reduction is equal to `mae` / `miw` without component reduction + bt_expected = metric[0](y[0], hfc_single, **metric_kwargs) + bt_expected = np.concatenate([bt_expected[:, None]] * 2, axis=1) + shape_expected = (len(q) * y[0].width, len(metric)) + for bt_list in bts: + for bt in bt_list: + assert bt.shape == shape_expected + np.testing.assert_array_almost_equal(bt, bt_expected) + + bts = model.backtest( + series=y, + historical_forecasts=hfc, + metric=metric, + last_points_only=lpo, + reduction=np.mean, + metric_kwargs=metric_kwargs, + ) + assert isinstance(bts, list) and len(bts) == 2 + for bt in bts: + assert bt.shape == shape_expected + np.testing.assert_array_almost_equal(bt, bt_expected) + @pytest.mark.parametrize( "config", product([True, False], [True, False]), diff --git a/darts/tests/models/forecasting/test_residuals.py b/darts/tests/models/forecasting/test_residuals.py index 0e975240c4..40d15ebf0e 100644 --- a/darts/tests/models/forecasting/test_residuals.py +++ b/darts/tests/models/forecasting/test_residuals.py @@ -1,15 +1,23 @@ import itertools import numpy as np +import pandas as pd import pytest import darts.metrics as metrics +from darts import TimeSeries, concatenate from darts.datasets import AirPassengersDataset from darts.logging import get_logger from darts.models import LinearRegressionModel, NaiveDrift, NaiveSeasonal from darts.tests.models.forecasting.test_regression_models import dummy_timeseries from darts.utils.timeseries_generation import constant_timeseries as ct from darts.utils.timeseries_generation import linear_timeseries as lt +from darts.utils.utils import ( + generate_index, + likelihood_component_names, + quantile_interval_names, + quantile_names, +) logger = get_logger(__name__) @@ -612,3 +620,260 @@ def test_sample_weight(self, config): for res_nw, res_w in zip(res_non_weighted, res_weighted): with pytest.raises(AssertionError): np.testing.assert_array_almost_equal(res_w, res_nw) + + @pytest.mark.parametrize( + "config", + itertools.product( + [metrics.ae, metrics.iw], # quantile (interval) metrics + [True, False], # last_points_only + [False, True], # from stochastic predictions (or predicted quantiles) + ), + ) + def test_residuals_with_quantiles_metrics(self, config): + """Tests residuals with quantile metrics from expected probabilistic or quantile historical forecasts.""" + metric, lpo, stochastic_pred = config + is_interval_metric = metric.__name__ == "iw" + + # multi-quantile metrics yield more components + q = [0.05, 0.50, 0.60, 0.95] + q_interval = [(0.05, 0.50), (0.50, 0.60), (0.60, 0.95), (0.05, 0.60)] + + y = lt(length=20) + y = y.stack(y + 1.0) + if not is_interval_metric: + q_comp_names_expected = pd.Index( + likelihood_component_names( + components=y.components, + parameter_names=quantile_names(q), + ) + ) + else: + q_comp_names_expected = pd.Index( + likelihood_component_names( + components=y.components, + parameter_names=quantile_interval_names(q_interval), + ) + ) + # historical forecasts + vals = np.random.random((10, 1, 100)) + if not stochastic_pred: + vals = np.quantile(vals, q, axis=2).transpose((1, 0, 2)) + comp_names = pd.Index( + likelihood_component_names( + components=y.components, + parameter_names=quantile_names(q=q), + ) + ) + else: + comp_names = y.components + vals = np.concatenate([vals, vals + 1], axis=1) + hfc = TimeSeries.from_times_and_values( + times=generate_index(start=y.start_time() + 10 * y.freq, length=10), + values=vals, + columns=comp_names, + ) + + y = [y, y] + if lpo: + hfc = [hfc, hfc] + else: + hfc = [[hfc, hfc], [hfc]] + + metric_kwargs = {"component_reduction": None} + if not is_interval_metric: + metric_kwargs["q"] = q + else: + metric_kwargs["q_interval"] = q_interval + + model = NaiveDrift() + + # return TimeSeries + bts = model.residuals( + series=y, + historical_forecasts=hfc, + metric=metric, + last_points_only=lpo, + metric_kwargs=metric_kwargs, + ) + assert isinstance(bts, list) and len(bts) == 2 + if lpo: + bts = [[bt] for bt in bts] + + # `ae` with time and component reduction is equal to `mae` with component reduction + hfc_single = hfc[0][0] if not lpo else hfc[0] + bt_expected = metric(y[0], hfc_single, **metric_kwargs) + shape_expected = (len(hfc_single), len(q) * y[0].n_components) + for bt_list in bts: + for bt in bt_list: + assert bt.shape[:2] == shape_expected + assert bt.components.equals(q_comp_names_expected) + np.testing.assert_array_almost_equal(bt.values(), bt_expected) + + # values only + bts = model.residuals( + series=y, + historical_forecasts=hfc, + metric=metric, + last_points_only=lpo, + metric_kwargs=metric_kwargs, + values_only=True, + ) + assert isinstance(bts, list) and len(bts) == 2 + if lpo: + bts = [[bt] for bt in bts] + + # `ae` with time and component reduction is equal to `mae` with component reduction + for bt_list in bts: + for bt in bt_list: + assert bt.shape[:2] == shape_expected + np.testing.assert_array_almost_equal(bt[:, :, 0], bt_expected) + + @pytest.mark.parametrize( + "config", + list( + itertools.product( + [metrics.ae, metrics.iw], # quantile (interval) metrics + [True, False], # last_points_only + ) + ), + ) + def test_quantiles_from_model(self, config): + """Tests residuals from quantile regression model works for both direct likelihood parameter prediction or + sampled prediction by giving the correct metrics kwargs.""" + metric, lpo = config + + is_interval_metric = metric.__name__ == "iw" + + # multi-quantile metrics yield more components + q = [0.05, 0.50, 0.95] + q_interval = [(0.05, 0.50), (0.50, 0.95), (0.05, 0.95)] + + y = lt(length=20) + y = y.stack(y + 1.0) + if not is_interval_metric: + q_comp_names_expected = pd.Index( + likelihood_component_names( + components=y.components, + parameter_names=quantile_names(q), + ) + ) + else: + q_comp_names_expected = pd.Index( + likelihood_component_names( + components=y.components, + parameter_names=quantile_interval_names(q_interval), + ) + ) + y = [y, y] + metric_kwargs = {"component_reduction": None} + if not is_interval_metric: + metric_kwargs["q"] = q + else: + metric_kwargs["q_interval"] = q_interval + + icl = 3 + model = LinearRegressionModel( + lags=icl, output_chunk_length=1, likelihood="quantile", quantiles=q + ) + model.fit(y) + + # quantile forecasts + bts = model.residuals( + series=y, + forecast_horizon=1, + metric=metric, + last_points_only=lpo, + metric_kwargs=metric_kwargs, + predict_likelihood_parameters=True, + retrain=False, + ) + assert isinstance(bts, list) and len(bts) == 2 + if not lpo: + bts = [concatenate(bt, axis=0) for bt in bts] + + # `ae` with time and component reduction is equal to `mae` with component reduction + shape_expected = (len(y[0]) - icl, len(q) * y[0].n_components) + for bt in bts: + assert bt.shape[:2] == shape_expected + assert bt.components.equals(q_comp_names_expected) + + # probabilistic forecasts + bts_prob = model.residuals( + series=y, + forecast_horizon=1, + metric=metric, + last_points_only=lpo, + metric_kwargs=metric_kwargs, + predict_likelihood_parameters=False, + num_samples=1000, + retrain=False, + ) + assert isinstance(bts_prob, list) and len(bts_prob) == 2 + if not lpo: + bts_prob = [concatenate(bt, axis=0) for bt in bts_prob] + for bt_p, bt_q in zip(bts_prob, bts): + assert bt_p.shape == bt_q.shape + assert bt_p.components.equals(bt_q.components) + # check that the results are similar + assert np.abs(bt_p.all_values() - bt_q.all_values()).max() < 0.1 + + # single quantile + q_single = 0.05 + q_interval_single = (0.05, 0.50) + metric_kwargs = {"component_reduction": None} + if not is_interval_metric: + metric_kwargs["q"] = q_single + else: + metric_kwargs["q_interval"] = q_interval_single + bts = model.residuals( + series=y, + forecast_horizon=1, + metric=metric, + last_points_only=lpo, + metric_kwargs=metric_kwargs, + predict_likelihood_parameters=True, + retrain=False, + ) + assert isinstance(bts, list) and len(bts) == 2 + if not lpo: + bts = [concatenate(bt, axis=0) for bt in bts] + + # `ae` with time and component reduction is equal to `mae` with component reduction + shape_expected = (len(y[0]) - icl, y[0].n_components) + for bt in bts: + assert bt.shape[:2] == shape_expected + assert bt.components.equals( + pd.Index( + likelihood_component_names( + y[0].components, + parameter_names=( + quantile_names([q_single]) + if not is_interval_metric + else quantile_interval_names([q_interval_single]) + ), + ) + ) + ) + + # wrong quantile + q_wrong = [0.99] + q_interval_wrong = (0.05, 0.99) + metric_kwargs = {"component_reduction": None} + if not is_interval_metric: + metric_kwargs["q"] = q_wrong + else: + metric_kwargs["q_interval"] = q_interval_wrong + with pytest.raises(ValueError) as exc: + _ = model.residuals( + series=y, + forecast_horizon=1, + metric=metric, + last_points_only=lpo, + metric_kwargs=metric_kwargs, + predict_likelihood_parameters=True, + retrain=False, + ) + assert str(exc.value).startswith( + f"Computing a metric with quantile(s) " + f"`q={'[0.99]' if not is_interval_metric else '[0.05 0.99]'}` is only supported" + ) diff --git a/darts/tests/utils/test_utils.py b/darts/tests/utils/test_utils.py index 3b82c5f1b2..d629851cea 100644 --- a/darts/tests/utils/test_utils.py +++ b/darts/tests/utils/test_utils.py @@ -7,7 +7,15 @@ from darts.utils import _with_sanity_checks from darts.utils.missing_values import extract_subseries from darts.utils.ts_utils import retain_period_common_to_all -from darts.utils.utils import expand_arr, freqs, generate_index, n_steps_between +from darts.utils.utils import ( + expand_arr, + freqs, + generate_index, + likelihood_component_names, + n_steps_between, + quantile_interval_names, + quantile_names, +) class TestUtils: @@ -587,3 +595,39 @@ def test_expand_arr(self, config): arr = expand_arr(arr, ndim=3) assert arr.shape == shape_expected np.testing.assert_array_almost_equal(arr, arr_expected) + + def test_likelihood_component_names(self): + names = likelihood_component_names(["a", "b"], ["1", "2", "3"]) + assert names == ["a_1", "a_2", "a_3", "b_1", "b_2", "b_3"] + + assert ( + likelihood_component_names(pd.Index(["a", "b"]), ["1", "2", "3"]) == names + ) + + @pytest.mark.parametrize( + "config", + [ + (0.25, "a_q0.25"), + (0.2501, "a_q0.25"), + ([0.25], ["a_q0.25"]), + ([0.25, 0.75], ["a_q0.25", "a_q0.75"]), + ], + ) + def test_quantile_names(self, config): + q, names_expected = config + names = quantile_names(q, "a") + assert names == names_expected + + @pytest.mark.parametrize( + "config", + [ + ((0.25, 0.5), "a_q0.25_q0.50"), + ((0.2501, 0.4999), "a_q0.25_q0.50"), + ([(0.25, 0.5)], ["a_q0.25_q0.50"]), + ([(0.25, 0.50), (0.6, 0.75)], ["a_q0.25_q0.50", "a_q0.60_q0.75"]), + ], + ) + def test_quantile_interval_names(self, config): + q, names_expected = config + names = quantile_interval_names(q, "a") + assert names == names_expected diff --git a/darts/timeseries.py b/darts/timeseries.py index edadecb422..f6fe161464 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -74,6 +74,7 @@ # dimension names in the DataArray # the "time" one can be different, if it has a name in the underlying Series/DataFrame. DIMS = ("time", "component", "sample") +AXES = {"time": 0, "component": 1, "sample": 2} VALID_INDEX_TYPES = (pd.DatetimeIndex, pd.RangeIndex) STATIC_COV_TAG = "static_covariates" @@ -1361,15 +1362,20 @@ def bottom_level_series(self) -> Optional[List[Self]]: else None ) + @property + def shape(self) -> Tuple[int]: + """The shape of the series (n_timesteps, n_components, n_samples).""" + return self._xa.shape + @property def n_samples(self) -> int: """Number of samples contained in the series.""" - return len(self._xa.sample) + return self.shape[AXES["sample"]] @property def n_components(self) -> int: """Number of components (dimensions) contained in the series.""" - return len(self._xa.component) + return self.shape[AXES["component"]] @property def width(self) -> int: @@ -1379,12 +1385,12 @@ def width(self) -> int: @property def n_timesteps(self) -> int: """Number of time steps in the series.""" - return len(self._time_index) + return self.shape[AXES["time"]] @property def is_deterministic(self) -> bool: """Whether this series is deterministic.""" - return self.n_samples == 1 + return self.shape[AXES["sample"]] == 1 @property def is_stochastic(self) -> bool: @@ -1399,7 +1405,7 @@ def is_probabilistic(self) -> bool: @property def is_univariate(self) -> bool: """Whether this series is univariate.""" - return self.n_components == 1 + return self.shape[AXES["component"]] == 1 @property def freq(self) -> Union[pd.DateOffset, int]: diff --git a/darts/utils/likelihood_models.py b/darts/utils/likelihood_models.py index 6d4d7fd1a1..75b7d86f07 100644 --- a/darts/utils/likelihood_models.py +++ b/darts/utils/likelihood_models.py @@ -60,7 +60,11 @@ from darts.logging import raise_if_not # TODO: Table on README listing distribution, possible priors and wiki article -from darts.utils.utils import _check_quantiles +from darts.utils.utils import ( + _check_quantiles, + likelihood_component_names, + quantile_names, +) MIN_CAUCHY_GAMMA_SAMPLING = 1e-100 @@ -1354,11 +1358,10 @@ def _params_from_output(self, model_output: torch.Tensor) -> None: def likelihood_components_names(self, input_series: TimeSeries) -> List[str]: """Each component have their own quantiles""" - return [ - f"{tgt_name}_q{quantile:.2f}" - for tgt_name in input_series.components - for quantile in self.quantiles - ] + return likelihood_component_names( + components=input_series.components, + parameter_names=quantile_names(self.quantiles), + ) def simplified_name(self) -> str: return "quantile" diff --git a/darts/utils/utils.py b/darts/utils/utils.py index 125e1b8afc..c7c2480215 100644 --- a/darts/utils/utils.py +++ b/darts/utils/utils.py @@ -6,7 +6,7 @@ from enum import Enum from functools import wraps from inspect import Parameter, getcallargs, signature -from typing import Callable, Iterator, List, Optional, Tuple, TypeVar, Union +from typing import Callable, Iterator, List, Optional, Sequence, Tuple, TypeVar, Union import numpy as np import pandas as pd @@ -67,6 +67,66 @@ class ModelMode(Enum): } +def likelihood_component_names( + components: Union[pd.Index, List[str]], parameter_names: List[str] +): + """Generates formatted likelihood parameter names for components and parameter names. + + The order of the returned names is: `[comp1_param_1, ... comp1_param_n, ..., comp_n_param_n]`. + + Parameters + ---------- + components + A sequence of component names to add to the beginning of the returned names. + parameter_names + A sequence of likelihood parameter names to add to the end of the returned names. + """ + return [ + f"{tgt_name}_{param_n}" + for tgt_name in components + for param_n in parameter_names + ] + + +def quantile_names(q: Union[float, List[float]], component: Optional[str] = None): + """Generates formatted quantile names, optionally added to a component name. + + Parameters + ---------- + q + A float or list of floats with the quantiles to generate the names for. + component + Optionally, a component name to add to the beginning of the quantile names. + """ + # predicted quantile text format + comp = f"{component}_" if component is not None else "" + if isinstance(q, float): + return f"{comp}q{q:.2f}" + else: + return [f"{comp}q{q_i:.2f}" for q_i in q] + + +def quantile_interval_names( + q_interval: Union[Tuple[float, float], Sequence[Tuple[float, float]]], + component: Optional[str] = None, +): + """Generates formatted quantile interval names, optionally added to a component name. + + Parameters + ---------- + q_interval + A tuple or multiple tuples with the (lower bound, upper bound) of the quantile intervals. + component + Optionally, a component name to add to the beginning of the quantile names. + """ + # predicted quantile text format + comp = f"{component}_" if component is not None else "" + if isinstance(q_interval, tuple): + return f"{comp}q{q_interval[0]:.2f}_q{q_interval[1]:.2f}" + else: + return [f"{comp}q{q_lo:.2f}_q{q_hi:.2f}" for q_lo, q_hi in q_interval] + + def _build_tqdm_iterator(iterable, verbose, **kwargs): """ Build an iterable, possibly using tqdm (either in notebook or regular mode)