diff --git a/README.html b/README.html index 08038d6b81..be2257aa1a 100644 --- a/README.html +++ b/README.html @@ -553,14 +553,14 @@
Prophet (see install notes)
Prophet (see install notes)
🟩 🟥
🟥 🟩 🟥
🟩 🟥
🟥
FFT (Fast Fourier Transform)
FFT (Fast Fourier Transform)
🟩 🟥
🟥 🟥 🟥
-"""
-StatsForecast utils
------------
-"""
-
-import numpy as np
+import numpy as np
# In a normal distribution, 68.27 percentage of values lie within one standard deviation of the mean
one_sigma_rule = 68.27
@@ -181,7 +176,7 @@ Source code for darts.models.components.statsforecast_utils
std: float,
num_samples: int,
n: int,
-) -> np.array:
+) -> np.ndarray:
"""Generate samples assuming a Normal distribution."""
samples = np.random.normal(loc=mu, scale=std, size=(num_samples, n)).T
samples = np.expand_dims(samples, axis=1)
diff --git a/_modules/darts/models/forecasting/arima.html b/_modules/darts/models/forecasting/arima.html
index c28ee32abc..cbfb7a9524 100644
--- a/_modules/darts/models/forecasting/arima.html
+++ b/_modules/darts/models/forecasting/arima.html
@@ -238,14 +238,37 @@ Source code for darts.models.forecasting.arima
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'future': ['relative']},
- 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'future': [encode_year]},
'transformer': Scaler()
}
..
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import ARIMA
+ >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries
+ >>> series = AirPassengersDataset().load()
+ >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series
+ >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6)
+ >>> # define ARIMA parameters
+ >>> model = ARIMA(p=12, d=1, q=2)
+ >>> model.fit(series, future_covariates=future_cov)
+ >>> pred = model.predict(6, future_covariates=future_cov)
+ >>> pred.values()
+ array([[451.36489334],
+ [416.88972829],
+ [443.10520391],
+ [481.07892911],
+ [502.11286509],
+ [555.50153984]])
"""
super().__init__(add_encoders=add_encoders)
self.order = p, d, q
diff --git a/_modules/darts/models/forecasting/auto_arima.html b/_modules/darts/models/forecasting/auto_arima.html
index d08744a29f..78016c7396 100644
--- a/_modules/darts/models/forecasting/auto_arima.html
+++ b/_modules/darts/models/forecasting/auto_arima.html
@@ -221,14 +221,37 @@
Source code for darts.models.forecasting.auto_arima
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'future': ['relative']},
- 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'future': [encode_year]},
'transformer': Scaler()
}
..
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import AutoARIMA
+ >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries
+ >>> series = AirPassengersDataset().load()
+ >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series
+ >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6)
+ >>> # define some boundaries for the parameters
+ >>> model = AutoARIMA(start_p=8, max_p=12, start_q=1)
+ >>> model.fit(series, future_covariates=future_cov)
+ >>> pred = model.predict(6, future_covariates=future_cov)
+ >>> pred.values()
+ array([[449.79716178],
+ [416.31180633],
+ [445.28005229],
+ [485.27121314],
+ [507.61787454],
+ [561.26993332]])
"""
super().__init__(add_encoders=add_encoders)
self.model = PmdAutoARIMA(*autoarima_args, **autoarima_kwargs)
diff --git a/_modules/darts/models/forecasting/baselines.html b/_modules/darts/models/forecasting/baselines.html
index 81d792f0e1..58638e1f61 100644
--- a/_modules/darts/models/forecasting/baselines.html
+++ b/_modules/darts/models/forecasting/baselines.html
@@ -193,6 +193,22 @@ Source code for darts.models.forecasting.baselines
This model has no parameter, and always predicts the
mean value of the training series.
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import NaiveMean
+ >>> series = AirPassengersDataset().load()
+ >>> model = NaiveMean()
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[280.29861111],
+ [280.29861111],
+ [280.29861111],
+ [280.29861111],
+ [280.29861111],
+ [280.29861111]])
"""
super().__init__()
self.mean_val = None
@@ -230,6 +246,23 @@ Source code for darts.models.forecasting.baselines
----------
K
the number of last time steps of the training set to repeat
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import NaiveSeasonal
+ >>> series = AirPassengersDataset().load()
+ >>> # prior analysis suggested seasonality of 12
+ >>> model = NaiveSeasonal(K=12)
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[417.],
+ [391.],
+ [419.],
+ [461.],
+ [472.],
+ [535.]])
"""
super().__init__()
self.last_k_vals = None
@@ -273,6 +306,22 @@ Source code for darts.models.forecasting.baselines
and extends it in the future. For a training series of length :math:`T`, we have:
.. math:: \\hat{y}_{T+h} = y_T + h\\left( \\frac{y_T - y_1}{T - 1} \\right)
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import NaiveDrift
+ >>> series = AirPassengersDataset().load()
+ >>> model = NaiveDrift()
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[434.23776224],
+ [436.47552448],
+ [438.71328671],
+ [440.95104895],
+ [443.18881119],
+ [445.42657343]])
"""
super().__init__()
@@ -314,6 +363,22 @@ Source code for darts.models.forecasting.baselines
----------
input_chunk_length
The size of the sliding window used to calculate the moving average
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import NaiveMovingAverage
+ >>> series = AirPassengersDataset().load()
+ >>> # using the average of the last 6 months
+ >>> model = NaiveMovingAverage(input_chunk_length=6)
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[503.16666667],
+ [483.36111111],
+ [462.9212963 ],
+ [455.40817901],
+ [454.47620885],
+ [465.22224366]])
"""
super().__init__()
self.input_chunk_length = input_chunk_length
@@ -367,7 +432,8 @@ Source code for darts.models.forecasting.baselines
[docs]class NaiveEnsembleModel(EnsembleModel):
def __init__(
self,
- models: List[ForecastingModel],
+ forecasting_models: List[ForecastingModel],
+ train_forecasting_models: bool = True,
show_warnings: bool = True,
):
"""Naive combination model
@@ -380,18 +446,44 @@ Source code for darts.models.forecasting.baselines
Parameters
----------
- models
+ forecasting_models
List of forecasting models whose predictions to ensemble
+ train_forecasting_models
+ Whether to train the `forecasting_models` from scratch. If `False`, the models are not trained when calling
+ `fit()` and `predict()` can be called directly (only supported if all the `forecasting_models` are
+ pretrained `GlobalForecastingModels`). Default: ``True``.
show_warnings
Whether to show warnings related to models covariates support.
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import NaiveEnsembleModel, NaiveSeasonal, LinearRegressionModel
+ >>> series = AirPassengersDataset().load()
+ >>> # defining the ensemble
+ >>> model = NaiveEnsembleModel([NaiveSeasonal(K=12), LinearRegressionModel(lags=4)])
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[439.23152974],
+ [431.41161602],
+ [439.72888401],
+ [453.70180806],
+ [454.96757177],
+ [485.16604194]])
"""
super().__init__(
- models=models,
- train_num_samples=None,
+ forecasting_models=forecasting_models,
+ train_num_samples=1,
train_samples_reduction=None,
+ train_forecasting_models=train_forecasting_models,
show_warnings=show_warnings,
)
+ # ensemble model initialised with trained global models can directly call predict()
+ if self.all_trained and not train_forecasting_models:
+ self._fit_called = True
+
[docs] def fit(
self,
series: Union[TimeSeries, Sequence[TimeSeries]],
@@ -403,13 +495,13 @@ Source code for darts.models.forecasting.baselines
past_covariates=past_covariates,
future_covariates=future_covariates,
)
- for model in self.models:
- kwargs = dict(series=series)
- if model.supports_past_covariates:
- kwargs["past_covariates"] = past_covariates
- if model.supports_future_covariates:
- kwargs["future_covariates"] = future_covariates
- model.fit(**kwargs)
+ if self.train_forecasting_models:
+ for model in self.forecasting_models:
+ model._fit_wrapper(
+ series=series,
+ past_covariates=past_covariates,
+ future_covariates=future_covariates,
+ )
return self
@@ -429,9 +521,6 @@ Source code for darts.models.forecasting.baselines
logger,
)
- if series is None:
- series = self.training_series
-
if isinstance(predictions, Sequence):
return [
self._target_average(p, ts)
@@ -448,7 +537,7 @@ Source code for darts.models.forecasting.baselines
def _target_average(self, prediction: TimeSeries, series: TimeSeries) -> TimeSeries:
"""Average across the components, keep n_samples, rename components"""
- n_forecasting_models = len(self.models)
+ n_forecasting_models = len(self.forecasting_models)
n_components = series.n_components
prediction_values = prediction.all_values(copy=False)
target_values = np.zeros(
@@ -476,12 +565,12 @@ Source code for darts.models.forecasting.baselines
def _params_average(self, prediction: TimeSeries, series: TimeSeries) -> TimeSeries:
"""Average across the components after grouping by likelihood parameter, rename components"""
# str or torch Likelihood
- likelihood = getattr(self.models[0], "likelihood")
+ likelihood = getattr(self.forecasting_models[0], "likelihood")
if isinstance(likelihood, str):
- likelihood_n_params = self.models[0].num_parameters
+ likelihood_n_params = self.forecasting_models[0].num_parameters
else: # Likelihood
likelihood_n_params = likelihood.num_parameters
- n_forecasting_models = len(self.models)
+ n_forecasting_models = len(self.forecasting_models)
n_components = series.n_components
# aggregate across predictions [model1_param0, model1_param1, ..., modeln_param0, modeln_param1]
prediction_values = prediction.values(copy=False)
diff --git a/_modules/darts/models/forecasting/block_rnn_model.html b/_modules/darts/models/forecasting/block_rnn_model.html
index 12cf58d9be..14f688af88 100644
--- a/_modules/darts/models/forecasting/block_rnn_model.html
+++ b/_modules/darts/models/forecasting/block_rnn_model.html
@@ -176,7 +176,10 @@ Source code for darts.models.forecasting.block_rnn_model
import torch.nn as nn
from darts.logging import get_logger, raise_if_not
-from darts.models.forecasting.pl_forecasting_module import PLPastCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLPastCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import PastCovariatesTorchModel
logger = get_logger(__name__)
@@ -268,6 +271,7 @@ Source code for darts.models.forecasting.block_rnn_model
last = feature
self.fc = nn.Sequential(*feats)
+ @io_processor
def forward(self, x_in: Tuple):
x, _ = x_in
# data is of size (batch_size, input_chunk_length, input_size)
@@ -361,6 +365,9 @@ Source code for darts.models.forecasting.block_rnn_model
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [1]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -401,11 +408,14 @@ Source code for darts.models.forecasting.block_rnn_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -463,6 +473,41 @@ Source code for darts.models.forecasting.block_rnn_model
show_warnings
whether to show warnings raised from PyTorch Lightning. Useful to detect potential issues of
your forecasting use case. Default: ``False``.
+
+ References
+ ----------
+ .. [1] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import BlockRNNModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # predict 6 pressure values using the 12 past values of pressure and rainfall
+ >>> model = BlockRNNModel(
+ >>> input_chunk_length=12,
+ >>> output_chunk_length=6,
+ >>> n_rnn_layers=2,
+ >>> n_epochs=50,
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[4.97979827],
+ [3.9707572 ],
+ [5.27869295],
+ [5.19697244],
+ [5.28424783],
+ [5.22497681]])
+
+ .. note::
+ `RNN example notebook <https://unit8co.github.io/darts/examples/04-RNN-examples.html>`_ presents techniques
+ that can be used to improve the forecasts quality compared to this simple usage example.
"""
super().__init__(**self._extract_torch_model_params(**self.model_params))
diff --git a/_modules/darts/models/forecasting/catboost_model.html b/_modules/darts/models/forecasting/catboost_model.html
index 2e8d3a4618..3100579403 100644
--- a/_modules/darts/models/forecasting/catboost_model.html
+++ b/_modules/darts/models/forecasting/catboost_model.html
@@ -233,11 +233,14 @@ Source code for darts.models.forecasting.catboost_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -261,6 +264,35 @@ Source code for darts.models.forecasting.catboost_model
that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``.
**kwargs
Additional keyword arguments passed to `catboost.CatBoostRegressor`.
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import CatBoostModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature
+ >>> # values corresponding to the forecasted period
+ >>> model = CatBoostModel(
+ >>> lags=12,
+ >>> lags_past_covariates=12,
+ >>> lags_future_covariates=[0,1,2,3,4,5],
+ >>> output_chunk_length=6
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[1006.4153701 ],
+ [1006.41907237],
+ [1006.30872957],
+ [1006.28614154],
+ [1006.22355514],
+ [1006.21607546]])
"""
kwargs["random_state"] = random_state # seed for tree learner
self.kwargs = kwargs
diff --git a/_modules/darts/models/forecasting/croston.html b/_modules/darts/models/forecasting/croston.html
index a76276a5a6..480617deb8 100644
--- a/_modules/darts/models/forecasting/croston.html
+++ b/_modules/darts/models/forecasting/croston.html
@@ -225,11 +225,14 @@ Source code for darts.models.forecasting.croston
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'future': ['relative']},
- 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'future': [encode_year]},
'transformer': Scaler()
}
..
@@ -241,6 +244,23 @@ Source code for darts.models.forecasting.croston
.. [2] Ruud H. Teunter, Aris A. Syntetos, and M. Zied Babai.
Intermittent demand: Linking forecasting to inventory obsolescence.
European Journal of Operational Research, 214(3):606 – 615, 2011.
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import Croston
+ >>> series = AirPassengersDataset().load()
+ >>> # use the optimized version to automatically select best alpha parameter
+ >>> model = Croston(version="optimized")
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[461.7666],
+ [461.7666],
+ [461.7666],
+ [461.7666],
+ [461.7666],
+ [461.7666]])
"""
super().__init__(add_encoders=add_encoders)
raise_if_not(
diff --git a/_modules/darts/models/forecasting/dlinear.html b/_modules/darts/models/forecasting/dlinear.html
index 8c80fa64b1..b6fd0fb411 100644
--- a/_modules/darts/models/forecasting/dlinear.html
+++ b/_modules/darts/models/forecasting/dlinear.html
@@ -176,7 +176,10 @@ Source code for darts.models.forecasting.dlinear
import torch.nn as nn
from darts.logging import raise_if
-from darts.models.forecasting.pl_forecasting_module import PLMixedCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLMixedCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import MixedCovariatesTorchModel
MixedCovariatesTrainTensorType = Tuple[
@@ -322,6 +325,7 @@ Source code for darts.models.forecasting.dlinear
layer_in_dim_static_cov, layer_out_dim
)
+ @io_processor
def forward(
self, x_in: Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]
):
@@ -462,6 +466,9 @@ Source code for darts.models.forecasting.dlinear
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [2]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -502,11 +509,14 @@ Source code for darts.models.forecasting.dlinear
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -569,6 +579,41 @@ Source code for darts.models.forecasting.dlinear
----------
.. [1] Zeng, A., Chen, M., Zhang, L., & Xu, Q. (2022).
Are Transformers Effective for Time Series Forecasting?. arXiv preprint arXiv:2205.13504.
+ .. [2] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import DLinearModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # predict 6 pressure values using the 6 past values of pressure and rainfall, as well as the 6 temperature
+ >>> # values corresponding to the forecasted period
+ >>> model = DLinearModel(
+ >>> input_chunk_length=6,
+ >>> output_chunk_length=6,
+ >>> n_epochs=20,
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[667.20957388],
+ [666.76986848],
+ [666.67733306],
+ [666.06625381],
+ [665.8529289 ],
+ [665.75320573]])
+
+ .. note::
+ This simple usage example produces poor forecasts. To obtain better performance, users should
+ transform the input data, increase the number of epochs, use a validation set, optimize the
+ hyperparameters, etc.
"""
super().__init__(**self._extract_torch_model_params(**self.model_params))
diff --git a/_modules/darts/models/forecasting/ensemble_model.html b/_modules/darts/models/forecasting/ensemble_model.html
index 96b10139e0..37bf65103c 100644
--- a/_modules/darts/models/forecasting/ensemble_model.html
+++ b/_modules/darts/models/forecasting/ensemble_model.html
@@ -170,7 +170,6 @@ Source code for darts.models.forecasting.ensemble_model
"""
from abc import abstractmethod
-from functools import reduce
from typing import List, Optional, Sequence, Tuple, Union
from darts.logging import get_logger, raise_if, raise_if_not, raise_log
@@ -179,7 +178,7 @@ Source code for darts.models.forecasting.ensemble_model
GlobalForecastingModel,
LocalForecastingModel,
)
-from darts.timeseries import TimeSeries
+from darts.timeseries import TimeSeries, concatenate
from darts.utils.utils import series2seq
logger = get_logger(__name__)
@@ -196,7 +195,7 @@ Source code for darts.models.forecasting.ensemble_model
Parameters
----------
- models
+ forecasting_models
List of forecasting models whose predictions to ensemble
.. note::
@@ -206,29 +205,35 @@ Source code for darts.models.forecasting.ensemble_model
Number of prediction samples from each forecasting model for multi-level ensembles. The n_samples
dimension will be reduced using the `train_samples_reduction` method.
train_samples_reduction
- If `models` are probabilistic and `train_num_samples` > 1, method used to
- reduce the samples dimension to 1. Possible values: "mean", "median" or float value corresponding
- to the desired quantile.
+ If `forecasting_models` are probabilistic and `train_num_samples` > 1, method used to reduce the
+ samples dimension to 1. Possible values: "mean", "median" or float value corresponding to the
+ desired quantile.
+ train_forecasting_models
+ If set to `False`, the `forecasting_models` are not retrained when calling `fit()` (only supported
+ if all the `forecasting_models` are pretrained `GlobalForecastingModels`). Default: ``True``.
show_warnings
Whether to show warnings related to models covariates support.
"""
def __init__(
self,
- models: List[ForecastingModel],
+ forecasting_models: List[ForecastingModel],
train_num_samples: int,
- train_samples_reduction: Union[str, float],
+ train_samples_reduction: Optional[Union[str, float]],
+ train_forecasting_models: bool = True,
show_warnings: bool = True,
):
raise_if_not(
- isinstance(models, list) and models,
- "Cannot instantiate EnsembleModel with an empty list of models",
+ isinstance(forecasting_models, list) and forecasting_models,
+ "Cannot instantiate EnsembleModel with an empty list of `forecasting_models`",
logger,
)
- is_local_model = [isinstance(model, LocalForecastingModel) for model in models]
+ is_local_model = [
+ isinstance(model, LocalForecastingModel) for model in forecasting_models
+ ]
is_global_model = [
- isinstance(model, GlobalForecastingModel) for model in models
+ isinstance(model, GlobalForecastingModel) for model in forecasting_models
]
self.is_local_ensemble = all(is_local_model)
@@ -244,23 +249,46 @@ Source code for darts.models.forecasting.ensemble_model
]
),
"All models must be of type `GlobalForecastingModel`, or `LocalForecastingModel`. "
- "Also, make sure that all models in `forecasting_model/models` are instantiated.",
+ "Also, make sure that all `forecasting_models` are instantiated.",
logger,
)
+ model_fit_status = [m._fit_called for m in forecasting_models]
+ self.all_trained = all(model_fit_status)
+ some_trained = any(model_fit_status)
+
raise_if(
- any([m._fit_called for m in models]),
- "Cannot instantiate EnsembleModel with trained/fitted models. "
- "Consider resetting all models with `my_model.untrained_model()`",
+ (not self.is_global_ensemble and some_trained)
+ or (self.is_global_ensemble and not (self.all_trained or not some_trained)),
+ "Cannot instantiate EnsembleModel with a mixture of unfitted and fitted `forecasting_models`. "
+ "Consider resetting all models with `my_model.untrained_model()` or using only trained "
+ "GlobalForecastingModels together with `retrain_forecasting_models=False`.",
logger,
)
+ if train_forecasting_models:
+ # prevent issues with pytorch-lightning trainer during retraining
+ raise_if(
+ some_trained,
+ "`retrain_forecasting_models=True` but some `forecasting_models` were already fitted. "
+ "Consider resetting all the `forecasting_models` with `my_model.untrained_model()` "
+ "before passing them to the `EnsembleModel`.",
+ logger,
+ )
+ else:
+ raise_if_not(
+ self.is_global_ensemble and self.all_trained,
+ "`retrain_forecasting_models=False` is supported only if all the `forecasting_models` are "
+ "already trained `GlobalForecastingModels`.",
+ logger,
+ )
+
raise_if(
train_num_samples is not None
and train_num_samples > 1
- and all([not m._is_probabilistic for m in models]),
+ and all([not m._is_probabilistic for m in forecasting_models]),
"`train_num_samples` is greater than 1 but the `RegressionEnsembleModel` "
- "contains only deterministic models.",
+ "contains only deterministic `forecasting_models`.",
logger,
)
@@ -283,16 +311,20 @@ Source code for darts.models.forecasting.ensemble_model
)
else:
raise_log(
- f"`train_samples_reduction` type not supported "
- f"({train_samples_reduction}). Must be `float` "
- f" or one of {supported_reduction}.",
+ ValueError(
+ f"`train_samples_reduction` type not supported "
+ f"({train_samples_reduction}). Must be `float` "
+ f" or one of {supported_reduction}."
+ ),
logger,
)
super().__init__()
- self.models = models
+ self.forecasting_models = forecasting_models
self.train_num_samples = train_num_samples
self.train_samples_reduction = train_samples_reduction
+ self.train_forecasting_models = train_forecasting_models
+ self.show_warnings = show_warnings
if show_warnings:
if (
@@ -300,8 +332,8 @@ Source code for darts.models.forecasting.ensemble_model
and not self._full_past_covariates_support()
):
logger.warning(
- "Some models in the ensemble do not support past covariates, the past covariates will be "
- "provided only to the models supporting them when calling fit()` or `predict()`. "
+ "Some `forecasting_models` in the ensemble do not support past covariates, the past covariates "
+ "will be provided only to the models supporting them when calling fit()` or `predict()`. "
"To hide these warnings, set `show_warnings=False`."
)
@@ -310,8 +342,8 @@ Source code for darts.models.forecasting.ensemble_model
and not self._full_future_covariates_support()
):
logger.warning(
- "Some models in the ensemble do not support future covariates, the future covariates will be "
- "provided only to the models supporting them when calling `fit()` or `predict()`. "
+ "Some `forecasting_models` in the ensemble do not support future covariates, the future covariates"
+ " will be provided only to the models supporting them when calling `fit()` or `predict()`. "
"To hide these warnings, set `show_warnings=False`."
)
@@ -333,8 +365,8 @@ Source code for darts.models.forecasting.ensemble_model
# local models OR mix of local and global models
raise_if(
not self.is_global_ensemble and not is_single_series,
- "The models contain at least one LocalForecastingModel, which does not support training on multiple "
- "series.",
+ "The `forecasting_models` contain at least one LocalForecastingModel, which does not support training "
+ "on multiple series.",
logger,
)
@@ -364,7 +396,7 @@ Source code for darts.models.forecasting.ensemble_model
def _stack_ts_seq(self, predictions):
# stacks list of predictions into one multivariate timeseries
- return reduce(lambda a, b: a.stack(b), predictions)
+ return concatenate(predictions, axis=1)
def _stack_ts_multiseq(self, predictions_list):
# stacks multiple sequences of timeseries elementwise
@@ -372,7 +404,7 @@ Source code for darts.models.forecasting.ensemble_model
def _model_encoder_settings(self):
raise NotImplementedError(
- "Encoders are not supported by EnsembleModels. Instead add encoder to the underlying `models`."
+ "Encoders are not supported by EnsembleModels. Instead add encoder to the underlying `forecasting_models`."
)
def _make_multiple_predictions(
@@ -383,7 +415,7 @@ Source code for darts.models.forecasting.ensemble_model
future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
num_samples: int = 1,
predict_likelihood_parameters: bool = False,
- ):
+ ) -> Union[TimeSeries, Sequence[TimeSeries]]:
is_single_series = isinstance(series, TimeSeries) or series is None
# maximize covariate usage
predictions = [
@@ -399,15 +431,11 @@ Source code for darts.models.forecasting.ensemble_model
num_samples=num_samples if model._is_probabilistic else 1,
predict_likelihood_parameters=predict_likelihood_parameters,
)
- for model in self.models
+ for model in self.forecasting_models
]
# reduce the probabilistics series
- if (
- self.train_samples_reduction is not None
- and self.train_num_samples is not None
- and self.train_num_samples > 1
- ):
+ if self.train_samples_reduction is not None and self.train_num_samples > 1:
predictions = [
self._predictions_reduction(prediction) for prediction in predictions
]
@@ -428,6 +456,13 @@ Source code for darts.models.forecasting.ensemble_model
verbose: bool = False,
predict_likelihood_parameters: bool = False,
) -> Union[TimeSeries, Sequence[TimeSeries]]:
+ # ensure forecasting models all rely on the same series during inference
+ if series is None:
+ series = self.training_series
+ if past_covariates is None:
+ past_covariates = self.past_covariate_series
+ if future_covariates is None:
+ future_covariates = self.future_covariate_series
super().predict(
n=n,
@@ -511,11 +546,11 @@ Source code for darts.models.forecasting.ensemble_model
@property
def min_train_series_length(self) -> int:
- return max(model.min_train_series_length for model in self.models)
+ return max(model.min_train_series_length for model in self.forecasting_models)
@property
def min_train_samples(self) -> int:
- return max(model.min_train_samples for model in self.models)
+ return max(model.min_train_samples for model in self.forecasting_models)
@property
def extreme_lags(
@@ -530,7 +565,7 @@ Source code for darts.models.forecasting.ensemble_model
]:
def find_max_lag_or_none(lag_id, aggregator) -> Optional[int]:
max_lag = None
- for model in self.models:
+ for model in self.forecasting_models:
curr_lag = model.extreme_lags[lag_id]
if max_lag is None:
max_lag = curr_lag
@@ -550,7 +585,7 @@ Source code for darts.models.forecasting.ensemble_model
"""
tmp = [
m.output_chunk_length
- for m in self.models
+ for m in self.forecasting_models
if m.output_chunk_length is not None
]
@@ -561,7 +596,7 @@ Source code for darts.models.forecasting.ensemble_model
@property
def _models_are_probabilistic(self) -> bool:
- return all([model._is_probabilistic for model in self.models])
+ return all([model._is_probabilistic for model in self.forecasting_models])
@property
def _models_same_likelihood(self) -> bool:
@@ -572,7 +607,7 @@ Source code for darts.models.forecasting.ensemble_model
models_likelihood = set()
lkl_same_params = True
tmp_quantiles = None
- for m in self.models:
+ for m in self.forecasting_models:
# regression model likelihood is a string, torch-based model likelihoods is an object
likelihood = getattr(m, "likelihood")
is_obj_lkl = not isinstance(likelihood, str)
@@ -598,7 +633,15 @@ Source code for darts.models.forecasting.ensemble_model
"""EnsembleModel can predict likelihood parameters if all its forecasting models were fitted with the
same likelihood.
"""
- return self._models_same_likelihood
+ return (
+ all(
+ [
+ m.supports_likelihood_parameter_prediction
+ for m in self.forecasting_models
+ ]
+ )
+ and self._models_same_likelihood
+ )
@property
def _is_probabilistic(self) -> bool:
@@ -606,21 +649,33 @@ Source code for darts.models.forecasting.ensemble_model
@property
def supports_multivariate(self) -> bool:
- return all([model.supports_multivariate for model in self.models])
+ return all([model.supports_multivariate for model in self.forecasting_models])
@property
def supports_past_covariates(self) -> bool:
- return any([model.supports_past_covariates for model in self.models])
+ return any(
+ [model.supports_past_covariates for model in self.forecasting_models]
+ )
@property
def supports_future_covariates(self) -> bool:
- return any([model.supports_future_covariates for model in self.models])
+ return any(
+ [model.supports_future_covariates for model in self.forecasting_models]
+ )
+
+ @property
+ def _supports_non_retrainable_historical_forecasts(self) -> bool:
+ return self.is_global_ensemble
def _full_past_covariates_support(self) -> bool:
- return all([model.supports_past_covariates for model in self.models])
+ return all(
+ [model.supports_past_covariates for model in self.forecasting_models]
+ )
def _full_future_covariates_support(self) -> bool:
- return all([model.supports_future_covariates for model in self.models])
+ return all(
+ [model.supports_future_covariates for model in self.forecasting_models]
+ )
def _verify_past_future_covariates(self, past_covariates, future_covariates):
"""
@@ -629,13 +684,13 @@ Source code for darts.models.forecasting.ensemble_model
raise_if(
past_covariates is not None and not self.supports_past_covariates,
"`past_covariates` were provided to an `EnsembleModel` but none of its "
- "base models support such covariates.",
+ "`forecasting_models` support such covariates.",
logger,
)
raise_if(
future_covariates is not None and not self.supports_future_covariates,
"`future_covariates` were provided to an `EnsembleModel` but none of its "
- "base models support such covariates.",
+ "`forecasting_models` support such covariates.",
logger,
)
diff --git a/_modules/darts/models/forecasting/exponential_smoothing.html b/_modules/darts/models/forecasting/exponential_smoothing.html
index fbfb000de8..d5bef01109 100644
--- a/_modules/darts/models/forecasting/exponential_smoothing.html
+++ b/_modules/darts/models/forecasting/exponential_smoothing.html
@@ -233,6 +233,24 @@ Source code for darts.models.forecasting.exponential_smoothing
:func:`statsmodels.tsa.holtwinters.ExponentialSmoothing.fit()`.
See `the documentation
<https://www.statsmodels.org/stable/generated/statsmodels.tsa.holtwinters.ExponentialSmoothing.fit.html>`_.
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import ExponentialSmoothing
+ >>> from darts.utils.utils import ModelMode, SeasonalityMode
+ >>> series = AirPassengersDataset().load()
+ >>> # using Holt-Winters' exponential smoothing (additive trend, multiplicative seasonality)
+ >>> model = ExponentialSmoothing(trend=ModelMode.ADDITIVE, seasonal=SeasonalityMode.MULTIPLICATIVE)
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[445.24283838],
+ [418.22618932],
+ [465.31305075],
+ [494.95129261],
+ [505.4770514 ],
+ [573.31519186]])
"""
super().__init__()
self.trend = trend
diff --git a/_modules/darts/models/forecasting/fft.html b/_modules/darts/models/forecasting/fft.html
index b8e8e26a08..b82ab5ff56 100644
--- a/_modules/darts/models/forecasting/fft.html
+++ b/_modules/darts/models/forecasting/fft.html
@@ -420,6 +420,30 @@ Source code for darts.models.forecasting.fft
global trend, and do not perform any frequency filtering:
>>> FFT(required_matches={'month'}, trend='exp')
+
+ Simple usage example, using one of the datasets available in Darts:
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import FFT
+ >>> series = AirPassengersDataset().load()
+ >>> # increase the number of frequencies and use a polynomial trend of degree 2
+ >>> model = FFT(
+ >>> nr_freqs_to_keep=20,
+ >>> trend= "poly",
+ >>> trend_poly_degree=2
+ >>> )
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[471.79323146],
+ [494.6381425 ],
+ [504.5659999 ],
+ [515.82463265],
+ [520.59404623],
+ [547.26720705]])
+
+ .. note::
+ `FFT example notebook <https://unit8co.github.io/darts/examples/03-FFT-examples.html>`_ presents techniques
+ that can be used to improve the forecast quality compared to this simple usage example.
"""
super().__init__()
self.nr_freqs_to_keep = nr_freqs_to_keep
diff --git a/_modules/darts/models/forecasting/forecasting_model.html b/_modules/darts/models/forecasting/forecasting_model.html
index 5571003ca1..71d1f24f5f 100644
--- a/_modules/darts/models/forecasting/forecasting_model.html
+++ b/_modules/darts/models/forecasting/forecasting_model.html
@@ -191,6 +191,11 @@ Source code for darts.models.forecasting.forecasting_model
from random import sample
from typing import Any, BinaryIO, Callable, Dict, List, Optional, Sequence, Tuple, Union
+try:
+ from typing import Literal
+except ImportError:
+ from typing_extensions import Literal
+
import numpy as np
import pandas as pd
@@ -701,12 +706,40 @@ Source code for darts.models.forecasting.forecasting_model
series = args[0]
_historical_forecasts_general_checks(self, series, kwargs)
- def _get_last_prediction_time(self, series, forecast_horizon, overlap_end):
+ def _get_last_prediction_time(
+ self,
+ series,
+ forecast_horizon,
+ overlap_end,
+ latest_possible_prediction_start,
+ ):
+ # when overlap_end=True, we can simply use the precomputed last possible prediction start point
if overlap_end:
- last_valid_pred_time = series.time_index[-1]
- else:
+ return latest_possible_prediction_start
+
+ # (1) otherwise, we have to step `forecast_horizon` steps back.
+ # (2) additionally, we check whether the `latest_possible_prediction_start` was shifted back
+ # from the overall theoretical latest possible prediction start point (which is by definition
+ # the first time step after the end of the target series) due to too short covariates.
+ theoretical_latest_prediction_start = series.end_time() + series.freq
+ if latest_possible_prediction_start == theoretical_latest_prediction_start:
+ # (1)
last_valid_pred_time = series.time_index[-forecast_horizon]
-
+ else:
+ # (2)
+ covariates_shift = (
+ len(
+ generate_index(
+ start=latest_possible_prediction_start,
+ end=theoretical_latest_prediction_start,
+ freq=series.freq,
+ )
+ )
+ - 2
+ )
+ last_valid_pred_time = series.time_index[
+ -(forecast_horizon + covariates_shift)
+ ]
return last_valid_pred_time
def _check_optimizable_historical_forecasts(
@@ -727,6 +760,7 @@ Source code for darts.models.forecasting.forecasting_model
num_samples: int = 1,
train_length: Optional[int] = None,
start: Optional[Union[pd.Timestamp, float, int]] = None,
+ start_format: Literal["position", "value"] = "value",
forecast_horizon: int = 1,
stride: int = 1,
retrain: Union[bool, int, Callable[..., bool]] = True,
@@ -776,15 +810,14 @@ Source code for darts.models.forecasting.forecasting_model
steps available, all steps up until prediction time are used, as in default case. Needs to be at least
`min_train_series_length`.
start
- Optionally, the first point in time at which a prediction is computed for a future time.
- This parameter supports: ``float``, ``int`` and ``pandas.Timestamp``, and ``None``.
- If a ``float``, the parameter will be treated as the proportion of the time series
- that should lie before the first prediction point.
- If an ``int``, the parameter will be treated as an integer index to the time index of
- `series` that will be used as first prediction time.
- If a ``pandas.Timestamp``, the time stamp will be used to determine the first prediction time
- directly.
- If ``None``, the first prediction time will automatically be set to:
+ Optionally, the first point in time at which a prediction is computed. This parameter supports:
+ ``float``, ``int``, ``pandas.Timestamp``, and ``None``.
+ If a ``float``, it is the proportion of the time series that should lie before the first prediction point.
+ If an ``int``, it is either the index position of the first prediction point for `series` with a
+ `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to
+ the index position with `start_format="position"`.
+ If a ``pandas.Timestamp``, it is the time stamp of the first prediction point.
+ If ``None``, the first prediction point will automatically be set to:
- the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first
predictable point is earlier than the first trainable point.
@@ -795,6 +828,13 @@ Source code for darts.models.forecasting.forecasting_model
Note: Raises a ValueError if `start` yields a time outside the time index of `series`.
Note: If `start` is outside the possible historical forecasting times, will ignore the parameter
(default behavior with ``None``) and start at the first trainable/predictable point.
+ start_format
+ Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a
+ `pd.RangeIndex`.
+ If set to 'position', `start` corresponds to the index position of the first predicted point and can range
+ from `(-len(series), len(series) - 1)`.
+ If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise
+ an error if the value is not in `series`' index. Default: ``'value'``
forecast_horizon
The forecast horizon for the predictions.
stride
@@ -965,6 +1005,7 @@ Source code for darts.models.forecasting.forecasting_model
future_covariates=future_covariates,
num_samples=num_samples,
start=start,
+ start_format=start_format,
forecast_horizon=forecast_horizon,
stride=stride,
overlap_end=overlap_end,
@@ -1043,6 +1084,7 @@ Source code for darts.models.forecasting.forecasting_model
forecast_horizon=forecast_horizon,
overlap_end=overlap_end,
start=start,
+ start_format=start_format,
show_warnings=show_warnings,
)
@@ -1075,7 +1117,10 @@ Source code for darts.models.forecasting.forecasting_model
# iterate and forecast
for _counter, pred_time in enumerate(iterator):
# drop everything after `pred_time` to train on / predict with shifting input
- train_series = series_.drop_after(pred_time)
+ if pred_time <= series_.end_time():
+ train_series = series_.drop_after(pred_time)
+ else:
+ train_series = series_
# optionally, apply moving window (instead of expanding window)
if train_length_ and len(train_series) > train_length_:
@@ -1197,6 +1242,7 @@ Source code for darts.models.forecasting.forecasting_model
num_samples: int = 1,
train_length: Optional[int] = None,
start: Optional[Union[pd.Timestamp, float, int]] = None,
+ start_format: Literal["position", "value"] = "value",
forecast_horizon: int = 1,
stride: int = 1,
retrain: Union[bool, int, Callable[..., bool]] = True,
@@ -1252,25 +1298,31 @@ Source code for darts.models.forecasting.forecasting_model
steps available, all steps up until prediction time are used, as in default case. Needs to be at least
`min_train_series_length`.
start
- Optionally, the first point in time at which a prediction is computed for a future time.
- This parameter supports: ``float``, ``int`` and ``pandas.Timestamp``, and ``None``.
- If a ``float``, the parameter will be treated as the proportion of the time series
- that should lie before the first prediction point.
- If an ``int``, the parameter will be treated as an integer index to the time index of
- `series` that will be used as first prediction time.
- If a ``pandas.Timestamp``, the time stamp will be used to determine the first prediction time
- directly.
- If ``None``, the first prediction time will automatically be set to:
- - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first
- predictable point is earlier than the first trainable point.
-
- - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`),
- or `retrain` is a Callable and the first trainable point is earlier than the first predictable point.
-
- - the first trainable point (given `train_length`) otherwise
+ Optionally, the first point in time at which a prediction is computed. This parameter supports:
+ ``float``, ``int``, ``pandas.Timestamp``, and ``None``.
+ If a ``float``, it is the proportion of the time series that should lie before the first prediction point.
+ If an ``int``, it is either the index position of the first prediction point for `series` with a
+ `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to
+ the index position with `start_format="position"`.
+ If a ``pandas.Timestamp``, it is the time stamp of the first prediction point.
+ If ``None``, the first prediction point will automatically be set to:
+
+ - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first
+ predictable point is earlier than the first trainable point.
+ - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`),
+ or `retrain` is a Callable and the first trainable point is earlier than the first predictable point.
+ - the first trainable point (given `train_length`) otherwise
+
Note: Raises a ValueError if `start` yields a time outside the time index of `series`.
Note: If `start` is outside the possible historical forecasting times, will ignore the parameter
(default behavior with ``None``) and start at the first trainable/predictable point.
+ start_format
+ Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a
+ `pd.RangeIndex`.
+ If set to 'position', `start` corresponds to the index position of the first predicted point and can range
+ from `(-len(series), len(series) - 1)`.
+ If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise
+ an error if the value is not in `series`' index. Default: ``'value'``
forecast_horizon
The forecast horizon for the point predictions.
stride
@@ -1327,6 +1379,7 @@ Source code for darts.models.forecasting.forecasting_model
num_samples=num_samples,
train_length=train_length,
start=start,
+ start_format=start_format,
forecast_horizon=forecast_horizon,
stride=stride,
retrain=retrain,
@@ -1377,6 +1430,7 @@ Source code for darts.models.forecasting.forecasting_model
forecast_horizon: Optional[int] = None,
stride: int = 1,
start: Union[pd.Timestamp, float, int] = 0.5,
+ start_format: Literal["position", "value"] = "value",
last_points_only: bool = False,
show_warnings: bool = True,
val_series: Optional[TimeSeries] = None,
@@ -1442,17 +1496,38 @@ Source code for darts.models.forecasting.forecasting_model
forecast_horizon
The integer value of the forecasting horizon. Activates expanding window mode.
stride
- The number of time steps between two consecutive predictions. Only used in expanding window mode.
+ Only used in expanding window mode. The number of time steps between two consecutive predictions.
start
- The ``int``, ``float`` or ``pandas.Timestamp`` that represents the starting point in the time index
- of `series` from which predictions will be made to evaluate the model.
- For a detailed description of how the different data types are interpreted, please see the documentation
- for `ForecastingModel.backtest`. Only used in expanding window mode.
+ Only used in expanding window mode. Optionally, the first point in time at which a prediction is computed.
+ This parameter supports: ``float``, ``int``, ``pandas.Timestamp``, and ``None``.
+ If a ``float``, it is the proportion of the time series that should lie before the first prediction point.
+ If an ``int``, it is either the index position of the first prediction point for `series` with a
+ `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to
+ the index position with `start_format="position"`.
+ If a ``pandas.Timestamp``, it is the time stamp of the first prediction point.
+ If ``None``, the first prediction point will automatically be set to:
+
+ - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first
+ predictable point is earlier than the first trainable point.
+ - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`),
+ or `retrain` is a Callable and the first trainable point is earlier than the first predictable point.
+ - the first trainable point (given `train_length`) otherwise
+
+ Note: Raises a ValueError if `start` yields a time outside the time index of `series`.
+ Note: If `start` is outside the possible historical forecasting times, will ignore the parameter
+ (default behavior with ``None``) and start at the first trainable/predictable point.
+ start_format
+ Only used in expanding window mode. Defines the `start` format. Only effective when `start` is an integer
+ and `series` is indexed with a `pd.RangeIndex`.
+ If set to 'position', `start` corresponds to the index position of the first predicted point and can range
+ from `(-len(series), len(series) - 1)`.
+ If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise
+ an error if the value is not in `series`' index. Default: ``'value'``
last_points_only
- Whether to use the whole forecasts or only the last point of each forecast to compute the error. Only used
- in expanding window mode.
+ Only used in expanding window mode. Whether to use the whole forecasts or only the last point of each
+ forecast to compute the error.
show_warnings
- Whether to show warnings related to the `start` parameter. Only used in expanding window mode.
+ Only used in expanding window mode. Whether to show warnings related to the `start` parameter.
val_series
The TimeSeries instance used for validation in split mode. If provided, this series must start right after
the end of `series`; so that a proper comparison of the forecast can be made.
@@ -1553,6 +1628,7 @@ Source code for darts.models.forecasting.forecasting_model
future_covariates=future_covariates,
num_samples=1,
start=start,
+ start_format=start_format,
forecast_horizon=forecast_horizon,
stride=stride,
metric=metric,
@@ -2060,6 +2136,7 @@ Source code for darts.models.forecasting.forecasting_model
future_covariates: Optional[Sequence[TimeSeries]] = None,
num_samples: int = 1,
start: Optional[Union[pd.Timestamp, float, int]] = None,
+ start_format: Literal["position", "value"] = "value",
forecast_horizon: int = 1,
stride: int = 1,
overlap_end: bool = False,
@@ -2316,13 +2393,13 @@ Source code for darts.models.forecasting.forecasting_model
def _predict_wrapper(
self,
n: int,
- series: TimeSeries,
- past_covariates: Optional[TimeSeries],
- future_covariates: Optional[TimeSeries],
+ series: Union[TimeSeries, Sequence[TimeSeries]],
+ past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
+ future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
num_samples: int,
verbose: bool = False,
predict_likelihood_parameters: bool = False,
- ) -> TimeSeries:
+ ) -> Union[TimeSeries, Sequence[TimeSeries]]:
kwargs = dict()
if self.supports_likelihood_parameter_prediction:
kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters
@@ -2338,9 +2415,9 @@ Source code for darts.models.forecasting.forecasting_model
def _fit_wrapper(
self,
- series: TimeSeries,
- past_covariates: Optional[TimeSeries],
- future_covariates: Optional[TimeSeries],
+ series: Union[TimeSeries, Sequence[TimeSeries]],
+ past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
+ future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
):
self.fit(
series=series,
diff --git a/_modules/darts/models/forecasting/kalman_forecaster.html b/_modules/darts/models/forecasting/kalman_forecaster.html
index 2b7410f888..c270c6b0e2 100644
--- a/_modules/darts/models/forecasting/kalman_forecaster.html
+++ b/_modules/darts/models/forecasting/kalman_forecaster.html
@@ -234,14 +234,42 @@ Source code for darts.models.forecasting.kalman_forecaster
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'future': ['relative']},
- 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'future': [encode_year]},
'transformer': Scaler()
}
..
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import KalmanForecaster
+ >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries
+ >>> series = AirPassengersDataset().load()
+ >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series
+ >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6)
+ >>> # increasing the size of the state vector
+ >>> model = KalmanForecaster(dim_x=12)
+ >>> model.fit(series, future_covariates=future_cov)
+ >>> pred = model.predict(6, future_covariates=future_cov)
+ >>> pred.values()
+ array([[474.40680728],
+ [440.51801726],
+ [461.94512461],
+ [494.42090089],
+ [528.6436328 ],
+ [590.30647185]])
+
+ .. note::
+ `Kalman example notebook <https://unit8co.github.io/darts/examples/10-Kalman-filter-examples.html>`_
+ presents techniques that can be used to improve the forecast quality compared to this simple usage
+ example.
"""
super().__init__(add_encoders=add_encoders)
self.dim_x = dim_x
diff --git a/_modules/darts/models/forecasting/lgbm.html b/_modules/darts/models/forecasting/lgbm.html
index 96229c4787..a881665f16 100644
--- a/_modules/darts/models/forecasting/lgbm.html
+++ b/_modules/darts/models/forecasting/lgbm.html
@@ -177,13 +177,15 @@ Source code for darts.models.forecasting.lgbm
https://github.com/unit8co/darts/blob/master/INSTALL.md
"""
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Sequence, Union
import lightgbm as lgb
import numpy as np
from darts.logging import get_logger
from darts.models.forecasting.regression_model import (
+ FUTURE_LAGS_TYPE,
+ LAGS_TYPE,
RegressionModelWithCategoricalCovariates,
_LikelihoodMixin,
)
@@ -195,13 +197,13 @@ Source code for darts.models.forecasting.lgbm
[docs]class LightGBMModel(RegressionModelWithCategoricalCovariates, _LikelihoodMixin):
def __init__(
self,
- lags: Union[int, list] = None,
- lags_past_covariates: Union[int, List[int]] = None,
- lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+ lags: Optional[LAGS_TYPE] = None,
+ lags_past_covariates: Optional[LAGS_TYPE] = None,
+ lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
- likelihood: str = None,
- quantiles: List[float] = None,
+ likelihood: Optional[str] = None,
+ quantiles: Optional[List[float]] = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
use_static_covariates: bool = True,
@@ -215,17 +217,33 @@ Source code for darts.models.forecasting.lgbm
Parameters
----------
lags
- Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
- are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
+ Lagged target `series` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `series` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_past_covariates
- Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
- `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
- with lags < 0 is required.
+ Lagged `past_covariates` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+ where `0` corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_future_covariates
- Number of lagged future_covariates values used to predict the next time step. If an tuple (past, future) is
- given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
- `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
- of integers with lags is required.
+ Lagged `future_covariates` values used to predict the next time step/s.
+ If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+ future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, ..., future - 1)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+ 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
output_chunk_length
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
@@ -242,11 +260,14 @@ Source code for darts.models.forecasting.lgbm
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -281,6 +302,36 @@ Source code for darts.models.forecasting.lgbm
treated as categorical are integer-encoded.
**kwargs
Additional keyword arguments passed to `lightgbm.LGBRegressor`.
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import LightGBMModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature
+ >>> # values corresponding to the forecasted period
+ >>> model = LightGBMModel(
+ >>> lags=12,
+ >>> lags_past_covariates=12,
+ >>> lags_future_covariates=[0,1,2,3,4,5],
+ >>> output_chunk_length=6,
+ >>> verbose=-1
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[1006.85376674],
+ [1006.83998586],
+ [1006.63884831],
+ [1006.57201255],
+ [1006.52290556],
+ [1006.39550065]])
"""
kwargs["random_state"] = random_state # seed for tree learner
self.kwargs = kwargs
diff --git a/_modules/darts/models/forecasting/linear_regression_model.html b/_modules/darts/models/forecasting/linear_regression_model.html
index 8a0ff0f95d..b819543453 100644
--- a/_modules/darts/models/forecasting/linear_regression_model.html
+++ b/_modules/darts/models/forecasting/linear_regression_model.html
@@ -172,14 +172,19 @@ Source code for darts.models.forecasting.linear_regression_model
A forecasting model using a linear regression of some of the target series' lags, as well as optionally some
covariate series lags in order to obtain a forecast.
"""
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Sequence, Union
import numpy as np
from scipy.optimize import linprog
from sklearn.linear_model import LinearRegression, PoissonRegressor, QuantileRegressor
from darts.logging import get_logger
-from darts.models.forecasting.regression_model import RegressionModel, _LikelihoodMixin
+from darts.models.forecasting.regression_model import (
+ FUTURE_LAGS_TYPE,
+ LAGS_TYPE,
+ RegressionModel,
+ _LikelihoodMixin,
+)
from darts.timeseries import TimeSeries
logger = get_logger(__name__)
@@ -188,13 +193,13 @@ Source code for darts.models.forecasting.linear_regression_model
[docs]class LinearRegressionModel(RegressionModel, _LikelihoodMixin):
def __init__(
self,
- lags: Union[int, list] = None,
- lags_past_covariates: Union[int, List[int]] = None,
- lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+ lags: Optional[LAGS_TYPE] = None,
+ lags_past_covariates: Optional[LAGS_TYPE] = None,
+ lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
- likelihood: str = None,
- quantiles: List[float] = None,
+ likelihood: Optional[str] = None,
+ quantiles: Optional[List[float]] = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
use_static_covariates: bool = True,
@@ -205,17 +210,33 @@ Source code for darts.models.forecasting.linear_regression_model
Parameters
----------
lags
- Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
- are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
+ Lagged target `series` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `series` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_past_covariates
- Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
- `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
- with lags < 0 is required.
+ Lagged `past_covariates` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+ where `0` corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_future_covariates
- Number of lagged future_covariates values used to predict the next time step. If an tuple (past, future) is
- given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
- `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
- of integers with lags is required.
+ Lagged `future_covariates` values used to predict the next time step/s.
+ If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+ future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, ..., future - 1)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+ 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
output_chunk_length
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
@@ -232,11 +253,14 @@ Source code for darts.models.forecasting.linear_regression_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -263,6 +287,36 @@ Source code for darts.models.forecasting.linear_regression_model
Additional keyword arguments passed to `sklearn.linear_model.LinearRegression` (by default), to
`sklearn.linear_model.PoissonRegressor` (if `likelihood="poisson"`), or to
`sklearn.linear_model.QuantileRegressor` (if `likelihood="quantile"`).
+
+ Examples
+ --------
+ Deterministic forecasting, using past/future covariates (optional)
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import LinearRegressionModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature
+ >>> # values corresponding to the forecasted period
+ >>> model = LinearRegressionModel(
+ >>> lags=12,
+ >>> lags_past_covariates=12,
+ >>> lags_future_covariates=[0,1,2,3,4,5],
+ >>> output_chunk_length=6,
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[1005.72085839],
+ [1005.6548696 ],
+ [1005.65403772],
+ [1005.6846175 ],
+ [1005.75753605],
+ [1005.81830675]])
"""
self.kwargs = kwargs
self._median_idx = None
diff --git a/_modules/darts/models/forecasting/nbeats.html b/_modules/darts/models/forecasting/nbeats.html
index f679bda1d9..72bccefccb 100644
--- a/_modules/darts/models/forecasting/nbeats.html
+++ b/_modules/darts/models/forecasting/nbeats.html
@@ -178,7 +178,10 @@ Source code for darts.models.forecasting.nbeats
<
import torch.nn as nn
from darts.logging import get_logger, raise_if_not, raise_log
-from darts.models.forecasting.pl_forecasting_module import PLPastCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLPastCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import PastCovariatesTorchModel
from darts.utils.torch import MonteCarloDropout
@@ -657,6 +660,7 @@ Source code for darts.models.forecasting.nbeats
<
self.stacks_list[-1].blocks[-1].backcast_linear_layer.requires_grad_(False)
self.stacks_list[-1].blocks[-1].backcast_g.requires_grad_(False)
+ @io_processor
def forward(self, x_in: Tuple):
x, _ = x_in
@@ -783,6 +787,9 @@ Source code for darts.models.forecasting.nbeats
<
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [2]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -823,11 +830,14 @@ Source code for darts.models.forecasting.nbeats
<
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -889,6 +899,39 @@ Source code for darts.models.forecasting.nbeats
<
References
----------
.. [1] https://openreview.net/forum?id=r1ecqn4YwB
+ .. [2] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import NBEATSModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # changing the activation function of the encoder/decoder to LeakyReLU
+ >>> model = NBEATSModel(
+ >>> input_chunk_length=6,
+ >>> output_chunk_length=6,
+ >>> n_epochs=5,
+ >>> activation='LeakyReLU'
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[ 929.78509085],
+ [1013.66339481],
+ [ 999.8843893 ],
+ [ 892.66032082],
+ [ 921.09781534],
+ [ 950.37965429]])
+
+ .. note::
+ `NBEATS example notebook <https://unit8co.github.io/darts/examples/07-NBEATS-examples.html>`_
+ presents techniques that can be used to improve the forecasts quality compared to this simple usage
+ example.
"""
super().__init__(**self._extract_torch_model_params(**self.model_params))
diff --git a/_modules/darts/models/forecasting/nhits.html b/_modules/darts/models/forecasting/nhits.html
index 9beeb2c864..3b325f99cd 100644
--- a/_modules/darts/models/forecasting/nhits.html
+++ b/_modules/darts/models/forecasting/nhits.html
@@ -178,7 +178,10 @@ Source code for darts.models.forecasting.nhits
import torch.nn.functional as F
from darts.logging import get_logger, raise_if_not
-from darts.models.forecasting.pl_forecasting_module import PLPastCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLPastCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import PastCovariatesTorchModel
from darts.utils.torch import MonteCarloDropout
@@ -584,6 +587,7 @@
Source code for darts.models.forecasting.nhits
# on this params (the last block backcast is not part of the final output of the net).
self.stacks_list[-1].blocks[-1].backcast_linear_layer.requires_grad_(False)
+ @io_processor
def forward(self, x_in: Tuple):
x, _ = x_in
@@ -719,6 +723,9 @@
Source code for darts.models.forecasting.nhits
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [2]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -759,11 +766,14 @@
Source code for darts.models.forecasting.nhits
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -826,6 +836,34 @@
Source code for darts.models.forecasting.nhits
----------
.. [1] C. Challu et al. "N-HiTS: Neural Hierarchical Interpolation for Time Series Forecasting",
https://arxiv.org/abs/2201.12886
+ .. [2] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import NHiTSModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # increasing the number of blocks
+ >>> model = NHiTSModel(
+ >>> input_chunk_length=6,
+ >>> output_chunk_length=6,
+ >>> num_blocks=2,
+ >>> n_epochs=5,
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[958.2354389 ],
+ [939.23201079],
+ [987.51425784],
+ [919.41209025],
+ [925.09583093],
+ [938.95625528]])
"""
super().__init__(**self._extract_torch_model_params(**self.model_params))
diff --git a/_modules/darts/models/forecasting/nlinear.html b/_modules/darts/models/forecasting/nlinear.html
index dac14db6ca..cb46e6016b 100644
--- a/_modules/darts/models/forecasting/nlinear.html
+++ b/_modules/darts/models/forecasting/nlinear.html
@@ -176,7 +176,10 @@
Source code for darts.models.forecasting.nlinear
import torch.nn as nn
from darts.logging import raise_if
-from darts.models.forecasting.pl_forecasting_module import PLMixedCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLMixedCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import MixedCovariatesTorchModel
@@ -273,6 +276,7 @@ Source code for darts.models.forecasting.nlinear
layer_in_dim_static_cov, layer_out_dim
)
+ @io_processor
def forward(
self, x_in: Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]
):
@@ -305,7 +309,8 @@ Source code for darts.models.forecasting.nlinear
x = x.permute(0, 2, 1, 3)
else:
if self.normalize:
- seq_last = x[:, -1:, :].detach() # (batch, 1, in_dim)
+ # get last values only for target features
+ seq_last = x[:, -1:, : self.output_dim].detach()
x = x - seq_last
x = self.layer(x.view(batch, -1)) # (batch, out_len * out_dim * nr_params)
@@ -313,9 +318,6 @@ Source code for darts.models.forecasting.nlinear
batch, self.output_chunk_length, self.output_dim * self.nr_params
)
- if self.normalize:
- x = x + seq_last # Note: works only when nr_params == 1
-
if self.future_cov_dim != 0:
# x_future might be shorter than output_chunk_length when n < output_chunk_length
# so we need to pad it with zeros at the end to match the output_chunk_length
@@ -338,7 +340,8 @@ Source code for darts.models.forecasting.nlinear
)
x = x.view(batch, self.output_chunk_length, self.output_dim, self.nr_params)
-
+ if self.normalize:
+ x = x + seq_last.view(seq_last.shape + (1,))
return x
@@ -413,6 +416,9 @@ Source code for darts.models.forecasting.nlinear
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [2]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -453,11 +459,14 @@ Source code for darts.models.forecasting.nlinear
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -518,6 +527,36 @@ Source code for darts.models.forecasting.nlinear
----------
.. [1] Zeng, A., Chen, M., Zhang, L., & Xu, Q. (2022).
Are Transformers Effective for Time Series Forecasting?. arXiv preprint arXiv:2205.13504.
+ .. [2] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import NLinearModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # predict 6 pressure values using the 6 past values of pressure and rainfall, as well as the 6 temperature
+ >>> # values corresponding to the forecasted period
+ >>> model = NLinearModel(
+ >>> input_chunk_length=6,
+ >>> output_chunk_length=6,
+ >>> n_epochs=20,
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[429.56117169],
+ [428.93264096],
+ [428.35210616],
+ [428.13154426],
+ [427.98781641],
+ [428.00325481]])
"""
super().__init__(**self._extract_torch_model_params(**self.model_params))
diff --git a/_modules/darts/models/forecasting/pl_forecasting_module.html b/_modules/darts/models/forecasting/pl_forecasting_module.html
index a7cdec95e4..239571f15b 100644
--- a/_modules/darts/models/forecasting/pl_forecasting_module.html
+++ b/_modules/darts/models/forecasting/pl_forecasting_module.html
@@ -179,6 +179,7 @@ Source code for darts.models.forecasting.pl_forecasting_module
from joblib import Parallel, delayed
from darts.logging import get_logger, raise_if, raise_log
+from darts.models.components.layer_norm_variants import RINorm
from darts.timeseries import TimeSeries
from darts.utils.likelihood_models import Likelihood
from darts.utils.timeseries_generation import _build_forecast_series
@@ -191,6 +192,43 @@ Source code for darts.models.forecasting.pl_forecasting_module
pl_160_or_above = int(tokens[0]) > 1 or int(tokens[0]) == 1 and int(tokens[1]) >= 6
+[docs]def io_processor(forward):
+ """Applies some input / output processing to PLForecastingModule.forward.
+ Note that this wrapper must be added to each of PLForecastingModule's subclasses' forward methods.
+ Here is an example how to add the decorator:
+
+ ```python
+ @io_processor
+ def forward(self, *args, **kwargs):
+ pass
+ ```
+
+ Applies
+ -------
+ Reversible Instance Normalization
+ normalizes batch input target features, and inverse transforms the forward output back to the original scale
+ """
+
+ def forward_wrapper(self, *args, **kwargs):
+ if not self.use_reversible_instance_norm:
+ return forward(self, *args, **kwargs)
+
+ # x is input batch tuple which by definition has the past features in the first element starting with the
+ # first n target features
+ x: Tuple = args[0][0]
+ # apply reversible instance normalization
+ x[:, :, : self.n_targets] = self.rin(x[:, :, : self.n_targets])
+ # run the forward pass
+ out = forward(self, *((x, *args[0][1:]), *args[1:]), **kwargs)
+ # inverse transform target output back to original scale; by definition the first output
+ if isinstance(out, tuple):
+ return self.rin.inverse(out[0]), *out[1:]
+ else:
+ return self.rin.inverse(out)
+
+ return forward_wrapper
+
+
[docs]class PLForecastingModule(pl.LightningModule, ABC):
@abstractmethod
def __init__(
@@ -207,6 +245,7 @@ Source code for darts.models.forecasting.pl_forecasting_module
optimizer_kwargs: Optional[Dict] = None,
lr_scheduler_cls: Optional[torch.optim.lr_scheduler._LRScheduler] = None,
lr_scheduler_kwargs: Optional[Dict] = None,
+ use_reversible_instance_norm: bool = False,
) -> None:
"""
PyTorch Lightning-based Forecasting Module.
@@ -251,6 +290,14 @@ Source code for darts.models.forecasting.pl_forecasting_module
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [1]_.
+ It is only applied to the features of the target series and not the covariates.
+
+ References
+ ----------
+ .. [1] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
"""
super().__init__()
@@ -274,6 +321,9 @@ Source code for darts.models.forecasting.pl_forecasting_module
# saved in checkpoint to be able to instantiate a model without calling fit_from_dataset
self.train_sample_shape = train_sample_shape
+ self.n_targets = (
+ train_sample_shape[0][1] if train_sample_shape is not None else 1
+ )
# persist optimiser and LR scheduler parameters
self.optimizer_cls = optimizer_cls
@@ -288,6 +338,13 @@ Source code for darts.models.forecasting.pl_forecasting_module
self.train_metrics = torch_metrics.clone(prefix="train_")
self.val_metrics = torch_metrics.clone(prefix="val_")
+ # reversible instance norm
+ self.use_reversible_instance_norm = use_reversible_instance_norm
+ if use_reversible_instance_norm:
+ self.rin = RINorm(input_dim=self.n_targets)
+ else:
+ self.rin = None
+
# initialize prediction parameters
self.pred_n: Optional[int] = None
self.pred_num_samples: Optional[int] = None
diff --git a/_modules/darts/models/forecasting/prophet_model.html b/_modules/darts/models/forecasting/prophet_model.html
index 99c252b3bf..e1cdbb65bd 100644
--- a/_modules/darts/models/forecasting/prophet_model.html
+++ b/_modules/darts/models/forecasting/prophet_model.html
@@ -195,11 +195,17 @@ Source code for darts.models.forecasting.prophet_model
country_holidays: Optional[str] = None,
suppress_stdout_stderror: bool = True,
add_encoders: Optional[dict] = None,
- cap: Union[
- float, Callable[[Union[pd.DatetimeIndex, pd.RangeIndex]], Sequence[float]]
+ cap: Optional[
+ Union[
+ float,
+ Callable[[Union[pd.DatetimeIndex, pd.RangeIndex]], Sequence[float]],
+ ]
] = None,
- floor: Union[
- float, Callable[[Union[pd.DatetimeIndex, pd.RangeIndex]], Sequence[float]]
+ floor: Optional[
+ Union[
+ float,
+ Callable[[Union[pd.DatetimeIndex, pd.RangeIndex]], Sequence[float]],
+ ]
] = None,
**prophet_kwargs,
):
@@ -259,11 +265,14 @@ Source code for darts.models.forecasting.prophet_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'future': ['relative']},
- 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'future': [encode_year]},
'transformer': Scaler()
}
..
@@ -291,6 +300,32 @@ Source code for darts.models.forecasting.prophet_model
Some optional keyword arguments for Prophet.
For information about the parameters see:
`The Prophet source code <https://github.com/facebook/prophet/blob/master/python/prophet/forecaster.py>`_.
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import Prophet
+ >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries
+ >>> series = AirPassengersDataset().load()
+ >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series
+ >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6)
+ >>> # adding a seasonality (daily, weekly and yearly are included by default) and holidays
+ >>> model = Prophet(
+ >>> add_seasonalities={
+ >>> 'name':"quarterly_seasonality",
+ >>> 'seasonal_periods':4,
+ >>> 'fourier_order':5
+ >>> },
+ >>> )
+ >>> model.fit(series, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[472.26891239],
+ [467.56955721],
+ [494.47230467],
+ [493.10568429],
+ [497.54686113],
+ [539.11716811]])
"""
super().__init__(add_encoders=add_encoders)
diff --git a/_modules/darts/models/forecasting/random_forest.html b/_modules/darts/models/forecasting/random_forest.html
index 8eb3961950..b04a01ce7a 100644
--- a/_modules/darts/models/forecasting/random_forest.html
+++ b/_modules/darts/models/forecasting/random_forest.html
@@ -181,12 +181,16 @@ Source code for darts.models.forecasting.random_forest
----------
.. [1] https://en.wikipedia.org/wiki/Random_forest
"""
-from typing import List, Optional, Tuple, Union
+from typing import Optional
from sklearn.ensemble import RandomForestRegressor
from darts.logging import get_logger
-from darts.models.forecasting.regression_model import RegressionModel
+from darts.models.forecasting.regression_model import (
+ FUTURE_LAGS_TYPE,
+ LAGS_TYPE,
+ RegressionModel,
+)
logger = get_logger(__name__)
@@ -194,9 +198,9 @@ Source code for darts.models.forecasting.random_forest
[docs]class RandomForest(RegressionModel):
def __init__(
self,
- lags: Union[int, list] = None,
- lags_past_covariates: Union[int, List[int]] = None,
- lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+ lags: Optional[LAGS_TYPE] = None,
+ lags_past_covariates: Optional[LAGS_TYPE] = None,
+ lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
n_estimators: Optional[int] = 100,
@@ -210,17 +214,33 @@ Source code for darts.models.forecasting.random_forest
Parameters
----------
lags
- Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
- are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
+ Lagged target `series` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `series` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_past_covariates
- Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
- `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
- with lags < 0 is required.
+ Lagged `past_covariates` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+ where `0` corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_future_covariates
- Number of lagged future_covariates values used to predict the next time step. If an tuple (past, future) is
- given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
- `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
- of integers with lags is required.
+ Lagged `future_covariates` values used to predict the next time step/s.
+ If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+ future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, ..., future - 1)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+ 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
output_chunk_length
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
@@ -237,11 +257,14 @@ Source code for darts.models.forecasting.random_forest
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -259,6 +282,36 @@ Source code for darts.models.forecasting.random_forest
that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``.
**kwargs
Additional keyword arguments passed to `sklearn.ensemble.RandomForest`.
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import RandomForest
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # random forest with 200 trees trained with MAE
+ >>> model = RandomForest(
+ >>> lags=12,
+ >>> lags_past_covariates=12,
+ >>> lags_future_covariates=[0,1,2,3,4,5],
+ >>> output_chunk_length=6,
+ >>> n_estimators=200,
+ >>> criterion="absolute_error",
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[1006.29805],
+ [1006.23675],
+ [1006.17325],
+ [1006.10295],
+ [1006.06505],
+ [1006.05465]])
"""
self.n_estimators = n_estimators
self.max_depth = max_depth
diff --git a/_modules/darts/models/forecasting/regression_ensemble_model.html b/_modules/darts/models/forecasting/regression_ensemble_model.html
index c05f0746c1..785d10b5e0 100644
--- a/_modules/darts/models/forecasting/regression_ensemble_model.html
+++ b/_modules/darts/models/forecasting/regression_ensemble_model.html
@@ -178,7 +178,7 @@ Source code for darts.models.forecasting.regression_ensemble_model
from darts.models.forecasting.forecasting_model import ForecastingModel
from darts.models.forecasting.linear_regression_model import LinearRegressionModel
from darts.models.forecasting.regression_model import RegressionModel
-from darts.timeseries import TimeSeries
+from darts.timeseries import TimeSeries, concatenate
from darts.utils.utils import seq2series, series2seq
logger = get_logger(__name__)
@@ -190,8 +190,10 @@ Source code for darts.models.forecasting.regression_ensemble_model
forecasting_models: List[ForecastingModel],
regression_train_n_points: int,
regression_model=None,
- regression_train_num_samples: Optional[int] = 1,
+ regression_train_num_samples: int = 1,
regression_train_samples_reduction: Optional[Union[str, float]] = "median",
+ train_forecasting_models: bool = True,
+ train_using_historical_forecasts: bool = False,
show_warnings: bool = True,
):
"""
@@ -206,6 +208,9 @@ Source code for darts.models.forecasting.regression_ensemble_model
If `future_covariates` or `past_covariates` are provided at training or inference time,
they will be passed only to the forecasting models supporting them.
+ If `forecasting_models` contains exclusively GlobalForecastingModels, they can be pre-trained. Otherwise,
+ the `forecasting_models` must be untrained.
+
The regression model does not leverage the covariates passed to ``fit()`` and ``predict()``.
Parameters
@@ -213,7 +218,9 @@ Source code for darts.models.forecasting.regression_ensemble_model
forecasting_models
List of forecasting models whose predictions to ensemble
regression_train_n_points
- The number of points to use to train the regression model
+ The number of points per series to use to train the regression model. Can be set to `-1` to use the
+ entire series to train the regressor if `forecasting_models` are already fitted and
+ `train_forecasting_models=False`.
regression_model
Any regression model with ``predict()`` and ``fit()`` methods (e.g. from scikit-learn)
Default: ``darts.model.LinearRegressionModel(fit_intercept=False)``
@@ -230,19 +237,51 @@ Source code for darts.models.forecasting.regression_ensemble_model
`regression_train_num_samples will be passed only to the probabilistic ones.
..
regression_train_samples_reduction
- If `forecasting models` are probabilistic and `regression_train_num_samples` > 1, method used to
+ If `forecasting_models` are probabilistic and `regression_train_num_samples` > 1, method used to
reduce the samples before passing them to the regression model. Possible values: "mean", "median"
or float value corresponding to the desired quantile. Default: "median"
+ train_forecasting_models
+ If set to `False`, the `forecasting_models` are not retrained when calling `fit()` (only supported
+ if all the `forecasting_models` are pretrained `GlobalForecastingModels`). Default: ``True``.
+ train_using_historical_forecasts
+ If set to `True`, use `historical_forecasts()` to generate the forecasting models' predictions used to
+ train the regression model in `fit()`. Available when `forecasting_models` contains only
+ `GlobalForecastingModels`. Recommended when `regression_train_n_points` is greater than
+ `output_chunk_length` of the underlying `forecasting_models`.
+ Default: ``False``.
show_warnings
Whether to show warnings related to forecasting_models covariates support.
References
----------
.. [1] D. H. Wolpert, “Stacked generalization”, Neural Networks, vol. 5, no. 2, pp. 241–259, Jan. 1992
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import RegressionEnsembleModel, NaiveSeasonal, LinearRegressionModel
+ >>> series = AirPassengersDataset().load()
+ >>> model = RegressionEnsembleModel(
+ >>> forecasting_models = [
+ >>> NaiveSeasonal(K=12),
+ >>> LinearRegressionModel(lags=4)
+ >>> ],
+ >>> regression_train_n_points=20
+ >>> )
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[494.24050364],
+ [464.3869697 ],
+ [496.53180506],
+ [544.82269341],
+ [557.35256055],
+ [630.24334385]])
"""
super().__init__(
- models=forecasting_models,
+ forecasting_models=forecasting_models,
train_num_samples=regression_train_num_samples,
train_samples_reduction=regression_train_samples_reduction,
+ train_forecasting_models=train_forecasting_models,
show_warnings=show_warnings,
)
@@ -267,61 +306,242 @@ Source code for darts.models.forecasting.regression_ensemble_model
)
self.regression_model: RegressionModel = regression_model
- self.train_n_points = regression_train_n_points
+
+ raise_if(
+ regression_train_n_points == -1
+ and not (self.all_trained and (not train_forecasting_models)),
+ "`regression_train_n_points` can only be `-1` if `retrain_forecasting_model=False` and "
+ "all `forecasting_models` are already fitted.",
+ logger,
+ )
+
+ # converted to List[int] if regression_train_n_points=-1 and ensemble is trained with multiple series
+ self.train_n_points: Union[int, List[int]] = regression_train_n_points
+
+ raise_if(
+ train_using_historical_forecasts and not self.is_global_ensemble,
+ "`train_using_historical_forecasts=True` is only available when all "
+ "`forecasting_models` are global models.",
+ logger,
+ )
+
+ self.train_using_historical_forecasts = train_using_historical_forecasts
def _split_multi_ts_sequence(
- self, n: int, ts_sequence: Sequence[TimeSeries]
+ self, n: Union[int, List[int]], ts_sequence: Sequence[TimeSeries]
) -> Tuple[Sequence[TimeSeries], Sequence[TimeSeries]]:
- left = [ts[:-n] for ts in ts_sequence]
- right = [ts[-n:] for ts in ts_sequence]
+ if isinstance(n, int):
+ n = [n] * len(ts_sequence)
+ left = [ts[:-n_] for ts, n_ in zip(ts_sequence, n)]
+ right = [ts[-n_:] for ts, n_ in zip(ts_sequence, n)]
return left, right
+ def _make_multiple_historical_forecasts(
+ self,
+ train_n_points: int,
+ series: Union[TimeSeries, Sequence[TimeSeries]],
+ direct_predictions: Union[TimeSeries, Sequence[TimeSeries]],
+ past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
+ future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
+ num_samples: int = 1,
+ ) -> Union[TimeSeries, Sequence[TimeSeries]]:
+ """
+ For GlobalForecastingModel, when predicting n > output_chunk_length, `historical_forecasts()` generally
+ produces better forecasts than `predict()`.
+
+ To get as close as possible to the predictions generated by the forecasting models during inference,
+ `historical_forecasts` forecast horizon is equal to each model output_chunk_length.
+
+ train_n_points are generated, starting from the end of the series.
+ """
+ is_single_series = isinstance(series, TimeSeries)
+ series = series2seq(series)
+ direct_predictions = series2seq(direct_predictions)
+ past_covariates = series2seq(past_covariates)
+ future_covariates = series2seq(future_covariates)
+
+ n_components = series[0].n_components
+ model_predict_cols = direct_predictions[0].columns.tolist()
+
+ predictions = []
+ for m_idx, model in enumerate(self.forecasting_models):
+ # we start historical fc at multiple of the output length before the end.
+ n_ocl_back = train_n_points // model.output_chunk_length
+ start_hist_forecasts = n_ocl_back * model.output_chunk_length
+
+ # we use the precomputed `direct_prediction` to fill any missing prediction
+ # timesteps at the beginning (if train_n_points is not perfectly divisible by output length)
+ missing_steps = train_n_points % model.output_chunk_length
+
+ tmp_pred = model.historical_forecasts(
+ series=series,
+ past_covariates=past_covariates
+ if model.supports_past_covariates
+ else None,
+ future_covariates=future_covariates
+ if model.supports_future_covariates
+ else None,
+ forecast_horizon=model.output_chunk_length,
+ stride=model.output_chunk_length,
+ num_samples=num_samples if model._is_probabilistic else 1,
+ start=-start_hist_forecasts,
+ start_format="position",
+ retrain=False,
+ overlap_end=False,
+ last_points_only=False,
+ show_warnings=self.show_warnings,
+ predict_likelihood_parameters=False,
+ )
+ # concatenate the strided predictions of output_chunk_length values each
+ if is_single_series:
+ tmp_pred = [concatenate(tmp_pred, axis=0)]
+ else:
+ tmp_pred = [concatenate(sub_pred, axis=0) for sub_pred in tmp_pred]
+
+ # add the missing steps at beginning by taking the first values of precomputed predictions
+ if missing_steps:
+ # add the missing steps at beginning by taking the first values of precomputed predictions
+ # get the model's direct (uni/multivariate) predictions
+ pred_cols = model_predict_cols[
+ m_idx * n_components : (m_idx + 1) * n_components
+ ]
+ hfc_cols = tmp_pred[0].columns.tolist()
+ tmp_pred = [
+ concatenate(
+ [
+ preds_dir[:missing_steps][pred_cols].with_columns_renamed(
+ pred_cols, hfc_cols
+ ),
+ preds_hfc,
+ ],
+ axis=0,
+ )
+ for preds_dir, preds_hfc in zip(direct_predictions, tmp_pred)
+ ]
+ predictions.append(tmp_pred)
+
+ tmp_predictions = []
+ # slice the forecasts, training series-wise, to align them
+ for prediction in predictions:
+ tmp_predictions.append([ts for idx, ts in enumerate(prediction)])
+ predictions = [seq2series(prediction) for prediction in tmp_predictions]
+
+ # reduce the probabilistic series
+ if self.train_samples_reduction is not None and self.train_num_samples > 1:
+ predictions = [
+ self._predictions_reduction(prediction) for prediction in predictions
+ ]
+
+ return (
+ self._stack_ts_seq(predictions)
+ if is_single_series
+ else self._stack_ts_multiseq(predictions)
+ )
+
[docs] def fit(
self,
series: Union[TimeSeries, Sequence[TimeSeries]],
past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
):
+ """
+ Fits the forecasting models with the entire series except the last `regression_train_n_points` values, which
+ are used to train the regression model.
+
+ If `forecasting_models` contains fitted `GlobalForecastingModels` and `train_forecasting_models=False`,
+ only the regression model will be trained.
+
+ Parameters
+ ----------
+ series
+ TimeSeries or Sequence[TimeSeries] object containing the target values.
+ past_covariates
+ Optionally, a series or sequence of series specifying past-observed covariates passed to the
+ forecasting models
+ future_covariates
+ Optionally, a series or sequence of series specifying future-known covariates passed to the
+ forecasting models
+ """
super().fit(
series, past_covariates=past_covariates, future_covariates=future_covariates
)
# spare train_n_points points to serve as regression target
is_single_series = isinstance(series, TimeSeries)
- if is_single_series:
- train_n_points_too_big = len(self.training_series) <= self.train_n_points
- else:
- train_n_points_too_big = any(
- [len(s) <= self.train_n_points for s in series]
+ if self.train_n_points == -1:
+ if is_single_series:
+ train_n_points = [len(series)]
+ else:
+ # maximize each series usage
+ train_n_points = [len(ts) for ts in series]
+
+ # shift by the forecasting models' largest input length
+ all_shifts = []
+ # when it's not clearly defined, extreme_lags returns
+ # min_train_serie_length for the LocalForecastingModels
+ for model in self.forecasting_models:
+ min_target_lag, _, _, _, _, _ = model.extreme_lags
+ if min_target_lag is not None:
+ all_shifts.append(-min_target_lag)
+
+ input_shift = max(all_shifts)
+ idx_series_too_short = []
+ tmp_train_n_points = []
+ for idx, ts_length in enumerate(train_n_points):
+ ajusted_length = ts_length - input_shift
+ if ajusted_length < 0:
+ idx_series_too_short.append(idx)
+ else:
+ tmp_train_n_points.append(ajusted_length)
+
+ raise_if(
+ len(idx_series_too_short) > 0,
+ f"TimeSeries at indexes {idx_series_too_short} of `series` are too short to train the regression "
+ f"model due to the number of values necessary to produce one prediction : {input_shift}.",
+ logger,
)
+ if is_single_series:
+ self.train_n_points = tmp_train_n_points[0]
+ else:
+ self.train_n_points = tmp_train_n_points
+
+ train_n_points_too_big = False
+ else:
+ # self.train_n_points is necessarily an integer
+ if is_single_series:
+ train_n_points_too_big = len(series) <= self.train_n_points
+ else:
+ train_n_points_too_big = any(
+ [len(s) <= self.train_n_points for s in series]
+ )
+
raise_if(
train_n_points_too_big,
- "`regression_train_n_points` parameter too big (must be smaller or "
- "equal to the number of points in training_series)",
+ "`regression_train_n_points` parameter too big (must be strictly smaller than "
+ "the number of points in training_series)",
logger,
)
if is_single_series:
- forecast_training = self.training_series[: -self.train_n_points]
- regression_target = self.training_series[-self.train_n_points :]
+ forecast_training = series[: -self.train_n_points]
+ regression_target = series[-self.train_n_points :]
else:
forecast_training, regression_target = self._split_multi_ts_sequence(
self.train_n_points, series
)
- for model in self.models:
- # maximize covariate usage
- model._fit_wrapper(
- series=forecast_training,
- past_covariates=past_covariates
- if model.supports_past_covariates
- else None,
- future_covariates=future_covariates
- if model.supports_future_covariates
- else None,
- )
-
+ if self.train_forecasting_models:
+ for model in self.forecasting_models:
+ # maximize covariate usage
+ model._fit_wrapper(
+ series=forecast_training,
+ past_covariates=past_covariates,
+ future_covariates=future_covariates,
+ )
+
+ # we can call direct prediction in any case. Even if we overwrite with historical
+ # forecasts later on, it serves as an input validation
predictions = self._make_multiple_predictions(
n=self.train_n_points,
series=forecast_training,
@@ -330,23 +550,33 @@ Source code for darts.models.forecasting.regression_ensemble_model
num_samples=self.train_num_samples,
)
+ if self.train_using_historical_forecasts:
+ predictions = self._make_multiple_historical_forecasts(
+ train_n_points=self.train_n_points,
+ series=series,
+ direct_predictions=predictions,
+ past_covariates=past_covariates,
+ future_covariates=future_covariates,
+ num_samples=self.train_num_samples,
+ )
+
# train the regression model on the individual models' predictions
self.regression_model.fit(
series=regression_target, future_covariates=predictions
)
# prepare the forecasting models for further predicting by fitting them with the entire data
-
- # Some models (incl. Neural-Network based models) may need to be 'reset' to allow being retrained from scratch
- self.models = [model.untrained_model() for model in self.models]
-
- for model in self.models:
- kwargs = dict(series=series)
- if model.supports_past_covariates:
- kwargs["past_covariates"] = past_covariates
- if model.supports_future_covariates:
- kwargs["future_covariates"] = future_covariates
- model.fit(**kwargs)
+ if self.train_forecasting_models:
+ # Some models may need to be 'reset' to allow being retrained from scratch, especially torch-based models
+ self.forecasting_models: List[ForecastingModel] = [
+ model.untrained_model() for model in self.forecasting_models
+ ]
+ for model in self.forecasting_models:
+ model._fit_wrapper(
+ series=series,
+ past_covariates=past_covariates,
+ future_covariates=future_covariates,
+ )
return self
[docs] def ensemble(
@@ -384,7 +614,11 @@ Source code for darts.models.forecasting.regression_ensemble_model
Optional[int],
]:
extreme_lags_ = super().extreme_lags
- return (extreme_lags_[0] - self.train_n_points,) + extreme_lags_[1:]
+ # shift min_target_lag in the past to account for the regression model training set
+ if extreme_lags_[0] is None:
+ return (-self.train_n_points,) + extreme_lags_[1:]
+ else:
+ return (extreme_lags_[0] - self.train_n_points,) + extreme_lags_[1:]
@property
def output_chunk_length(self) -> int:
diff --git a/_modules/darts/models/forecasting/regression_model.html b/_modules/darts/models/forecasting/regression_model.html
index 8a6782b81c..22640f35de 100644
--- a/_modules/darts/models/forecasting/regression_model.html
+++ b/_modules/darts/models/forecasting/regression_model.html
@@ -196,6 +196,11 @@ Source code for darts.models.forecasting.regression_model
from collections import OrderedDict
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
+try:
+ from typing import Literal
+except ImportError:
+ from typing_extensions import Literal
+
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
@@ -222,13 +227,18 @@ Source code for darts.models.forecasting.regression_model
logger = get_logger(__name__)
+LAGS_TYPE = Union[int, List[int], Dict[str, Union[int, List[int]]]]
+FUTURE_LAGS_TYPE = Union[
+ Tuple[int, int], List[int], Dict[str, Union[Tuple[int, int], List[int]]]
+]
+
[docs]class RegressionModel(GlobalForecastingModel):
def __init__(
self,
- lags: Union[int, list] = None,
- lags_past_covariates: Union[int, List[int]] = None,
- lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+ lags: Optional[LAGS_TYPE] = None,
+ lags_past_covariates: Optional[LAGS_TYPE] = None,
+ lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
model=None,
@@ -241,17 +251,33 @@ Source code for darts.models.forecasting.regression_model
Parameters
----------
lags
- Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
- are used (from -1 backward). Otherwise, a list of integers with lags is required (each lag must be < 0).
+ Lagged target `series` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `series` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_past_covariates
- Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
- `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
- with lags < 0 is required.
+ Lagged `past_covariates` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+ where `0` corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_future_covariates
- Number of lagged future_covariates values used to predict the next time step. If a tuple (past, future) is
- given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
- `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
- of integers with lags is required.
+ Lagged `future_covariates` values used to predict the next time step/s.
+ If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+ future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, .... future - 1)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+ 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
output_chunk_length
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
@@ -268,11 +294,14 @@ Source code for darts.models.forecasting.regression_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -288,12 +317,43 @@ Source code for darts.models.forecasting.regression_model
Whether the model should use static covariate information in case the input `series` passed to ``fit()``
contain static covariates. If ``True``, and static covariates are available at fitting time, will enforce
that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``.
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import RegressionModel
+ >>> from sklearn.linear_model import Ridge
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # wrap around the sklearn Ridge model
+ >>> model = RegressionModel(
+ >>> model=Ridge(),
+ >>> lags=12,
+ >>> lags_past_covariates=4,
+ >>> lags_future_covariates=(0,6),
+ >>> output_chunk_length=6
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[1005.73340676],
+ [1005.71159051],
+ [1005.7322616 ],
+ [1005.76314504],
+ [1005.82204348],
+ [1005.89100967]])
"""
super().__init__(add_encoders=add_encoders)
self.model = model
self.lags: Dict[str, List[int]] = {}
+ self.component_lags: Dict[str, Dict[str, List[int]]] = {}
self.input_dim = None
self.multi_models = multi_models
self._considers_static_covariates = use_static_covariates
@@ -329,94 +389,158 @@ Source code for darts.models.forecasting.regression_model
"At least one of `lags`, `lags_future_covariates` or `lags_past_covariates` must be not None.",
)
- lags_type_checks = [
- (lags, "lags"),
- (lags_past_covariates, "lags_past_covariates"),
- ]
+ # convert lags arguments to list of int
+ self.lags, self.component_lags = self._generate_lags(
+ lags=lags,
+ lags_past_covariates=lags_past_covariates,
+ lags_future_covariates=lags_future_covariates,
+ )
- for _lags, lags_name in lags_type_checks:
- raise_if_not(
- isinstance(_lags, (int, list)) or _lags is None,
- f"`{lags_name}` must be of type int or list. Given: {type(_lags)}.",
- )
- raise_if(
- isinstance(_lags, bool),
- f"`{lags_name}` must be of type int or list, not bool.",
- )
+ self.pred_dim = self.output_chunk_length if self.multi_models else 1
- raise_if_not(
- isinstance(lags_future_covariates, (tuple, list))
- or lags_future_covariates is None,
- f"`lags_future_covariates` must be of type tuple or list. Given: {type(lags_future_covariates)}.",
- )
+ def _generate_lags(
+ self,
+ lags: Optional[LAGS_TYPE],
+ lags_past_covariates: Optional[LAGS_TYPE],
+ lags_future_covariates: Optional[FUTURE_LAGS_TYPE],
+ ) -> Tuple[Dict[str, List[int]], Dict[str, Dict[str, List[int]]]]:
+ """
+ Based on the type of the argument and the nature of the covariates, perform some sanity checks before
+ converting the lags to a list of integers.
- if isinstance(lags_future_covariates, tuple):
- raise_if_not(
- len(lags_future_covariates) == 2
- and isinstance(lags_future_covariates[0], int)
- and isinstance(lags_future_covariates[1], int),
- "`lags_future_covariates` tuple must be of length 2, and must contain two integers",
- )
- raise_if(
- isinstance(lags_future_covariates[0], bool)
- or isinstance(lags_future_covariates[1], bool),
- "`lags_future_covariates` tuple must contain integers, not bool",
- )
+ If lags are provided as a dictionary, the lags values are contained in self.component_lags and the self.lags
+ attributes contain only the extreme values
+ If the lags are provided as an integer, list, tuple or dictionary containing only the 'default_lags' key, the lags
+ values are contained in the self.lags attribute and the self.component_lags is an empty dictionary.
+ """
+ processed_lags: Dict[str, List[int]] = dict()
+ processed_component_lags: Dict[str, Dict[str, List[int]]] = dict()
+ for lags_values, lags_name, lags_abbrev in zip(
+ [lags, lags_past_covariates, lags_future_covariates],
+ ["lags", "lags_past_covariates", "lags_future_covariates"],
+ ["target", "past", "future"],
+ ):
+ if lags_values is None:
+ continue
- # set lags
- if isinstance(lags, int):
- raise_if_not(lags > 0, f"`lags` must be strictly positive. Given: {lags}.")
- # selecting last `lags` lags, starting from position 1 (skipping current, pos 0, the one we want to predict)
- self.lags["target"] = list(range(-lags, 0))
- elif isinstance(lags, list):
- for lag in lags:
- raise_if(
- not isinstance(lag, int) or (lag >= 0),
- f"Every element of `lags` must be a strictly negative integer. Given: {lags}.",
+ # converting to dictionary to run sanity checks
+ if not isinstance(lags_values, dict):
+ lags_values = {"default_lags": lags_values}
+ elif len(lags_values) == 0:
+ raise_log(
+ ValueError(
+ f"When passed as a dictionary, `{lags_name}` must contain at least one key."
+ ),
+ logger,
)
- if lags:
- self.lags["target"] = sorted(lags)
- if isinstance(lags_past_covariates, int):
- raise_if_not(
- lags_past_covariates > 0,
- f"`lags_past_covariates` must be an integer > 0. Given: {lags_past_covariates}.",
- )
- self.lags["past"] = list(range(-lags_past_covariates, 0))
- elif isinstance(lags_past_covariates, list):
- for lag in lags_past_covariates:
- raise_if(
- not isinstance(lag, int) or (lag >= 0),
- f"Every element of `lags_covariates` must be an integer < 0. Given: {lags_past_covariates}.",
- )
- if lags_past_covariates:
- self.lags["past"] = sorted(lags_past_covariates)
+ invalid_type = False
+ supported_types = ""
+ min_lags = None
+ max_lags = None
+ tmp_components_lags: Dict[str, List[int]] = dict()
+ for comp_name, comp_lags in lags_values.items():
+ if lags_name == "lags_future_covariates":
+ if isinstance(comp_lags, tuple):
+ raise_if_not(
+ len(comp_lags) == 2
+ and isinstance(comp_lags[0], int)
+ and isinstance(comp_lags[1], int),
+ f"`{lags_name}` - `{comp_name}`: tuple must be of length 2, and must contain two integers",
+ logger,
+ )
- if isinstance(lags_future_covariates, tuple):
- raise_if_not(
- lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
- f"`lags_future_covariates` tuple must contain integers >= 0. Given: {lags_future_covariates}.",
- )
- if (
- lags_future_covariates[0] is not None
- and lags_future_covariates[1] is not None
- ):
- if not (
- lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0
- ):
- self.lags["future"] = list(
- range(-lags_future_covariates[0], lags_future_covariates[1])
+ raise_if(
+ isinstance(comp_lags[0], bool)
+ or isinstance(comp_lags[1], bool),
+ f"`{lags_name}` - `{comp_name}`: tuple must contain integers, not bool",
+ logger,
+ )
+
+ raise_if_not(
+ comp_lags[0] >= 0 and comp_lags[1] >= 0,
+ f"`{lags_name}` - `{comp_name}`: tuple must contain positive integers. Given: {comp_lags}.",
+ logger,
+ )
+ raise_if(
+ comp_lags[0] == 0 and comp_lags[1] == 0,
+ f"`{lags_name}` - `{comp_name}`: tuple cannot be (0, 0) as it corresponds to an empty "
+ f"list of lags.",
+ logger,
+ )
+ tmp_components_lags[comp_name] = list(
+ range(-comp_lags[0], comp_lags[1])
+ )
+ elif isinstance(comp_lags, list):
+ for lag in comp_lags:
+ raise_if(
+ not isinstance(lag, int) or isinstance(lag, bool),
+ f"`{lags_name}` - `{comp_name}`: list must contain only integers. Given: {comp_lags}.",
+ logger,
+ )
+ tmp_components_lags[comp_name] = sorted(comp_lags)
+ else:
+ invalid_type = True
+ supported_types = "tuple or a list"
+ else:
+ if isinstance(comp_lags, int):
+ raise_if_not(
+ comp_lags > 0,
+ f"`{lags_name}` - `{comp_name}`: integer must be strictly positive . Given: {comp_lags}.",
+ logger,
+ )
+ tmp_components_lags[comp_name] = list(range(-comp_lags, 0))
+ elif isinstance(comp_lags, list):
+ for lag in comp_lags:
+ raise_if(
+ not isinstance(lag, int) or (lag >= 0),
+ f"`{lags_name}` - `{comp_name}`: list must contain only strictly negative integers. "
+ f"Given: {comp_lags}.",
+ logger,
+ )
+ tmp_components_lags[comp_name] = sorted(comp_lags)
+ else:
+ invalid_type = True
+ supported_types = "strictly positive integer or a list"
+
+ if invalid_type:
+ raise_log(
+ ValueError(
+ f"`{lags_name}` - `{comp_name}`: must be either a {supported_types}. "
+ f"Gived : {type(comp_lags)}."
+ ),
+ logger,
)
- elif isinstance(lags_future_covariates, list):
- for lag in lags_future_covariates:
- raise_if(
- not isinstance(lag, int) or isinstance(lag, bool),
- f"Every element of `lags_future_covariates` must be an integer. Given: {lags_future_covariates}.",
- )
- if lags_future_covariates:
- self.lags["future"] = sorted(lags_future_covariates)
- self.pred_dim = self.output_chunk_length if self.multi_models else 1
+ # extracting min and max lags values
+ if min_lags is None:
+ min_lags = tmp_components_lags[comp_name][0]
+ else:
+ min_lags = min(min_lags, tmp_components_lags[comp_name][0])
+
+ if max_lags is None:
+ max_lags = tmp_components_lags[comp_name][-1]
+ else:
+ max_lags = max(max_lags, tmp_components_lags[comp_name][-1])
+
+ # revert to shared lags logic when applicable
+ if list(tmp_components_lags.keys()) == ["default_lags"]:
+ processed_lags[lags_abbrev] = tmp_components_lags["default_lags"]
+ else:
+ processed_lags[lags_abbrev] = [min_lags, max_lags]
+ processed_component_lags[lags_abbrev] = tmp_components_lags
+
+ return processed_lags, processed_component_lags
+
+ def _get_lags(self, lags_type: str):
+ """
+ If lags were specified in a component-wise manner, they are contained in self.component_lags and
+ the values in self.lags should be ignored as they correspond to just the extreme values.
+ """
+ if lags_type in self.component_lags:
+ return self.component_lags[lags_type]
+ else:
+ return self.lags.get(lags_type)
@property
def _model_encoder_settings(
@@ -457,16 +581,12 @@ Source code for darts.models.forecasting.regression_model
Optional[int],
Optional[int],
]:
- min_target_lag = self.lags.get("target")[0] if "target" in self.lags else None
+ min_target_lag = self.lags["target"][0] if "target" in self.lags else None
max_target_lag = self.output_chunk_length - 1
- min_past_cov_lag = self.lags.get("past")[0] if "past" in self.lags else None
- max_past_cov_lag = self.lags.get("past")[-1] if "past" in self.lags else None
- min_future_cov_lag = (
- self.lags.get("future")[0] if "future" in self.lags else None
- )
- max_future_cov_lag = (
- self.lags.get("future")[-1] if "future" in self.lags else None
- )
+ min_past_cov_lag = self.lags["past"][0] if "past" in self.lags else None
+ max_past_cov_lag = self.lags["past"][-1] if "past" in self.lags else None
+ min_future_cov_lag = self.lags["future"][0] if "future" in self.lags else None
+ max_future_cov_lag = self.lags["future"][-1] if "future" in self.lags else None
return (
min_target_lag,
max_target_lag,
@@ -509,24 +629,13 @@ Source code for darts.models.forecasting.regression_model
return self.model.estimators_[horizon + target_dim]
- def _get_last_prediction_time(self, series, forecast_horizon, overlap_end):
- # overrides the ForecastingModel _get_last_prediction_time, taking care of future lags if any
- extra_shift = max(0, max(lags[-1] for lags in self.lags.values()))
-
- if overlap_end:
- last_valid_pred_time = series.time_index[-1 - extra_shift]
- else:
- last_valid_pred_time = series.time_index[-forecast_horizon - extra_shift]
-
- return last_valid_pred_time
-
def _create_lagged_data(
- self, target_series, past_covariates, future_covariates, max_samples_per_ts
+ self,
+ target_series: Sequence[TimeSeries],
+ past_covariates: Sequence[TimeSeries],
+ future_covariates: Sequence[TimeSeries],
+ max_samples_per_ts: int,
):
- lags = self.lags.get("target")
- lags_past_covariates = self.lags.get("past")
- lags_future_covariates = self.lags.get("future")
-
(
features,
labels,
@@ -537,9 +646,9 @@ Source code for darts.models.forecasting.regression_model
output_chunk_length=self.output_chunk_length,
past_covariates=past_covariates,
future_covariates=future_covariates,
- lags=lags,
- lags_past_covariates=lags_past_covariates,
- lags_future_covariates=lags_future_covariates,
+ lags=self._get_lags("target"),
+ lags_past_covariates=self._get_lags("past"),
+ lags_future_covariates=self._get_lags("future"),
uses_static_covariates=self.uses_static_covariates,
last_static_covariates_shape=None,
max_samples_per_ts=max_samples_per_ts,
@@ -548,7 +657,26 @@ Source code for darts.models.forecasting.regression_model
concatenate=False,
)
+ expected_nb_feat = (
+ features[0].shape[1]
+ if isinstance(features, Sequence)
+ else features.shape[1]
+ )
for i, (X_i, y_i) in enumerate(zip(features, labels)):
+ # TODO: account for scenario where two wrong shapes can silently hide the problem
+ if expected_nb_feat != X_i.shape[1]:
+ shape_error_msg = []
+ for ts, cov_name, arg_name in zip(
+ [target_series, past_covariates, future_covariates],
+ ["target", "past", "future"],
+ ["series", "past_covariates", "future_covariates"],
+ ):
+ if ts is not None and ts[i].width != self.input_dim[cov_name]:
+ shape_error_msg.append(
+ f"Expected {self.input_dim[cov_name]} components but received "
+ f"{ts[i].width} components at index {i} of `{arg_name}`."
+ )
+ raise_log(ValueError("\n".join(shape_error_msg)), logger)
features[i] = X_i[:, :, 0]
labels[i] = y_i[:, :, 0]
@@ -559,10 +687,10 @@ Source code for darts.models.forecasting.regression_model
def _fit_model(
self,
- target_series,
- past_covariates,
- future_covariates,
- max_samples_per_ts,
+ target_series: Sequence[TimeSeries],
+ past_covariates: Sequence[TimeSeries],
+ future_covariates: Sequence[TimeSeries],
+ max_samples_per_ts: int,
**kwargs,
):
"""
@@ -587,9 +715,9 @@ Source code for darts.models.forecasting.regression_model
target_series=target_series,
past_covariates=past_covariates,
future_covariates=future_covariates,
- lags=self.lags.get("target"),
- lags_past_covariates=self.lags.get("past"),
- lags_future_covariates=self.lags.get("future"),
+ lags=self._get_lags("target"),
+ lags_past_covariates=self._get_lags("past"),
+ lags_future_covariates=self._get_lags("future"),
output_chunk_length=self.output_chunk_length,
concatenate=False,
use_static_covariates=self.uses_static_covariates,
@@ -711,6 +839,52 @@ Source code for darts.models.forecasting.regression_model
past_covariates=seq2series(past_covariates),
future_covariates=seq2series(future_covariates),
)
+ variate2arg = {
+ "target": "lags",
+ "past": "lags_past_covariates",
+ "future": "lags_future_covariates",
+ }
+
+ # if provided, component-wise lags must be defined for all the components of the first series
+ component_lags_error_msg = []
+ for variate_type, variate in zip(
+ ["target", "past", "future"], [series, past_covariates, future_covariates]
+ ):
+ if variate_type not in self.component_lags:
+ continue
+
+ # ignore the fallback lags entry
+ provided_components = set(self.component_lags[variate_type].keys())
+ required_components = set(variate[0].components)
+
+ wrong_components = list(
+ provided_components - {"default_lags"} - required_components
+ )
+ missing_keys = list(required_components - provided_components)
+ # lags were specified for unrecognized components
+ if len(wrong_components) > 0:
+ component_lags_error_msg.append(
+ f"The `{variate2arg[variate_type]}` dictionary specifies lags for components that are not "
+ f"present in the series : {wrong_components}. They must be removed to avoid any ambiguity."
+ )
+ elif len(missing_keys) > 0 and "default_lags" not in provided_components:
+ component_lags_error_msg.append(
+ f"The {variate2arg[variate_type]} dictionary is missing the lags for the following components "
+ f"present in the series: {missing_keys}. The key 'default_lags' can be used to provide lags for "
+ f"all the non-explicitely defined components."
+ )
+ else:
+ # reorder the components based on the input series, insert the default when necessary
+ self.component_lags[variate_type] = {
+ comp_name: self.component_lags[variate_type][comp_name]
+ if comp_name in self.component_lags[variate_type]
+ else self.component_lags[variate_type]["default_lags"]
+ for comp_name in variate[0].components
+ }
+
+ # single error message for all the lags arguments
+ if len(component_lags_error_msg) > 0:
+ raise_log(ValueError("\n".join(component_lags_error_msg)), logger)
self._fit_model(
series, past_covariates, future_covariates, max_samples_per_ts, **kwargs
@@ -912,23 +1086,61 @@ Source code for darts.models.forecasting.regression_model
series_matrix = np.concatenate(
[series_matrix, predictions[-1]], axis=1
)
- np_X.append(
- series_matrix[
- :,
- [
- lag - (shift + last_step_shift)
- for lag in self.lags["target"]
- ],
- ].reshape(len(series) * num_samples, -1)
- )
- # retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+)
- for cov_type in ["past", "future"]:
- if cov_type in covariate_matrices:
+ # component-wise lags
+ if "target" in self.component_lags:
+ tmp_X = [
+ series_matrix[
+ :,
+ [lag - (shift + last_step_shift) for lag in comp_lags],
+ comp_i,
+ ]
+ for comp_i, (comp, comp_lags) in enumerate(
+ self.component_lags["target"].items()
+ )
+ ]
+ # values are grouped by component
+ np_X.append(
+ np.concatenate(tmp_X, axis=1).reshape(
+ len(series) * num_samples, -1
+ )
+ )
+ else:
+ # values are grouped by lags
np_X.append(
- covariate_matrices[cov_type][
- :, relative_cov_lags[cov_type] + t_pred
+ series_matrix[
+ :,
+ [
+ lag - (shift + last_step_shift)
+ for lag in self.lags["target"]
+ ],
].reshape(len(series) * num_samples, -1)
)
+ # retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+)
+ for cov_type in ["past", "future"]:
+ if cov_type in covariate_matrices:
+ # component-wise lags
+ if cov_type in self.component_lags:
+ tmp_X = [
+ covariate_matrices[cov_type][
+ :,
+ np.array(comp_lags) - self.lags[cov_type][0] + t_pred,
+ comp_i,
+ ]
+ for comp_i, (comp, comp_lags) in enumerate(
+ self.component_lags[cov_type].items()
+ )
+ ]
+ np_X.append(
+ np.concatenate(tmp_X, axis=1).reshape(
+ len(series) * num_samples, -1
+ )
+ )
+ else:
+ np_X.append(
+ covariate_matrices[cov_type][
+ :, relative_cov_lags[cov_type] + t_pred
+ ].reshape(len(series) * num_samples, -1)
+ )
# concatenate retrieved lags
X = np.concatenate(np_X, axis=1)
@@ -1064,6 +1276,7 @@ Source code for darts.models.forecasting.regression_model
future_covariates: Optional[Sequence[TimeSeries]] = None,
num_samples: int = 1,
start: Optional[Union[pd.Timestamp, float, int]] = None,
+ start_format: Literal["position", "value"] = "value",
forecast_horizon: int = 1,
stride: int = 1,
overlap_end: bool = False,
@@ -1116,6 +1329,7 @@ Source code for darts.models.forecasting.regression_model
future_covariates=future_covariates,
num_samples=num_samples,
start=start,
+ start_format=start_format,
forecast_horizon=forecast_horizon,
stride=stride,
overlap_end=overlap_end,
@@ -1130,6 +1344,7 @@ Source code for darts.models.forecasting.regression_model
future_covariates=future_covariates,
num_samples=num_samples,
start=start,
+ start_format=start_format,
forecast_horizon=forecast_horizon,
stride=stride,
overlap_end=overlap_end,
@@ -1493,11 +1708,14 @@ Source code for darts.models.forecasting.regression_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
diff --git a/_modules/darts/models/forecasting/rnn_model.html b/_modules/darts/models/forecasting/rnn_model.html
index f5d67a338d..ca984f233c 100644
--- a/_modules/darts/models/forecasting/rnn_model.html
+++ b/_modules/darts/models/forecasting/rnn_model.html
@@ -176,7 +176,10 @@ Source code for darts.models.forecasting.rnn_model
import torch.nn as nn
from darts.logging import get_logger, raise_if_not
-from darts.models.forecasting.pl_forecasting_module import PLDualCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLDualCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import DualCovariatesTorchModel
from darts.timeseries import TimeSeries
from darts.utils.data import DualCovariatesShiftedDataset, TrainingDataset
@@ -253,6 +256,7 @@ Source code for darts.models.forecasting.rnn_model
# The RNN module needs a linear layer V that transforms hidden states into outputs, individually
self.V = nn.Linear(hidden_dim, target_size * nr_params)
+ @io_processor
def forward(
self, x_in: Tuple, h: Optional[torch.Tensor] = None
) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -488,11 +492,14 @@ Source code for darts.models.forecasting.rnn_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -550,16 +557,49 @@ Source code for darts.models.forecasting.rnn_model
show_warnings
whether to show warnings raised from PyTorch Lightning. Useful to detect potential issues of
your forecasting use case. Default: ``False``.
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import RNNModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # `training_length` > `input_chunk_length` to mimic inference constraints
+ >>> model = RNNModel(
+ ...     model="RNN",
+ ...     input_chunk_length=6,
+ ...     training_length=18,
+ ...     n_epochs=20,
+ ... )
+ >>> model.fit(target, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[ 3.18922903],
+ [ 1.17791019],
+ [ 0.39992814],
+ [ 0.13277921],
+ [ 0.02523252],
+ [-0.01829086]])
+
+ .. note::
+ `RNN example notebook <https://unit8co.github.io/darts/examples/04-RNN-examples.html>`_ presents techniques
+ that can be used to improve the forecasts quality compared to this simple usage example.
"""
# create copy of model parameters
model_kwargs = {key: val for key, val in self.model_params.items()}
- if model_kwargs.get("output_chunk_length") is not None:
- logger.warning(
- "ignoring user defined `output_chunk_length`. RNNModel uses a fixed `output_chunk_length=1`."
- )
-
- model_kwargs["output_chunk_length"] = 1
+ for kwarg, default_value in zip(
+ ["output_chunk_length", "use_reversible_instance_norm"], [1, False]
+ ):
+ if model_kwargs.get(kwarg) is not None:
+ logger.warning(
+ f"ignoring user defined `{kwarg}`. RNNModel uses a fixed "
+ f"`{kwarg}={default_value}`."
+ )
+ model_kwargs[kwarg] = default_value
super().__init__(**self._extract_torch_model_params(**model_kwargs))
diff --git a/_modules/darts/models/forecasting/sf_auto_arima.html b/_modules/darts/models/forecasting/sf_auto_arima.html
index 5f8a36afb7..28994949e5 100644
--- a/_modules/darts/models/forecasting/sf_auto_arima.html
+++ b/_modules/darts/models/forecasting/sf_auto_arima.html
@@ -199,15 +199,13 @@ Source code for darts.models.forecasting.sf_auto_arima
It is probabilistic, whereas :class:`AutoARIMA` is not.
We refer to the `statsforecast AutoARIMA documentation
- <https://nixtla.github.io/statsforecast/models.html#arima-methods>`_
- for the documentation of the arguments.
+ <https://nixtla.github.io/statsforecast/src/core/models.html#autoarima>`_
+ for the exhaustive documentation of the arguments.
Parameters
----------
autoarima_args
Positional arguments for ``statsforecasts.models.AutoARIMA``.
- autoarima_kwargs
- Keyword arguments for ``statsforecasts.models.AutoARIMA``.
add_encoders
A large number of future covariates can be automatically generated with `add_encoders`.
This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
@@ -220,23 +218,39 @@ Source code for darts.models.forecasting.sf_auto_arima
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'future': ['relative']},
- 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'future': [encode_year]},
'transformer': Scaler()
}
..
+ autoarima_kwargs
+ Keyword arguments for ``statsforecasts.models.AutoARIMA``.
Examples
--------
- >>> from darts.models import StatsForecastAutoARIMA
>>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import StatsForecastAutoARIMA
+ >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries
>>> series = AirPassengersDataset().load()
+ >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series
+ >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6)
+ >>> # define StatsForecastAutoARIMA parameters
>>> model = StatsForecastAutoARIMA(season_length=12)
- >>> model.fit(series[:-36])
- >>> pred = model.predict(36, num_samples=100)
+ >>> model.fit(series, future_covariates=future_cov)
+ >>> pred = model.predict(6, future_covariates=future_cov)
+ >>> pred.values()
+ array([[450.55179949],
+ [415.00597806],
+ [454.61353249],
+ [486.51218795],
+ [504.09229632],
+ [555.06463942]])
"""
super().__init__(add_encoders=add_encoders)
self.model = SFAutoARIMA(*autoarima_args, **autoarima_kwargs)
diff --git a/_modules/darts/models/forecasting/sf_auto_ces.html b/_modules/darts/models/forecasting/sf_auto_ces.html
index 6f736ef2c5..04f9c8ff65 100644
--- a/_modules/darts/models/forecasting/sf_auto_ces.html
+++ b/_modules/darts/models/forecasting/sf_auto_ces.html
@@ -185,8 +185,8 @@ Source code for darts.models.forecasting.sf_auto_ces
<https://onlinelibrary.wiley.com/doi/full/10.1002/nav.22074>
We refer to the `statsforecast AutoCES documentation
- <https://nixtla.github.io/statsforecast/models.html#autoces>`_
- for the documentation of the arguments.
+ <https://nixtla.github.io/statsforecast/src/core/models.html#autoces>`_
+ for the exhaustive documentation of the arguments.
Parameters
----------
@@ -195,16 +195,22 @@ Source code for darts.models.forecasting.sf_auto_ces
autoces_kwargs
Keyword arguments for ``statsforecasts.models.AutoCES``.
- ..
-
Examples
--------
- >>> from darts.models import StatsForecastAutoCES
>>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import StatsForecastAutoCES
>>> series = AirPassengersDataset().load()
- >>> model = StatsForecastAutoCES(season_length=12)
- >>> model.fit(series[:-36])
- >>> pred = model.predict(36, num_samples=100)
+ >>> # define StatsForecastAutoCES parameters
+ >>> model = StatsForecastAutoCES(season_length=12, model="Z")
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[453.03417969],
+ [429.34039307],
+ [488.64471436],
+ [500.28955078],
+ [519.79962158],
+ [586.47503662]])
"""
super().__init__()
self.model = SFAutoCES(*autoces_args, **autoces_kwargs)
diff --git a/_modules/darts/models/forecasting/sf_auto_ets.html b/_modules/darts/models/forecasting/sf_auto_ets.html
index dee22f7d55..b6cb517094 100644
--- a/_modules/darts/models/forecasting/sf_auto_ets.html
+++ b/_modules/darts/models/forecasting/sf_auto_ets.html
@@ -187,7 +187,9 @@ Source code for darts.models.forecasting.sf_auto_ets
[docs]class StatsForecastAutoETS(FutureCovariatesLocalForecastingModel):
- def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs):
+ def __init__(
+ self, *autoets_args, add_encoders: Optional[dict] = None, **autoets_kwargs
+ ):
"""ETS based on `Statsforecasts package
<https://github.com/Nixtla/statsforecast>`_.
@@ -195,8 +197,9 @@ Source code for darts.models.forecasting.sf_auto_ets
but typically requires more time on the first call, because it relies
on Numba and jit compilation.
- This model accepts the same arguments as the `statsforecast ETS
- <https://nixtla.github.io/statsforecast/models.html#ets>`_. package.
+ We refer to the `statsforecast AutoETS documentation
+ <https://nixtla.github.io/statsforecast/src/core/models.html#autoets>`_
+ for the exhaustive documentation of the arguments.
In addition to the StatsForecast implementation, this model can handle future covariates. It does so by first
regressing the series against the future covariates using the :class:'LinearRegressionModel' model and then
@@ -206,19 +209,8 @@ Source code for darts.models.forecasting.sf_auto_ets
Parameters
----------
- season_length
- Number of observations per cycle. Default: 1.
- model
- Three-character string identifying method using the framework
- terminology of Hyndman et al. (2002). Possible values are:
-
- * "A" or "M" for error state,
- * "N", "A" or "Ad" for trend state,
- * "N", "A" or "M" for season state.
-
- For instance, "ANN" means additive error, no trend and no seasonality.
- Furthermore, the character "Z" is a placeholder telling statsforecast
- to search for the best model using AICs. Default: "ZZZ".
+ autoets_args
+ Positional arguments for ``statsforecast.models.AutoETS``.
add_encoders
A large number of future covariates can be automatically generated with `add_encoders`.
This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
@@ -231,26 +223,42 @@ Source code for darts.models.forecasting.sf_auto_ets
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'future': ['relative']},
- 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'future': [encode_year]},
'transformer': Scaler()
}
..
+ autoets_kwargs
+ Keyword arguments for ``statsforecast.models.AutoETS``.
Examples
--------
>>> from darts.datasets import AirPassengersDataset
>>> from darts.models import StatsForecastAutoETS
+ >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries
>>> series = AirPassengersDataset().load()
+ >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series
+ >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6)
+ >>> # define StatsForecastAutoETS parameters
>>> model = StatsForecastAutoETS(season_length=12, model="AZZ")
- >>> model.fit(series[:-36])
- >>> pred = model.predict(36)
+ >>> model.fit(series, future_covariates=future_cov)
+ >>> pred = model.predict(6, future_covariates=future_cov)
+ >>> pred.values()
+ array([[441.40323676],
+ [415.09871431],
+ [448.90785391],
+ [491.38584654],
+ [493.11817462],
+ [549.88974472]])
"""
super().__init__(add_encoders=add_encoders)
- self.model = SFAutoETS(*ets_args, **ets_kwargs)
+ self.model = SFAutoETS(*autoets_args, **autoets_kwargs)
self._linreg = None
def _fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None):
@@ -289,7 +297,7 @@ Source code for darts.models.forecasting.sf_auto_ets
super()._predict(n, future_covariates, num_samples)
forecast_dict = self.model.predict(
h=n,
- level=(one_sigma_rule,), # ask one std for the confidence interval
+ level=[one_sigma_rule], # ask one std for the confidence interval
)
mu_ets, std = unpack_sf_dict(forecast_dict)
diff --git a/_modules/darts/models/forecasting/sf_auto_theta.html b/_modules/darts/models/forecasting/sf_auto_theta.html
index 0c1ca0b039..c6563496b9 100644
--- a/_modules/darts/models/forecasting/sf_auto_theta.html
+++ b/_modules/darts/models/forecasting/sf_auto_theta.html
@@ -193,8 +193,8 @@ Source code for darts.models.forecasting.sf_auto_theta
It is probabilistic, whereas :class:`FourTheta` is not.
We refer to the `statsforecast AutoTheta documentation
- <https://nixtla.github.io/statsforecast/models.html#autotheta>`_
- for the documentation of the arguments.
+ <https://nixtla.github.io/statsforecast/src/core/models.html#autotheta>`_
+ for the exhaustive documentation of the arguments.
Parameters
----------
@@ -203,16 +203,22 @@ Source code for darts.models.forecasting.sf_auto_theta
autotheta_kwargs
Keyword arguments for ``statsforecasts.models.AutoTheta``.
- ..
-
Examples
--------
- >>> from darts.models import StatsForecastAutoTheta
>>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import StatsForecastAutoTheta
>>> series = AirPassengersDataset().load()
+ >>> # define StatsForecastAutoTheta parameters
>>> model = StatsForecastAutoTheta(season_length=12)
- >>> model.fit(series[:-36])
- >>> pred = model.predict(36, num_samples=100)
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[442.94078295],
+ [432.22936898],
+ [495.30609727],
+ [482.30625563],
+ [487.49312172],
+ [555.57902659]])
"""
super().__init__()
self.model = SFAutoTheta(*autotheta_args, **autotheta_kwargs)
diff --git a/_modules/darts/models/forecasting/tbats_model.html b/_modules/darts/models/forecasting/tbats_model.html
index 3bdcd67194..075ed79028 100644
--- a/_modules/darts/models/forecasting/tbats_model.html
+++ b/_modules/darts/models/forecasting/tbats_model.html
@@ -340,6 +340,23 @@ Source code for darts.models.forecasting.tbats_model
See https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
random_state
Sets the underlying random seed at model initialization time.
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import TBATS # or BATS
+ >>> series = AirPassengersDataset().load()
+ >>> # based on preliminary analysis, the series contains a trend
+ >>> model = TBATS(use_trend=True)
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[448.29856017],
+ [439.42215052],
+ [507.73465028],
+ [493.03751671],
+ [498.85885374],
+ [564.64871897]])
"""
super().__init__()
diff --git a/_modules/darts/models/forecasting/tcn_model.html b/_modules/darts/models/forecasting/tcn_model.html
index 515fda873c..879f48e547 100644
--- a/_modules/darts/models/forecasting/tcn_model.html
+++ b/_modules/darts/models/forecasting/tcn_model.html
@@ -178,7 +178,10 @@ Source code for darts.models.forecasting.tcn_model
import torch.nn.functional as F
from darts.logging import get_logger, raise_if_not
-from darts.models.forecasting.pl_forecasting_module import PLPastCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLPastCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import PastCovariatesTorchModel
from darts.timeseries import TimeSeries
from darts.utils.data import PastCovariatesShiftedDataset
@@ -398,6 +401,7 @@ Source code for darts.models.forecasting.tcn_model
self.res_blocks_list.append(res_block)
self.res_blocks = nn.ModuleList(self.res_blocks_list)
+ @io_processor
def forward(self, x_in: Tuple):
x, _ = x_in
# data is of size (batch_size, input_chunk_length, input_size)
@@ -484,6 +488,9 @@ Source code for darts.models.forecasting.tcn_model
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [2]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -524,11 +531,14 @@ Source code for darts.models.forecasting.tcn_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -591,6 +601,37 @@ Source code for darts.models.forecasting.tcn_model
References
----------
.. [1] https://arxiv.org/abs/1803.01271
+ .. [2] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import TCNModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # `output_chunk_length` must be strictly smaller than `input_chunk_length`
+ >>> model = TCNModel(
+ ...     input_chunk_length=12,
+ ...     output_chunk_length=6,
+ ...     n_epochs=20,
+ ... )
+ >>> model.fit(target, past_covariates=past_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[-80.48476824],
+ [-80.47896667],
+ [-41.77135603],
+ [-41.76158729],
+ [-41.76854107],
+ [-41.78166819]])
+
+ .. note::
+ `DeepTCN example notebook <https://unit8co.github.io/darts/examples/09-DeepTCN-examples.html>`_ presents
+ techniques that can be used to improve the forecasts quality compared to this simple usage example.
"""
raise_if_not(
diff --git a/_modules/darts/models/forecasting/tft_model.html b/_modules/darts/models/forecasting/tft_model.html
index 11954cf6f4..d37c412de3 100644
--- a/_modules/darts/models/forecasting/tft_model.html
+++ b/_modules/darts/models/forecasting/tft_model.html
@@ -182,7 +182,10 @@ Source code for darts.models.forecasting.tft_model
from darts.logging import get_logger, raise_if, raise_if_not, raise_log
from darts.models.components import glu_variants, layer_norm_variants
from darts.models.components.glu_variants import GLU_FFN
-from darts.models.forecasting.pl_forecasting_module import PLMixedCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLMixedCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.tft_submodels import (
_GateAddNorm,
_GatedResidualNetwork,
@@ -615,6 +618,7 @@ Source code for darts.models.forecasting.tft_model
)
return mask
+ @io_processor
def forward(
self, x_in: Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]
) -> torch.Tensor:
@@ -931,6 +935,9 @@ Source code for darts.models.forecasting.tft_model
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [3]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -971,11 +978,14 @@ Source code for darts.models.forecasting.tft_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -1037,7 +1047,46 @@ Source code for darts.models.forecasting.tft_model
References
----------
.. [1] https://arxiv.org/pdf/1912.09363.pdf
- ..[2] Shazeer, Noam, "GLU Variants Improve Transformer", 2020. arVix https://arxiv.org/abs/2002.05202.
+ .. [2] Shazeer, Noam, "GLU Variants Improve Transformer", 2020. arXiv https://arxiv.org/abs/2002.05202.
+ .. [3] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import TFTModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # by default, TFTModel is trained using a `QuantileRegression` making it a probabilistic forecasting model
+ >>> model = TFTModel(
+ ...     input_chunk_length=6,
+ ...     output_chunk_length=6,
+ ...     n_epochs=5,
+ ... )
+ >>> # future_covariates are mandatory for `TFTModel`
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> # TFTModel is probabilistic by definition; using `num_samples >> 1` to generate probabilistic forecasts
+ >>> pred = model.predict(6, num_samples=100)
+ >>> # shape : (forecast horizon, components, num_samples)
+ >>> pred.all_values().shape
+ (6, 1, 100)
+ >>> # showing the first 3 samples for each timestamp
+ >>> pred.all_values()[:,:,:3]
+ array([[[-0.06414202, -0.7188093 , 0.52541292]],
+ [[ 0.02928407, -0.40867163, 1.19650033]],
+ [[ 0.77252372, -0.50859694, 0.360166 ]],
+ [[ 0.9586113 , 1.24147138, -0.01625545]],
+ [[ 1.06863863, 0.2987822 , -0.69213369]],
+ [[-0.83076568, -0.25780816, -0.28318784]]])
+
+ .. note::
+ `TFT example notebook <https://unit8co.github.io/darts/examples/13-TFT-examples.html>`_ presents
+ techniques that can be used to improve the forecasts quality compared to this simple usage example.
"""
model_kwargs = {key: val for key, val in self.model_params.items()}
if likelihood is None and loss_fn is None:
diff --git a/_modules/darts/models/forecasting/theta.html b/_modules/darts/models/forecasting/theta.html
index 6122dac78b..0efdf5f293 100644
--- a/_modules/darts/models/forecasting/theta.html
+++ b/_modules/darts/models/forecasting/theta.html
@@ -225,6 +225,23 @@ Source code for darts.models.forecasting.theta
References
----------
.. [1] `Unmasking the Theta method <https://robjhyndman.com/papers/Theta.pdf`
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import Theta
+ >>> series = AirPassengersDataset().load()
+ >>> # using the canonical Theta method
+ >>> model = Theta(theta=2)
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[442.7256909 ],
+ [433.74381763],
+ [494.54534585],
+ [480.36937856],
+ [481.06675142],
+ [545.80068173]])
"""
super().__init__()
@@ -417,6 +434,22 @@
Source code for darts.models.forecasting.theta
-----
Even though this model is an improvement of :class:`Theta`, it is a naive
implementation of the algorithm, which can potentially be slower.
+
+ Examples
+ --------
+ >>> from darts.datasets import AirPassengersDataset
+ >>> from darts.models import FourTheta
+ >>> series = AirPassengersDataset().load()
+ >>> model = FourTheta(theta=2)
+ >>> model.fit(series)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[443.3949283 ],
+ [434.39769555],
+ [495.28886231],
+ [481.08962991],
+ [481.78610361],
+ [546.61463773]])
"""
super().__init__()
diff --git a/_modules/darts/models/forecasting/tide_model.html b/_modules/darts/models/forecasting/tide_model.html
index 5e3d4b58de..8e44efc1f6 100644
--- a/_modules/darts/models/forecasting/tide_model.html
+++ b/_modules/darts/models/forecasting/tide_model.html
@@ -175,9 +175,11 @@
Source code for darts.models.forecasting.tide_model
import torch
import torch.nn as nn
-from darts.logging import get_logger
-from darts.models.components.layer_norm_variants import RINorm
-from darts.models.forecasting.pl_forecasting_module import PLMixedCovariatesModule
+from darts.logging import get_logger, raise_log
+from darts.models.forecasting.pl_forecasting_module import (
+ PLMixedCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import MixedCovariatesTorchModel
MixedCovariatesTrainTensorType = Tuple[
@@ -242,9 +244,9 @@ Source code for darts.models.forecasting.tide_model
decoder_output_dim: int,
hidden_size: int,
temporal_decoder_hidden: int,
- temporal_width: int,
+ temporal_width_past: int,
+ temporal_width_future: int,
use_layer_norm: bool,
- use_reversible_instance_norm: bool,
dropout: float,
**kwargs,
):
@@ -272,12 +274,12 @@ Source code for darts.models.forecasting.tide_model
The width of the hidden layers in the encoder/decoder Residual Blocks.
temporal_decoder_hidden
The width of the hidden layers in the temporal decoder.
- temporal_width
+ temporal_width_past
+ The width of the past covariate embedding space.
+ temporal_width_future
The width of the future covariate embedding space.
use_layer_norm
Whether to use layer normalization in the Residual Blocks.
- use_reversible_instance_norm
- Whether to use reversible instance normalization.
dropout
Dropout probability
**kwargs
@@ -299,6 +301,7 @@ Source code for darts.models.forecasting.tide_model
self.input_dim = input_dim
self.output_dim = output_dim
+ self.past_cov_dim = input_dim - output_dim - future_cov_dim
self.future_cov_dim = future_cov_dim
self.static_cov_dim = static_cov_dim
self.nr_params = nr_params
@@ -308,30 +311,53 @@ Source code for darts.models.forecasting.tide_model
self.hidden_size = hidden_size
self.temporal_decoder_hidden = temporal_decoder_hidden
self.use_layer_norm = use_layer_norm
- self.use_reversible_instance_norm = use_reversible_instance_norm
self.dropout = dropout
- self.temporal_width = temporal_width
+ self.temporal_width_past = temporal_width_past
+ self.temporal_width_future = temporal_width_future
+
+ # past covariates handling: either feature projection, raw features, or no features
+ self.past_cov_projection = None
+ if self.past_cov_dim and temporal_width_past:
+ # residual block for past covariates feature projection
+ self.past_cov_projection = _ResidualBlock(
+ input_dim=self.past_cov_dim,
+ output_dim=temporal_width_past,
+ hidden_size=hidden_size,
+ use_layer_norm=use_layer_norm,
+ dropout=dropout,
+ )
+ past_covariates_flat_dim = self.input_chunk_length * temporal_width_past
+ elif self.past_cov_dim:
+ # skip projection and use raw features
+ past_covariates_flat_dim = self.input_chunk_length * self.past_cov_dim
+ else:
+ past_covariates_flat_dim = 0
- # residual block for input feature projection
- # this is only needed when covariates are used
- if future_cov_dim:
- self.feature_projection = _ResidualBlock(
+ # future covariates handling: either feature projection, raw features, or no features
+ self.future_cov_projection = None
+ if future_cov_dim and self.temporal_width_future:
+ # residual block for future covariates feature projection
+ self.future_cov_projection = _ResidualBlock(
input_dim=future_cov_dim,
- output_dim=temporal_width,
+ output_dim=temporal_width_future,
hidden_size=hidden_size,
use_layer_norm=use_layer_norm,
dropout=dropout,
)
+ historical_future_covariates_flat_dim = (
+ self.input_chunk_length + self.output_chunk_length
+ ) * temporal_width_future
+ elif future_cov_dim:
+ # skip projection and use raw features
+ historical_future_covariates_flat_dim = (
+ self.input_chunk_length + self.output_chunk_length
+ ) * future_cov_dim
else:
- self.feature_projection = None
+ historical_future_covariates_flat_dim = 0
- # original paper doesn't specify how to use past covariates
- # we assume that they pass them raw to the encoder
- historical_future_covariates_flat_dim = (
- self.input_chunk_length + self.output_chunk_length
- ) * (self.temporal_width if future_cov_dim > 0 else 0)
encoder_dim = (
- self.input_chunk_length * (input_dim - future_cov_dim)
+ self.input_chunk_length * output_dim
+ + past_covariates_flat_dim
+ historical_future_covariates_flat_dim
+ static_cov_dim
)
@@ -379,9 +405,14 @@ Source code for darts.models.forecasting.tide_model
),
)
+ decoder_input_dim = decoder_output_dim * self.nr_params
+ if temporal_width_future and future_cov_dim:
+ decoder_input_dim += temporal_width_future
+ elif future_cov_dim:
+ decoder_input_dim += future_cov_dim
+
self.temporal_decoder = _ResidualBlock(
- input_dim=decoder_output_dim * self.nr_params
- + (temporal_width if future_cov_dim > 0 else 0),
+ input_dim=decoder_input_dim,
output_dim=output_dim * self.nr_params,
hidden_size=temporal_decoder_hidden,
use_layer_norm=use_layer_norm,
@@ -392,11 +423,7 @@ Source code for darts.models.forecasting.tide_model
self.input_chunk_length, self.output_chunk_length * self.nr_params
)
- if self.use_reversible_instance_norm:
- self.rin = RINorm(input_dim=output_dim)
- else:
- self.rin = None
-
+ @io_processor
def forward(
self, x_in: Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]
) -> torch.Tensor:
@@ -417,49 +444,51 @@ Source code for darts.models.forecasting.tide_model
# x_static_covariates has shape (batch_size, static_cov_dim)
x, x_future_covariates, x_static_covariates = x_in
- if self.use_reversible_instance_norm:
- x[:, :, : self.output_dim] = self.rin(x[:, :, : self.output_dim])
-
x_lookback = x[:, :, : self.output_dim]
- # future covariates need to be extracted from x and stacked with historical future covariates
- if self.future_cov_dim > 0:
- x_dynamic_covariates = torch.cat(
+ # future covariates: feature projection or raw features
+ # historical future covariates need to be extracted from x and stacked with part of future covariates
+ if self.future_cov_dim:
+ x_dynamic_future_covariates = torch.cat(
[
- x_future_covariates,
x[
:,
:,
None if self.future_cov_dim == 0 else -self.future_cov_dim :,
],
+ x_future_covariates,
],
dim=1,
)
-
- # project input features across all input time steps
- x_dynamic_covariates_proj = self.feature_projection(x_dynamic_covariates)
-
+ if self.temporal_width_future:
+ # project input features across all input and output time steps
+ x_dynamic_future_covariates = self.future_cov_projection(
+ x_dynamic_future_covariates
+ )
else:
- x_dynamic_covariates = None
- x_dynamic_covariates_proj = None
+ x_dynamic_future_covariates = None
- # extract past covariates, if they exist
- if self.input_dim - self.output_dim - self.future_cov_dim > 0:
- x_past_covariates = x[
+ # past covariates: feature projection or raw features
+ # the past covariates are embedded in `x`
+ if self.past_cov_dim:
+ x_dynamic_past_covariates = x[
:,
:,
- self.output_dim : None
- if self.future_cov_dim == 0
- else -self.future_cov_dim :,
+ self.output_dim : self.output_dim + self.past_cov_dim,
]
+ if self.temporal_width_past:
+ # project input features across all input time steps
+ x_dynamic_past_covariates = self.past_cov_projection(
+ x_dynamic_past_covariates
+ )
else:
- x_past_covariates = None
+ x_dynamic_past_covariates = None
# setup input to encoder
encoded = [
x_lookback,
- x_past_covariates,
- x_dynamic_covariates_proj,
+ x_dynamic_past_covariates,
+ x_dynamic_future_covariates,
x_static_covariates,
]
encoded = [t.flatten(start_dim=1) for t in encoded if t is not None]
@@ -475,7 +504,7 @@ Source code for darts.models.forecasting.tide_model
# stack and temporally decode with future covariate last output steps
temporal_decoder_input = [
decoded,
- x_dynamic_covariates_proj[:, -self.output_chunk_length :, :]
+ x_dynamic_future_covariates[:, -self.output_chunk_length :, :]
if self.future_cov_dim > 0
else None,
]
@@ -495,10 +524,6 @@ Source code for darts.models.forecasting.tide_model
) # skip.view(temporal_decoded.shape)
y = y.view(-1, self.output_chunk_length, self.output_dim, self.nr_params)
-
- if self.use_reversible_instance_norm:
- y = self.rin.inverse(y)
-
return y
@@ -511,10 +536,10 @@ Source code for darts.models.forecasting.tide_model
num_decoder_layers: int = 1,
decoder_output_dim: int = 16,
hidden_size: int = 128,
- temporal_width: int = 4,
+ temporal_width_past: int = 4,
+ temporal_width_future: int = 4,
temporal_decoder_hidden: int = 32,
use_layer_norm: bool = False,
- use_reversible_instance_norm: bool = False,
dropout: float = 0.1,
use_static_covariates: bool = True,
**kwargs,
@@ -550,15 +575,16 @@ Source code for darts.models.forecasting.tide_model
The dimensionality of the output of the decoder.
hidden_size
The width of the layers in the residual blocks of the encoder and decoder.
- temporal_width
- The width of the layers in the future covariate projection residual block.
+ temporal_width_past
+ The width of the layers in the past covariate projection residual block. If `0`,
+ will bypass feature projection and use the raw feature data.
+ temporal_width_future
+ The width of the layers in the future covariate projection residual block. If `0`,
+ will bypass feature projection and use the raw feature data.
temporal_decoder_hidden
The width of the layers in the temporal decoder.
use_layer_norm
Whether to use layer normalization in the residual blocks.
- use_reversible_instance_norm
- Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [2]_.
- It is only applied to the features of the target series and not the covariates.
dropout
The dropout probability to be used in fully connected layers. This is compatible with Monte Carlo dropout
at inference time for model uncertainty estimation (enabled with ``mc_dropout=True`` at
@@ -588,6 +614,9 @@ Source code for darts.models.forecasting.tide_model
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [2]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -628,11 +657,14 @@ Source code for darts.models.forecasting.tide_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -697,7 +729,44 @@ Source code for darts.models.forecasting.tide_model
http://arxiv.org/abs/2304.08424
.. [2] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import TiDEModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> model = TiDEModel(
+ >>> input_chunk_length=6,
+ >>> output_chunk_length=6,
+ >>> n_epochs=20
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[1008.1667634 ],
+ [ 997.08337201],
+ [1017.72035839],
+ [1005.10790392],
+ [ 998.90537286],
+ [1005.91534452]])
+
+ .. note::
+ `TiDE example notebook <https://unit8co.github.io/darts/examples/18-TiDE-examples.html>`_ presents
+ techniques that can be used to improve the forecast quality compared to this simple usage example.
"""
+ if temporal_width_past < 0 or temporal_width_future < 0:
+ raise_log(
+ ValueError(
+ "`temporal_width_past` and `temporal_width_future` must be >= 0."
+ ),
+ logger=logger,
+ )
super().__init__(**self._extract_torch_model_params(**self.model_params))
# extract pytorch lightning module kwargs
@@ -707,13 +776,13 @@ Source code for darts.models.forecasting.tide_model
self.num_decoder_layers = num_decoder_layers
self.decoder_output_dim = decoder_output_dim
self.hidden_size = hidden_size
- self.temporal_width = temporal_width
+ self.temporal_width_past = temporal_width_past
+ self.temporal_width_future = temporal_width_future
self.temporal_decoder_hidden = temporal_decoder_hidden
self._considers_static_covariates = use_static_covariates
self.use_layer_norm = use_layer_norm
- self.use_reversible_instance_norm = use_reversible_instance_norm
self.dropout = dropout
def _create_model(
@@ -752,6 +821,18 @@ Source code for darts.models.forecasting.tide_model
nr_params = 1 if self.likelihood is None else self.likelihood.num_parameters
+ past_cov_dim = input_dim - output_dim - future_cov_dim
+ if past_cov_dim and self.temporal_width_past >= past_cov_dim:
+ logger.warning(
+ f"number of `past_covariates` features is <= `temporal_width_past`, leading to feature expansion."
+ f"number of covariates: {past_cov_dim}, `temporal_width_past={self.temporal_width_past}`."
+ )
+ if future_cov_dim and self.temporal_width_future >= future_cov_dim:
+ logger.warning(
+ f"number of `future_covariates` features is <= `temporal_width_future`, leading to feature expansion."
+ f"number of covariates: {future_cov_dim}, `temporal_width_future={self.temporal_width_future}`."
+ )
+
return _TideModule(
input_dim=input_dim,
output_dim=output_dim,
@@ -762,10 +843,10 @@ Source code for darts.models.forecasting.tide_model
num_decoder_layers=self.num_decoder_layers,
decoder_output_dim=self.decoder_output_dim,
hidden_size=self.hidden_size,
- temporal_width=self.temporal_width,
+ temporal_width_past=self.temporal_width_past,
+ temporal_width_future=self.temporal_width_future,
temporal_decoder_hidden=self.temporal_decoder_hidden,
use_layer_norm=self.use_layer_norm,
- use_reversible_instance_norm=self.use_reversible_instance_norm,
dropout=self.dropout,
**self.pl_module_params,
)
diff --git a/_modules/darts/models/forecasting/transformer_model.html b/_modules/darts/models/forecasting/transformer_model.html
index 71b29f0efb..26267b2d80 100644
--- a/_modules/darts/models/forecasting/transformer_model.html
+++ b/_modules/darts/models/forecasting/transformer_model.html
@@ -183,7 +183,10 @@ Source code for darts.models.forecasting.transformer_model
CustomFeedForwardDecoderLayer,
CustomFeedForwardEncoderLayer,
)
-from darts.models.forecasting.pl_forecasting_module import PLPastCovariatesModule
+from darts.models.forecasting.pl_forecasting_module import (
+ PLPastCovariatesModule,
+ io_processor,
+)
from darts.models.forecasting.torch_forecasting_model import PastCovariatesTorchModel
logger = get_logger(__name__)
@@ -457,6 +460,7 @@ Source code for darts.models.forecasting.transformer_model
return src, tgt
+ @io_processor
def forward(self, x_in: Tuple):
data, _ = x_in
# Here we create 'src' and 'tgt', the inputs for the encoder and decoder
@@ -572,6 +576,9 @@ Source code for darts.models.forecasting.transformer_model
to using a constant learning rate. Default: ``None``.
lr_scheduler_kwargs
Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
+ use_reversible_instance_norm
+ Whether to use reversible instance normalization `RINorm` against distribution shift as shown in [3]_.
+ It is only applied to the features of the target series and not the covariates.
batch_size
Number of time series (input and output sequences) used in each training pass. Default: ``32``.
n_epochs
@@ -612,11 +619,14 @@ Source code for darts.models.forecasting.transformer_model
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -680,18 +690,49 @@ Source code for darts.models.forecasting.transformer_model
.. [1] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser,
and Illia Polosukhin, "Attention Is All You Need", 2017. In Advances in Neural Information Processing Systems,
pages 6000-6010. https://arxiv.org/abs/1706.03762.
- ..[2] Shazeer, Noam, "GLU Variants Improve Transformer", 2020. arVix https://arxiv.org/abs/2002.05202.
+ .. [2] Shazeer, Noam, "GLU Variants Improve Transformer", 2020. arXiv https://arxiv.org/abs/2002.05202.
+ .. [3] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against
+ Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p
Notes
-----
Disclaimer:
This current implementation is fully functional and can already produce some good predictions. However,
it is still limited in how it uses the Transformer architecture because the `tgt` input of
- `torch.nn.Transformer` is not utlized to its full extent. Currently, we simply pass the last value of the
+ `torch.nn.Transformer` is not utilized to its full extent. Currently, we simply pass the last value of the
`src` input to `tgt`. To get closer to the way the Transformer is usually used in language models, we
should allow the model to consume its own output as part of the `tgt` argument, such that when predicting
sequences of values, the input to the `tgt` argument would grow as outputs of the transformer model would be
added to it. Of course, the training of the model would have to be adapted accordingly.
+
+ Examples
+ --------
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import TransformerModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> model = TransformerModel(
+ >>> input_chunk_length=6,
+ >>> output_chunk_length=6,
+ >>> n_epochs=20
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[5.40498034],
+ [5.36561899],
+ [5.80616883],
+ [6.48695488],
+ [7.63158655],
+ [5.65417736]])
+
+ .. note::
+ `Transformer example notebook <https://unit8co.github.io/darts/examples/06-Transformer-examples.html>`_
+ presents techniques that can be used to improve the forecast quality compared to this simple usage
+ example.
"""
super().__init__(**self._extract_torch_model_params(**self.model_params))
diff --git a/_modules/darts/models/forecasting/varima.html b/_modules/darts/models/forecasting/varima.html
index 1b50df7b52..6f0e3239d0 100644
--- a/_modules/darts/models/forecasting/varima.html
+++ b/_modules/darts/models/forecasting/varima.html
@@ -230,14 +230,39 @@ Source code for darts.models.forecasting.varima
<
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'future': ['relative']},
- 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'future': [encode_year]},
'transformer': Scaler()
}
..
+
+ Examples
+ --------
+ >>> from darts.datasets import ETTh2Dataset
+ >>> from darts.models import VARIMA
+ >>> from darts.utils.timeseries_generation import holidays_timeseries
+ >>> # forecasting the High UseFul Load ("HUFL") and Oil Temperature ("OT")
+ >>> series = ETTh2Dataset().load()[:500][["HUFL", "OT"]]
+ >>> # optionally, use some future covariates; e.g. encode each timestep whether it is on a holiday
+ >>> future_cov = holidays_timeseries(series.time_index, "CN", add_length=6)
+ >>> # no clear trend in the dataset
+ >>> model = VARIMA(trend="n")
+ >>> model.fit(series, future_covariates=future_cov)
+ >>> pred = model.predict(6, future_covariates=future_cov)
+ >>> # the two targets are predicted together
+ >>> pred.values()
+ array([[48.11846185, 47.94272629],
+ [49.85314633, 47.97713346],
+ [51.16145791, 47.99804203],
+ [52.14674087, 48.00872598],
+ [52.88729152, 48.01166578],
+ [53.44242919, 48.00874069]])
"""
super().__init__(add_encoders=add_encoders)
self.p = p
diff --git a/_modules/darts/models/forecasting/xgboost.html b/_modules/darts/models/forecasting/xgboost.html
index 46d0d5c4e6..90469db6ba 100644
--- a/_modules/darts/models/forecasting/xgboost.html
+++ b/_modules/darts/models/forecasting/xgboost.html
@@ -175,13 +175,18 @@ Source code for darts.models.forecasting.xgboost
"""
from functools import partial
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Sequence, Union
import numpy as np
import xgboost as xgb
from darts.logging import get_logger
-from darts.models.forecasting.regression_model import RegressionModel, _LikelihoodMixin
+from darts.models.forecasting.regression_model import (
+ FUTURE_LAGS_TYPE,
+ LAGS_TYPE,
+ RegressionModel,
+ _LikelihoodMixin,
+)
from darts.timeseries import TimeSeries
from darts.utils.utils import raise_if_not
@@ -210,13 +215,13 @@ Source code for darts.models.forecasting.xgboost
[docs]class XGBModel(RegressionModel, _LikelihoodMixin):
def __init__(
self,
- lags: Union[int, list] = None,
- lags_past_covariates: Union[int, List[int]] = None,
- lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+ lags: Optional[LAGS_TYPE] = None,
+ lags_past_covariates: Optional[LAGS_TYPE] = None,
+ lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
- likelihood: str = None,
- quantiles: List[float] = None,
+ likelihood: Optional[str] = None,
+ quantiles: Optional[List[float]] = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
use_static_covariates: bool = True,
@@ -227,17 +232,33 @@ Source code for darts.models.forecasting.xgboost
Parameters
----------
lags
- Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
- are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
+ Lagged target `series` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `series` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_past_covariates
- Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
- `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
- with lags < 0 is required.
+ Lagged `past_covariates` values used to predict the next time step/s.
+ If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+ where `0` corresponds to the first predicted time step of each sample.
+ If a list of integers, each value must be < 0. Uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (integer or list of integers). The
+ key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
lags_future_covariates
- Number of lagged future_covariates values used to predict the next time step. If a tuple (past, future) is
- given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
- `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
- of integers with lags is required.
+ Lagged `future_covariates` values used to predict the next time step/s.
+ If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+ future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, ..., future - 1)`, where `0`
+ corresponds to the first predicted time step of each sample.
+ If a list of integers, uses only the specified values as lags.
+ If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+ using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+ 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+ components are missing and the 'default_lags' key is not provided.
output_chunk_length
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
@@ -254,11 +275,14 @@ Source code for darts.models.forecasting.xgboost
.. highlight:: python
.. code-block:: python
+ def encode_year(idx):
+ return (idx.year - 1950) / 50
+
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
- 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
+ 'custom': {'past': [encode_year]},
'transformer': Scaler()
}
..
@@ -279,6 +303,36 @@ Source code for darts.models.forecasting.xgboost
that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``.
**kwargs
Additional keyword arguments passed to `xgb.XGBRegressor`.
+
+ Examples
+ --------
+ Deterministic forecasting, using past/future covariates (optional)
+ >>> from darts.datasets import WeatherDataset
+ >>> from darts.models import XGBModel
+ >>> series = WeatherDataset().load()
+ >>> # predicting atmospheric pressure
+ >>> target = series['p (mbar)'][:100]
+ >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
+ >>> past_cov = series['rain (mm)'][:100]
+ >>> # optionally, use future temperatures (pretending this component is a forecast)
+ >>> future_cov = series['T (degC)'][:106]
+ >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature
+ >>> # values corresponding to the forecasted period
+ >>> model = XGBModel(
+ >>> lags=12,
+ >>> lags_past_covariates=12,
+ >>> lags_future_covariates=[0,1,2,3,4,5],
+ >>> output_chunk_length=6,
+ >>> )
+ >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
+ >>> pred = model.predict(6)
+ >>> pred.values()
+ array([[1005.9185 ],
+ [1005.8315 ],
+ [1005.7878 ],
+ [1005.72626],
+ [1005.7475 ],
+ [1005.76074]])
"""
kwargs["random_state"] = random_state # seed for tree learner
self.kwargs = kwargs
diff --git a/_modules/darts/timeseries.html b/_modules/darts/timeseries.html
index 76389740d9..f01e2d71df 100644
--- a/_modules/darts/timeseries.html
+++ b/_modules/darts/timeseries.html
@@ -188,9 +188,9 @@ Source code for darts.timeseries
- Have a monotonically increasing time index, without holes (without missing dates)
- Contain numeric types only
- Have distinct components/columns names
- - Have a well defined frequency (
- `date offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
- for ``DateTimeIndex``, and step size for ``RangeIndex``)
+ - Have a well defined frequency (`date offset aliases
+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
+ for ``DateTimeIndex``, or step size for ``RangeIndex``)
- Have static covariates consistent with their components, or no static covariates
- Have a hierarchy consistent with their components, or no hierarchy
@@ -383,7 +383,7 @@ Source code for darts.timeseries
logger,
)
else:
- self._freq = self._time_index.step
+ self._freq: int = self._time_index.step
self._freq_str = None
# check static covariates
@@ -877,6 +877,9 @@ Source code for darts.timeseries
if not time_index.name:
time_index.name = time_col if time_col else DIMS[0]
+ if series_df.columns.name:
+ series_df.columns.name = None
+
xa = xr.DataArray(
series_df.values[:, :, np.newaxis],
dims=(time_index.name,) + DIMS[-2:],
@@ -2267,7 +2270,7 @@ Source code for darts.timeseries
)
raise_if_not(
0 <= point_index < len(self),
- "point (int) should be a valid index in series",
+ f"The index corresponding to the provided point ({point}) should be a valid index in series",
logger,
)
elif isinstance(point, pd.Timestamp):
@@ -2306,8 +2309,8 @@ Source code for darts.timeseries
This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
In case of a `float`, the parameter will be treated as the proportion of the time series
that should lie before the point.
- In the case of `int`, the parameter will be treated as an integer index to the time index of
- `series`. Will raise a ValueError if not a valid index in `series`
+ In case of `int`, the parameter will be treated as an integer index to the time index of
+ `series`. Will raise a ValueError if not a valid index in `series`.
In case of a `pandas.Timestamp`, point will be returned as is provided that the timestamp
is present in the series time index, otherwise will raise a ValueError.
"""
@@ -2734,7 +2737,7 @@ Source code for darts.timeseries
Returns
-------
TimeSeries
- A TimeSeries constructed after differencing.
+ A new TimeSeries, with the differenced values.
"""
if not isinstance(n, int) or n < 1:
raise_log(ValueError("'n' must be a positive integer >= 1."), logger)
@@ -2761,6 +2764,17 @@ Source code for darts.timeseries
new_xa = _compute_diff(new_xa)
return self.__class__(new_xa)
+[docs] def cumsum(self) -> Self:
+ """
+ Returns the cumulative sum of the time series along the time axis.
+
+ Returns
+ -------
+ TimeSeries
+ A new TimeSeries, with the cumulatively summed values.
+ """
+ return self.__class__(self._xa.copy().cumsum(axis=0))
+
[docs] def has_same_time_as(self, other: "TimeSeries") -> bool:
"""
Checks whether this series has the same time index as `other`.
@@ -5330,7 +5344,7 @@ Source code for darts.timeseries
if not consecutive_time_axes:
raise_if_not(
ignore_time_axis,
- "When concatenating over time axis, all series need to be contiguous"
+ "When concatenating over time axis, all series need to be contiguous "
"in the time dimension. Use `ignore_time_axis=True` to override "
"this behavior and concatenate the series by extending the time axis "
"of the first series.",
diff --git a/_modules/darts/utils/data/tabularization.html b/_modules/darts/utils/data/tabularization.html
index c0b326400d..6cdd25074b 100644
--- a/_modules/darts/utils/data/tabularization.html
+++ b/_modules/darts/utils/data/tabularization.html
@@ -168,13 +168,15 @@ Source code for darts.utils.data.tabularization
<
import warnings
from functools import reduce
from math import inf
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Tuple, Union
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal
+from itertools import chain
+
import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import as_strided
@@ -192,9 +194,9 @@ Source code for darts.utils.data.tabularization
<
target_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
- lags: Optional[Sequence[int]] = None,
- lags_past_covariates: Optional[Sequence[int]] = None,
- lags_future_covariates: Optional[Sequence[int]] = None,
+ lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
output_chunk_length: int = 1,
uses_static_covariates: bool = True,
last_static_covariates_shape: Optional[Tuple[int, int]] = None,
@@ -319,15 +321,18 @@ Source code for darts.utils.data.tabularization
<
Optionally, the lags of the target series to be used as (auto-regressive) features. If not specified,
auto-regressive features will *not* be added to `X`. Each lag value is assumed to be negative (e.g.
`lags = [-3, -1]` will extract `target_series` values which are 3 timesteps and 1 timestep away from
- the current value).
+ the current value). If the lags are provided as a dictionary, the lags values are specific to each
+ component in the target series.
lags_past_covariates
Optionally, the lags of `past_covariates` to be used as features. Like `lags`, each lag value is assumed to
- be less than or equal to -1.
+ be less than or equal to -1. If the lags are provided as a dictionary, the lags values are specific to each
+ component in the past covariates series.
lags_future_covariates
Optionally, the lags of `future_covariates` to be used as features. Unlike `lags` and
`lags_past_covariates`, `lags_future_covariates` values can be positive (i.e. use values *after* time `t`
to predict target at time `t`), zero (i.e. use values *at* time `t` to predict target at time `t`), and/or
- negative (i.e. use values *before* time `t` to predict target at time `t`).
+ negative (i.e. use values *before* time `t` to predict target at time `t`). If the lags are provided as
+ a dictionary, the lags values are specific to each component in the future covariates series.
uses_static_covariates
Whether the model uses/expects static covariates. If `True`, it enforces that static covariates must
have identical shapes across all target series.
@@ -496,9 +501,9 @@ Source code for darts.utils.data.tabularization
<
output_chunk_length: int,
past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
- lags: Optional[Sequence[int]] = None,
- lags_past_covariates: Optional[Sequence[int]] = None,
- lags_future_covariates: Optional[Sequence[int]] = None,
+ lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
uses_static_covariates: bool = True,
last_static_covariates_shape: Optional[Tuple[int, int]] = None,
max_samples_per_ts: Optional[int] = None,
@@ -537,15 +542,18 @@ Source code for darts.utils.data.tabularization
<
Optionally, the lags of the target series to be used as (auto-regressive) features. If not specified,
auto-regressive features will *not* be added to `X`. Each lag value is assumed to be negative (e.g.
`lags = [-3, -1]` will extract `target_series` values which are 3 timesteps and 1 timestep away from
- the current value).
+ the current value). If the lags are provided as a dictionary, the lags values are specific to each
+ component in the target series.
lags_past_covariates
Optionally, the lags of `past_covariates` to be used as features. Like `lags`, each lag value is assumed to
- be less than or equal to -1.
+ be less than or equal to -1. If the lags are provided as a dictionary, the lags values are specific to each
+ component in the past covariates series.
lags_future_covariates
Optionally, the lags of `future_covariates` to be used as features. Unlike `lags` and `lags_past_covariates`,
`lags_future_covariates` values can be positive (i.e. use values *after* time `t` to predict target at
time `t`), zero (i.e. use values *at* time `t` to predict target at time `t`), and/or negative (i.e. use values
- *before* time `t` to predict target at time `t`).
+ *before* time `t` to predict target at time `t`). If the lags are provided as a dictionary, the lags values
+ are specific to each component in the future covariates series.
uses_static_covariates
Whether the model uses/expects static covariates. If `True`, it enforces that static covariates must
have identical shapes across all target series.
@@ -634,9 +642,9 @@ Source code for darts.utils.data.tabularization
<
target_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
- lags: Optional[Sequence[int]] = None,
- lags_past_covariates: Optional[Sequence[int]] = None,
- lags_future_covariates: Optional[Sequence[int]] = None,
+ lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
uses_static_covariates: bool = True,
last_static_covariates_shape: Optional[Tuple[int, int]] = None,
max_samples_per_ts: Optional[int] = None,
@@ -667,15 +675,18 @@ Source code for darts.utils.data.tabularization
<
Optionally, the lags of the target series to be used as (auto-regressive) features. If not specified,
auto-regressive features will *not* be added to `X`. Each lag value is assumed to be negative (e.g.
`lags = [-3, -1]` will extract `target_series` values which are 3 timesteps and 1 timestep away from
- the current value).
+ the current value). If the lags are provided as a dictionary, the lags values are specific to each
+ component in the target series.
lags_past_covariates
Optionally, the lags of `past_covariates` to be used as features. Like `lags`, each lag value is assumed to
- be less than or equal to -1.
+ be less than or equal to -1. If the lags are provided as a dictionary, the lags values are specific to each
+ component in the past covariates series.
lags_future_covariates
Optionally, the lags of `future_covariates` to be used as features. Unlike `lags` and `lags_past_covariates`,
`lags_future_covariates` values can be positive (i.e. use values *after* time `t` to predict target at
time `t`), zero (i.e. use values *at* time `t` to predict target at time `t`), and/or negative (i.e. use
- values *before* time `t` to predict target at time `t`).
+ values *before* time `t` to predict target at time `t`). If the lags are provided as a dictionary, the lags
+ values are specific to each component in the future covariates series.
uses_static_covariates
Whether the model uses/expects static covariates. If `True`, it enforces that static covariates must
have identical shapes across all target series.
@@ -749,11 +760,11 @@ Source code for darts.utils.data.tabularization
<
[docs]def add_static_covariates_to_lagged_data(
- features: Union[np.array, Sequence[np.array]],
+ features: Union[np.ndarray, Sequence[np.ndarray]],
target_series: Union[TimeSeries, Sequence[TimeSeries]],
uses_static_covariates: bool = True,
last_shape: Optional[Tuple[int, int]] = None,
-) -> Union[np.array, Sequence[np.array]]:
+) -> Union[np.ndarray, Sequence[np.ndarray]]:
"""
Add static covariates to the features' table for RegressionModels.
If `uses_static_covariates=True`, all target series used in `fit()` and `predict()` must have static
@@ -843,9 +854,9 @@ Source code for darts.utils.data.tabularization
<
target_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
- lags: Optional[Sequence[int]] = None,
- lags_past_covariates: Optional[Sequence[int]] = None,
- lags_future_covariates: Optional[Sequence[int]] = None,
+ lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
output_chunk_length: int = 1,
concatenate: bool = True,
use_static_covariates: bool = False,
@@ -854,11 +865,16 @@ Source code for darts.utils.data.tabularization
<
Helper function called to retrieve the name of the features and labels arrays created with
`create_lagged_data()`. The order of the features is the following:
- Along the `n_lagged_features` axis, `X` has the following structure (for `*_lags=[-2,-1]` and
- `*_series.n_components = 2`):
+ Along the `n_lagged_features` axis, `X` has the following structure:
lagged_target | lagged_past_covariates | lagged_future_covariates | static covariates
- where each `lagged_*` has the following structure:
+
+ For `*_lags=[-2,-1]` and `*_series.n_components = 2` (lags shared across all the components),
+ each `lagged_*` has the following structure (grouped by lags):
comp0_*_lag-2 | comp1_*_lag-2 | comp0_*_lag-1 | comp1_*_lag-1
+ For `*_lags={'comp0':[-2, -1], 'comp1':[-5, -3]}` and `*_series.n_components = 2` (component-
+ specific lags), each `lagged_*` has the following structure (grouped by components):
+ comp0_*_lag-2 | comp0_*_lag-1 | comp1_*_lag-5 | comp1_*_lag-3
+
and for static covariates (2 static covariates acting on 2 target components):
cov0_*_target_comp0 | cov0_*_target_comp1 | cov1_*_target_comp0 | cov1_*_target_comp1
@@ -910,11 +926,17 @@ Source code for darts.utils.data.tabularization
<
continue
components = get_single_series(variate).components.tolist()
- lagged_feature_names += [
- f"{name}_{variate_type}_lag{lag}"
- for lag in variate_lags
- for name in components
- ]
+ if isinstance(variate_lags, dict):
+ for name in components:
+ lagged_feature_names += [
+ f"{name}_{variate_type}_lag{lag}" for lag in variate_lags[name]
+ ]
+ else:
+ lagged_feature_names += [
+ f"{name}_{variate_type}_lag{lag}"
+ for lag in variate_lags
+ for name in components
+ ]
if variate_type == "target" and lags:
label_feature_names = [
@@ -941,13 +963,13 @@ Source code for darts.utils.data.tabularization
<
def _create_lagged_data_by_moving_window(
- target_series: TimeSeries,
+ target_series: Optional[TimeSeries],
output_chunk_length: int,
past_covariates: Optional[TimeSeries],
future_covariates: Optional[TimeSeries],
- lags: Optional[Sequence[int]],
- lags_past_covariates: Optional[Sequence[int]],
- lags_future_covariates: Optional[Sequence[int]],
+ lags: Optional[Union[Sequence[int], Dict[str, List[int]]]],
+ lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]],
+ lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]],
max_samples_per_ts: Optional[int],
multi_models: bool,
check_inputs: bool,
@@ -1058,7 +1080,14 @@ Source code for darts.utils.data.tabularization
<
# Within each window, the `-1` indexed value (i.e. the value at the very end of
# the window) corresponds to time `t - min_lag_i`. The negative index of the time
# `t + lag_i` within this window is, therefore, `-1 + lag_i + min_lag_i`:
- lags_to_extract = np.array(lags_i, dtype=int) + min_lag_i - 1
+ if isinstance(lags_i, list):
+ lags_to_extract = np.array(lags_i, dtype=int) + min_lag_i - 1
+ else:
+ # Lags are grouped by component, extracted from the same window
+ lags_to_extract = [
+ np.array(comp_lags, dtype=int) + min_lag_i - 1
+ for comp_lags in lags_i.values()
+ ]
lagged_vals = _extract_lagged_vals_from_windows(windows, lags_to_extract)
X.append(lagged_vals)
# Cache `start_time_idx` for label creation:
@@ -1095,7 +1124,8 @@ Source code for darts.utils.data.tabularization
<
def _extract_lagged_vals_from_windows(
- windows: np.ndarray, lags_to_extract: Optional[np.ndarray] = None
+ windows: np.ndarray,
+ lags_to_extract: Optional[Union[np.ndarray, List[np.ndarray]]] = None,
) -> np.ndarray:
"""
Helper function called by `_create_lagged_data_by_moving_window` that
@@ -1105,19 +1135,34 @@ Source code for darts.utils.data.tabularization
<
is done such that the order of elements along axis 1 matches the pattern
described in the docstring of `create_lagged_data`.
- If `lags_to_extract` is specified, then only those values within each window that
+ If `lags_to_extract` is not specified, all of the values within each window are extracted.
+ If `lags_to_extract` is specified as an np.ndarray, then only those values within each window that
are indexed by `lags_to_extract` will be returned. In such cases, the shape of the returned
lagged values is `(num_windows, num_components * lags_to_extract.size, num_series)`. For example,
if `lags_to_extract = [-2]`, only the second-to-last values within each window will be extracted.
- If `lags_to_extract` is not specified, all of the values within each window is extracted.
+ If `lags_to_extract` is specified as a list of np.ndarray, the values will be extracted using the
+ lags provided for each component. In such cases, the shape of the returned lagged values is
+ `(num_windows, sum([comp_lags.size for comp_lags in lags_to_extract]), num_series)`. For example,
+ if `lags_to_extract = [[-2, -1], [-1]]`, the second-to-last and last values of the first component
+ and the last values of the second component within each window will be extracted.
"""
# windows.shape = (num_windows, num_components, num_samples, window_len):
- if lags_to_extract is not None:
- windows = windows[:, :, :, lags_to_extract]
- # windows.shape = (num_windows, window_len, num_components, num_samples):
- windows = np.moveaxis(windows, (0, 3, 1, 2), (0, 1, 2, 3))
- # lagged_vals.shape = (num_windows, num_components*window_len, num_samples):
- lagged_vals = windows.reshape((windows.shape[0], -1, windows.shape[-1]))
+ if isinstance(lags_to_extract, list):
+ # iterate over the components-specific lags
+ comp_windows = [
+ windows[:, i, :, comp_lags_to_extract]
+ for i, comp_lags_to_extract in enumerate(lags_to_extract)
+ ]
+ # windows.shape = (sum(lags_len) across components, num_windows, num_samples):
+ windows = np.concatenate(comp_windows, axis=0)
+ lagged_vals = np.moveaxis(windows, (1, 0, 2), (0, 1, 2))
+ else:
+ if lags_to_extract is not None:
+ windows = windows[:, :, :, lags_to_extract]
+ # windows.shape = (num_windows, window_len, num_components, num_samples):
+ windows = np.moveaxis(windows, (0, 3, 1, 2), (0, 1, 2, 3))
+ # lagged_vals.shape = (num_windows, num_components*window_len, num_samples):
+ lagged_vals = windows.reshape((windows.shape[0], -1, windows.shape[-1]))
return lagged_vals
@@ -1248,9 +1293,9 @@ Source code for darts.utils.data.tabularization
<
target_series: Optional[TimeSeries] = None,
past_covariates: Optional[TimeSeries] = None,
future_covariates: Optional[TimeSeries] = None,
- lags: Optional[Sequence[int]] = None,
- lags_past_covariates: Optional[Sequence[int]] = None,
- lags_future_covariates: Optional[Sequence[int]] = None,
+ lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+ lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
output_chunk_length: int = 1,
is_training: bool = True,
return_min_and_max_lags: bool = False,
@@ -1365,6 +1410,9 @@ Source code for darts.utils.data.tabularization
<
Optionally, specifies whether the largest magnitude lag value for each series should also be returned along with
the 'eligible' feature times
+ Note: if the lags are provided as a dictionary for the target series or any of the covariates series, the
+ component-specific lags are grouped into a single list to compute the corresponding feature time.
+
Returns
-------
feature_times
@@ -1410,6 +1458,10 @@ Source code for darts.utils.data.tabularization
<
[target_series, past_covariates, future_covariates],
[lags, lags_past_covariates, lags_future_covariates],
):
+ # union of the component-specific lags, unsorted
+ if isinstance(lags_i, dict):
+ lags_i = list(set(chain(*lags_i.values())))
+
if check_inputs and (series_i is not None):
_check_series_length(
series_i,
@@ -1758,9 +1810,9 @@ Source code for darts.utils.data.tabularization
<
def _check_lags(
- lags: Sequence[int],
- lags_past_covariates: Sequence[int],
- lags_future_covariates: Sequence[int],
+ lags: Optional[Union[Sequence[int], Dict[str, List[int]]]],
+ lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]],
+ lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]],
) -> None:
"""
Throws `ValueError` if any `lag` values aren't negative OR if no lags have been specified.
@@ -1773,9 +1825,13 @@ Source code for darts.utils.data.tabularization
<
if not lags_is_none[-1]:
is_target_or_past = i < 2
max_lag = -1 if is_target_or_past else inf
+
+ if isinstance(lags_i, dict):
+ lags_i = list(set(chain(*lags_i.values())))
+
raise_if(
any((lag > max_lag or not isinstance(lag, int)) for lag in lags_i),
- f"`lags{suffix}` must be a `Sequence` containing only `int` values less than {max_lag + 1}.",
+ f"`lags{suffix}` must be a `Sequence` or `Dict` containing only `int` values less than {max_lag + 1}.",
)
raise_if(
all(lags_is_none),
diff --git a/_modules/darts/utils/likelihood_models.html b/_modules/darts/utils/likelihood_models.html
index b38e1386bf..16d8045bc8 100644
--- a/_modules/darts/utils/likelihood_models.html
+++ b/_modules/darts/utils/likelihood_models.html
@@ -198,6 +198,7 @@ Source code for darts.utils.likelihood_models
"""
import collections.abc
+import inspect
from abc import ABC, abstractmethod
from typing import List, Optional, Tuple, Union
@@ -397,7 +398,21 @@ Source code for darts.utils.likelihood_models
}