diff --git a/CHANGELOG.md b/CHANGELOG.md index 37d3f2cf46..3cc71f2009 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,8 @@ but cannot always guarantee backwards compatibility. Changes that may **break co - Moved functions `retain_period_common_to_all()`, `series2seq()`, `seq2series()`, `get_single_series()` from `darts.utils.utils` to `darts.utils.ts_utils`. - Improvements to `ForecastingModel`: [#2269](https://github.com/unit8co/darts/pull/2269) by [Felix Divo](https://github.com/felixdivo). - Renamed the private `_is_probabilistic` property to a public `supports_probabilistic_prediction`. +- Improvements to `RegressionModel`: [#2320](https://github.com/unit8co/darts/pull/2320) by [Felix Divo](https://github.com/felixdivo). + - Added a progress bar when performing optimized historical forecasts (`retrain=False` and no autoregression) to display the series-level progress. - Improvements to `DataTransformer`: [#2267](https://github.com/unit8co/darts/pull/2267) by [Alicja Krzeminska-Sciga](https://github.com/alicjakrzeminska). - `InvertibleDataTransformer` now supports parallelized inverse transformation for `series` being a list of lists of `TimeSeries` (`Sequence[Sequence[TimeSeries]]`). This `series` type represents for example the output from `historical_forecasts()` when using multiple series. - New method `TorchForecastingModel.scale_batch_size()` that helps to find the batch size automatically. [#2318](https://github.com/unit8co/darts/pull/2318) by [Bohdan Bilonoh](https://github.com/BohdanBilonoh) @@ -94,6 +96,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Fixed** - Fixed a bug in `quantile_loss`, where the loss was computed on all samples rather than only on the predicted quantiles. [#2284](https://github.com/unit8co/darts/pull/2284) by [Dennis Bader](https://github.com/dennisbader). - Fixed type hint warning "Unexpected argument" when calling `historical_forecasts()` caused by the `_with_sanity_checks` decorator. The type hinting is now properly configured to expect any input arguments and return the output type of the method for which the sanity checks are performed. [#2286](https://github.com/unit8co/darts/pull/2286) by [Dennis Bader](https://github.com/dennisbader). +- Fixed the order of the features when using component-wise lags so that they are grouped by lag values, then by components (before, they were grouped by components, then by lag values). [#2272](https://github.com/unit8co/darts/pull/2272) by [Antoine Madrona](https://github.com/madtoinou). - Fixed a segmentation fault that some users were facing when importing a `LightGBMModel`. [#2304](https://github.com/unit8co/darts/pull/2304) by [Dennis Bader](https://github.com/dennisbader). - Fixed a bug when using dropout with a `TorchForecastingModel` and pytorch lightning versions >= 2.2.0, where the dropout was not properly activated during training. [#2312](https://github.com/unit8co/darts/pull/2312) by [Dennis Bader](https://github.com/dennisbader).
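The `RegressionModel` changelog entries above are easier to follow with a small sketch. The snippet below is illustrative only (the model, series, and lag choices are assumptions, not code from this PR): it shows component-specific lags passed as a dictionary, the lag-major feature ordering that the fix enforces, and the `verbose` flag that drives the new series-level progress bar for optimized historical forecasts.

```python
from darts.models import LinearRegressionModel
from darts.utils.timeseries_generation import linear_timeseries, sine_timeseries

# Build a two-component target series (component names are arbitrary here).
series = linear_timeseries(length=30, column_name="target_0").stack(
    sine_timeseries(length=30, column_name="target_1")
)

# Component-specific lags: one lag list per target component.
model = LinearRegressionModel(
    lags={"target_0": [-4, -1], "target_1": [-4, -1]},
    output_chunk_length=1,
)
model.fit(series)
# With the fix (#2272), the lagged features are grouped by lag value first,
# then by component, i.e. conceptually:
#   target_0_lag-4, target_1_lag-4, target_0_lag-1, target_1_lag-1
# (previously: target_0_lag-4, target_0_lag-1, target_1_lag-4, target_1_lag-1)

# retrain=False with forecast_horizon <= output_chunk_length uses the optimized
# historical-forecast path without autoregression; verbose=True now shows the
# series-level progress bar added in #2320.
hist_fc = model.historical_forecasts(
    series, start=0.75, forecast_horizon=1, retrain=False, verbose=True
)
```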
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index dd862db6b6..3bfd45b439 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -43,7 +43,7 @@ from darts.models.forecasting.forecasting_model import GlobalForecastingModel from darts.timeseries import TimeSeries from darts.utils.data.tabularization import ( - add_static_covariates_to_lagged_data, + _create_lagged_data_autoregression, create_lagged_component_names, create_lagged_training_data, ) @@ -1019,83 +1019,25 @@ def predict( last_step_shift = t_pred - (n - step) t_pred = n - step - np_X = [] - # retrieve target lags - if "target" in self.lags: - if predictions: - series_matrix = np.concatenate( - [series_matrix, predictions[-1]], axis=1 - ) - # component-wise lags - if "target" in self.component_lags: - tmp_X = [ - series_matrix[ - :, - [lag - (shift + last_step_shift) for lag in comp_lags], - comp_i, - ] - for comp_i, (comp, comp_lags) in enumerate( - self.component_lags["target"].items() - ) - ] - # values are grouped by component - np_X.append( - np.concatenate(tmp_X, axis=1).reshape( - len(series) * num_samples, -1 - ) - ) - else: - # values are grouped by lags - np_X.append( - series_matrix[ - :, - [ - lag - (shift + last_step_shift) - for lag in self.lags["target"] - ], - ].reshape(len(series) * num_samples, -1) - ) - # retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+) - for cov_type in ["past", "future"]: - if cov_type in covariate_matrices: - # component-wise lags - if cov_type in self.component_lags: - tmp_X = [ - covariate_matrices[cov_type][ - :, - np.array(comp_lags) - self.lags[cov_type][0] + t_pred, - comp_i, - ] - for comp_i, (comp, comp_lags) in enumerate( - self.component_lags[cov_type].items() - ) - ] - np_X.append( - np.concatenate(tmp_X, axis=1).reshape( - len(series) * num_samples, -1 - ) - ) - else: - np_X.append( - covariate_matrices[cov_type][ - :, relative_cov_lags[cov_type] + t_pred - ].reshape(len(series) * num_samples, -1) - ) - - # concatenate retrieved lags - X = np.concatenate(np_X, axis=1) - # Need to split up `X` into three equally-sized sub-blocks - # corresponding to each timeseries in `series`, so that - # static covariates can be added to each block; valid since - # each block contains same number of observations: - X_blocks = np.split(X, len(series), axis=0) - X_blocks, _ = add_static_covariates_to_lagged_data( - X_blocks, - series, + # concatenate previous iteration forecasts + if "target" in self.lags and predictions: + series_matrix = np.concatenate([series_matrix, predictions[-1]], axis=1) + + # extract and concatenate lags from target and covariates series + X = _create_lagged_data_autoregression( + target_series=series, + t_pred=t_pred, + shift=shift, + last_step_shift=last_step_shift, + series_matrix=series_matrix, + covariate_matrices=covariate_matrices, + lags=self.lags, + component_lags=self.component_lags, + relative_cov_lags=relative_cov_lags, + num_samples=num_samples, uses_static_covariates=self.uses_static_covariates, - last_shape=self._static_covariates_shape, + last_static_covariates_shape=self._static_covariates_shape, ) - X = np.concatenate(X_blocks, axis=0) # X has shape (n_series * n_samples, n_regression_features) prediction = self._predict_and_sample( @@ -1257,6 +1199,7 @@ def _optimized_historical_forecasts( stride=stride, overlap_end=overlap_end, show_warnings=show_warnings, + verbose=verbose, 
predict_likelihood_parameters=predict_likelihood_parameters, **kwargs, ) @@ -1273,6 +1216,7 @@ def _optimized_historical_forecasts( stride=stride, overlap_end=overlap_end, show_warnings=show_warnings, + verbose=verbose, predict_likelihood_parameters=predict_likelihood_parameters, **kwargs, ) diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 29f3d740ba..307c7eac73 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -157,10 +157,31 @@ class NewCls(cls): return NewCls +xgb_test_params = { + "n_estimators": 1, + "max_depth": 1, + "max_leaves": 1, + "verbose": -1, + "random_state": 42, +} +lgbm_test_params = { + "n_estimators": 1, + "max_depth": 1, + "num_leaves": 2, + "verbosity": -1, + "random_state": 42, +} +cb_test_params = { + "iterations": 1, + "depth": 1, + "verbose": -1, + "random_state": 42, +} + + class TestRegressionModels: np.random.seed(42) - # default regression models models = [ RandomForest, @@ -179,10 +200,16 @@ class TestRegressionModels: LinearRegressionModel, likelihood="poisson", random_state=42 ) PoissonXGBModel = partialclass( - XGBModel, likelihood="poisson", random_state=42, tree_method="exact" + XGBModel, + likelihood="poisson", + tree_method="exact", + **xgb_test_params, ) QuantileXGBModel = partialclass( - XGBModel, likelihood="quantile", random_state=42, tree_method="exact" + XGBModel, + likelihood="quantile", + tree_method="exact", + **xgb_test_params, ) # targets for poisson regression must be positive, so we exclude them for some tests models.extend( @@ -200,8 +227,8 @@ class TestRegressionModels: 1e-13, # RegressionModel 0.8, # QuantileLinearRegressionModel 0.4, # PoissonLinearRegressionModel - 1e-01, # PoissonXGBModel - 0.5, # QuantileXGBModel + 0.75, # PoissonXGBModel + 0.75, # QuantileXGBModel ] multivariate_accuracies = [ 0.3, # RandomForest @@ -209,8 +236,8 @@ class TestRegressionModels: 1e-13, # RegressionModel 0.8, # QuantileLinearRegressionModel 0.4, # PoissonLinearRegressionModel - 0.15, # PoissonXGBModel - 0.4, # QuantileXGBModel + 0.75, # PoissonXGBModel + 0.75, # QuantileXGBModel ] multivariate_multiseries_accuracies = [ 0.05, # RandomForest @@ -218,23 +245,26 @@ class TestRegressionModels: 1e-13, # RegressionModel 0.8, # QuantileLinearRegressionModel 0.4, # PoissonLinearRegressionModel - 1e-01, # PoissonXGBModel - 0.4, # QuantileXGBModel + 0.85, # PoissonXGBModel + 0.65, # QuantileXGBModel ] lgbm_w_categorical_covariates = NotImportedModule if lgbm_available: + RegularLightGBMModel = partialclass(LightGBMModel, **lgbm_test_params) QuantileLightGBMModel = partialclass( LightGBMModel, likelihood="quantile", quantiles=[0.05, 0.5, 0.95], - random_state=42, + **lgbm_test_params, ) PoissonLightGBMModel = partialclass( - LightGBMModel, likelihood="poisson", random_state=42 + LightGBMModel, + likelihood="poisson", + **lgbm_test_params, ) models += [ - LightGBMModel, + RegularLightGBMModel, QuantileLightGBMModel, PoissonLightGBMModel, ] @@ -247,62 +277,67 @@ class TestRegressionModels: categorical_future_covariates=["fut_cov_promo_mechanism"], categorical_past_covariates=["past_cov_cat_dummy"], categorical_static_covariates=["product_id"], + **lgbm_test_params, ) univariate_accuracies += [ - 0.3, # LightGBMModel - 0.5, # QuantileLightGBMModel - 0.4, # PoissonLightGBMModel + 0.75, # LightGBMModel + 0.75, # QuantileLightGBMModel + 0.75, # PoissonLightGBMModel ] multivariate_accuracies += [ - 
0.4, # LightGBMModel - 0.4, # QuantileLightGBMModel - 0.4, # PoissonLightGBMModel + 0.7, # LightGBMModel + 0.75, # QuantileLightGBMModel + 0.75, # PoissonLightGBMModel ] multivariate_multiseries_accuracies += [ - 0.05, # LightGBMModel - 0.4, # QuantileLightGBMModel - 0.4, # PoissonLightGBMModel + 0.7, # LightGBMModel + 0.7, # QuantileLightGBMModel + 0.75, # PoissonLightGBMModel ] if cb_available: + RegularCatBoostModel = partialclass( + CatBoostModel, + **cb_test_params, + ) QuantileCatBoostModel = partialclass( CatBoostModel, likelihood="quantile", quantiles=[0.05, 0.5, 0.95], - random_state=42, + **cb_test_params, ) PoissonCatBoostModel = partialclass( CatBoostModel, likelihood="poisson", - random_state=42, + **cb_test_params, ) NormalCatBoostModel = partialclass( CatBoostModel, likelihood="gaussian", - random_state=42, + **cb_test_params, ) models += [ - CatBoostModel, + RegularCatBoostModel, QuantileCatBoostModel, PoissonCatBoostModel, NormalCatBoostModel, ] univariate_accuracies += [ 0.75, # CatBoostModel - 1e-03, # QuantileCatBoostModel - 1e-01, # PoissonCatBoostModel - 1e-05, # NormalCatBoostModel + 0.75, # QuantileCatBoostModel + 0.9, # PoissonCatBoostModel + 0.75, # NormalCatBoostModel ] multivariate_accuracies += [ 0.75, # CatBoostModel - 1e-03, # QuantileCatBoostModel - 0.15, # PoissonCatBoostModel - 1e-05, # NormalCatBoostModel + 0.75, # QuantileCatBoostModel + 0.86, # PoissonCatBoostModel + 0.75, # NormalCatBoostModel ] multivariate_multiseries_accuracies += [ 0.75, # CatBoostModel - 1e-03, # QuantileCatBoostModel - 1e-01, # PoissonCatBoostModel - 1e-03, # NormalCatBoostModel + 0.75, # QuantileCatBoostModel + 1.2, # PoissonCatBoostModel + 0.75, # NormalCatBoostModel ] # dummy feature and target TimeSeries instances @@ -1026,7 +1061,6 @@ def test_models_runnability(self, config): prediction = model_instance.predict(n=1) assert len(prediction) == 1 - @pytest.mark.slow @pytest.mark.parametrize( "config", itertools.product( @@ -1036,10 +1070,14 @@ def test_models_runnability(self, config): def test_fit(self, config): # test fitting both on univariate and multivariate timeseries model, mode, series = config + + series = series[:15] + sine_multivariate1 = self.sine_multivariate1[:15] + # auto-regression but past_covariates does not extend enough in the future with pytest.raises(ValueError): model_instance = model(lags=4, lags_past_covariates=4, multi_models=mode) - model_instance.fit(series=series, past_covariates=self.sine_multivariate1) + model_instance.fit(series=series, past_covariates=sine_multivariate1) model_instance.predict(n=10) # inconsistent number of components in series Sequence[TimeSeries] @@ -1072,19 +1110,19 @@ def test_fit(self, config): assert model_instance.lags.get("past") is None model_instance = model(lags=12, lags_past_covariates=12, multi_models=mode) - model_instance.fit(series=series, past_covariates=self.sine_multivariate1) + model_instance.fit(series=series, past_covariates=sine_multivariate1) assert len(model_instance.lags.get("past")) == 12 model_instance = model( lags=12, lags_future_covariates=(0, 1), multi_models=mode ) - model_instance.fit(series=series, future_covariates=self.sine_multivariate1) + model_instance.fit(series=series, future_covariates=sine_multivariate1) assert len(model_instance.lags.get("future")) == 1 model_instance = model( lags=12, lags_past_covariates=[-1, -4, -6], multi_models=mode ) - model_instance.fit(series=series, past_covariates=self.sine_multivariate1) + model_instance.fit(series=series, 
past_covariates=sine_multivariate1) assert len(model_instance.lags.get("past")) == 3 model_instance = model( @@ -1095,8 +1133,8 @@ def test_fit(self, config): ) model_instance.fit( series=series, - past_covariates=self.sine_multivariate1, - future_covariates=self.sine_multivariate1, + past_covariates=sine_multivariate1, + future_covariates=sine_multivariate1, ) assert len(model_instance.lags.get("past")) == 3 @@ -1289,11 +1327,11 @@ def test_multioutput_wrapper(self, config): horizon=0, target_dim=1 ) - model_configs = [(XGBModel, {"tree_method": "exact"})] + model_configs = [(XGBModel, dict({"tree_method": "exact"}, **xgb_test_params))] if lgbm_available: - model_configs += [(LightGBMModel, {})] + model_configs += [(LightGBMModel, lgbm_test_params)] if cb_available: - model_configs += [(CatBoostModel, {})] + model_configs += [(CatBoostModel, cb_test_params)] @pytest.mark.parametrize( "config", itertools.product(model_configs, [1, 2], [True, False]) @@ -1991,7 +2029,7 @@ def test_component_specific_lags(self, config): ) # n > output_chunk_length - model.predict( + pred = model.predict( 7, series=series[0] if multiple_series else None, past_covariates=( @@ -2005,6 +2043,11 @@ def test_component_specific_lags(self, config): else None ), ) + # check that lagged features are properly extracted during auto-regression + if multivar_target: + np.testing.assert_array_almost_equal( + tg.sine_timeseries(length=27)[-7:].values(), pred["sine"].values() + ) @pytest.mark.parametrize( "config", @@ -2303,14 +2346,18 @@ def test_output_shift(self, config): @pytest.mark.parametrize( "config", itertools.product( - [RegressionModel, LinearRegressionModel, XGBModel] - + ([LightGBMModel] if lgbm_available else []), + [ + (RegressionModel, {}), + (LinearRegressionModel, {}), + (XGBModel, xgb_test_params), + ] + + ([(LightGBMModel, lgbm_test_params)] if lgbm_available else []), [True, False], [1, 2], ), ) def test_encoders(self, config): - model_cls, mode, ocl = config + (model_cls, model_kwargs), mode, ocl = config max_past_lag = -4 max_future_lag = 4 # target @@ -2353,18 +2400,21 @@ def test_encoders(self, config): add_encoders=encoder_examples["past"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_fc_valid0 = model_cls( lags=2, add_encoders=encoder_examples["future"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_mixed_valid0 = model_cls( lags=2, add_encoders=encoder_examples["mixed"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) # encoders will not generate covariates without lags @@ -2379,12 +2429,14 @@ def test_encoders(self, config): add_encoders=encoder_examples["past"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_fc_valid0 = model_cls( lags_future_covariates=[-1, 0], add_encoders=encoder_examples["future"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_mixed_valid0 = model_cls( lags_past_covariates=[-2, -1], @@ -2392,6 +2444,7 @@ def test_encoders(self, config): add_encoders=encoder_examples["mixed"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) # check that fit/predict works with model internal covariate requirement checks for model in [model_pc_valid0, model_fc_valid0, model_mixed_valid0]: @@ -2406,6 +2459,7 @@ def test_encoders(self, config): add_encoders=encoder_examples["past"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_fc_valid1 = model_cls( lags=2, @@ -2413,6 +2467,7 @@ def test_encoders(self, config): 
add_encoders=encoder_examples["future"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_mixed_valid1 = model_cls( lags=2, @@ -2421,6 +2476,7 @@ def test_encoders(self, config): add_encoders=encoder_examples["mixed"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) for model, ex in zip( @@ -2728,6 +2784,7 @@ def get_model_params(): return { "lags": int(period / 2), "output_chunk_length": int(period / 2), + "verbose": -1, } # test case without using categorical static covariates @@ -2780,6 +2837,7 @@ def get_model_params(): "past_cov_cat_dummy", ], categorical_static_covariates=["product_id"], + **lgbm_test_params, ), LightGBMModel( lags=1, @@ -2789,12 +2847,14 @@ def get_model_params(): "past_cov_cat_dummy", ], categorical_static_covariates=["does_not_exist"], + **lgbm_test_params, ), LightGBMModel( lags=1, lags_past_covariates=1, output_chunk_length=1, categorical_future_covariates=["does_not_exist"], + **lgbm_test_params, ), ] if lgbm_available @@ -3002,8 +3062,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "poisson", - "random_state": 42, "multi_models": True, + **xgb_test_params, }, 0.6, ), @@ -3013,8 +3073,8 @@ class TestProbabilisticRegressionModels: "lags": 2, "likelihood": "quantile", "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9], - "random_state": 42, "multi_models": True, + **xgb_test_params, }, 0.4, ), @@ -3026,8 +3086,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "quantile", - "random_state": 42, "multi_models": True, + **lgbm_test_params, }, 0.4, ), @@ -3037,8 +3097,8 @@ class TestProbabilisticRegressionModels: "lags": 2, "likelihood": "quantile", "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9], - "random_state": 42, "multi_models": True, + **lgbm_test_params, }, 0.4, ), @@ -3047,8 +3107,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "poisson", - "random_state": 42, "multi_models": True, + **lgbm_test_params, }, 0.6, ), @@ -3060,8 +3120,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "quantile", - "random_state": 42, "multi_models": True, + **cb_test_params, }, 0.05, ), @@ -3071,8 +3131,8 @@ class TestProbabilisticRegressionModels: "lags": 2, "likelihood": "quantile", "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9], - "random_state": 42, "multi_models": True, + **cb_test_params, }, 0.05, ), @@ -3081,8 +3141,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "poisson", - "random_state": 42, "multi_models": True, + **cb_test_params, }, 0.6, ), @@ -3091,8 +3151,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "gaussian", - "random_state": 42, "multi_models": True, + **cb_test_params, }, 0.05, ), @@ -3104,7 +3164,6 @@ class TestProbabilisticRegressionModels: constant_noisy_multivar_ts = constant_noisy_ts.stack(constant_noisy_ts) num_samples = 5 - @pytest.mark.slow @pytest.mark.parametrize( "config", itertools.product(models_cls_kwargs_errs, [True, False]) ) @@ -3126,7 +3185,6 @@ def test_fit_predict_determinism(self, config): pred3 = model.predict(n=10, num_samples=2).values() assert (pred2 != pred3).any() - @pytest.mark.slow @pytest.mark.parametrize( "config", itertools.product(models_cls_kwargs_errs, [True, False]) ) @@ -3141,7 +3199,6 @@ def test_probabilistic_forecast_accuracy_univariate(self, config): self.constant_noisy_ts, ) - @pytest.mark.slow @pytest.mark.parametrize( "config", itertools.product(models_cls_kwargs_errs, [True, False]) ) diff --git 
a/darts/tests/utils/tabularization/test_create_lagged_training_data.py b/darts/tests/utils/tabularization/test_create_lagged_training_data.py index d43f0699fd..54a5fc9a2f 100644 --- a/darts/tests/utils/tabularization/test_create_lagged_training_data.py +++ b/darts/tests/utils/tabularization/test_create_lagged_training_data.py @@ -1,7 +1,7 @@ import itertools import warnings from itertools import product -from typing import Optional, Sequence +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -15,6 +15,26 @@ create_lagged_training_data, ) from darts.utils.timeseries_generation import linear_timeseries +from darts.utils.utils import generate_index + + +def helper_create_multivariate_linear_timeseries( + n_components: int, components_names: Sequence[str] = None, **kwargs +) -> TimeSeries: + """ + Helper function that creates a `linear_timeseries` with a specified number of + components. To help distinguish each component from one another, `i` is added on + to each value of the `i`th component. Any additional keyword arguments are passed + to `linear_timeseries` (`start_value`, `end_value`, `start`, `end`, `length`, etc). + """ + if components_names is None or len(components_names) < n_components: + components_names = [f"lin_ts_{i}" for i in range(n_components)] + timeseries = [] + for i in range(n_components): + # Values of each component is 1 larger than the last: + timeseries_i = linear_timeseries(column_name=components_names[i], **kwargs) + i + timeseries.append(timeseries_i) + return darts_concatenate(timeseries, axis=1) class TestCreateLaggedTrainingData: @@ -40,27 +60,6 @@ class TestCreateLaggedTrainingData: # Helper Functions for Generated Test Cases # - @staticmethod - def create_multivariate_linear_timeseries( - n_components: int, components_names: Sequence[str] = None, **kwargs - ) -> TimeSeries: - """ - Helper function that creates a `linear_timeseries` with a specified number of - components. To help distinguish each component from one another, `i` is added on - to each value of the `i`th component. Any additional keyword arguments are passed - to `linear_timeseries` (`start_value`, `end_value`, `start`, `end`, `length`, etc). 
- """ - timeseries = [] - if components_names is None or len(components_names) < n_components: - components_names = [f"lin_ts_{i}" for i in range(n_components)] - for i in range(n_components): - # Values of each component is 1 larger than the last: - timeseries_i = ( - linear_timeseries(column_name=components_names[i], **kwargs) + i - ) - timeseries.append(timeseries_i) - return darts_concatenate(timeseries, axis=1) - @staticmethod def get_feature_times( target: TimeSeries, @@ -384,7 +383,7 @@ def create_y( timesteps_ahead = ( range(output_chunk_shift, output_chunk_length + output_chunk_shift) if multi_models - else (output_chunk_length + output_chunk_shift - 1,) + else [output_chunk_length + output_chunk_shift - 1] ) y_row = [] for i in timesteps_ahead: @@ -399,17 +398,248 @@ def create_y( y = np.stack(y, axis=0) return y + @staticmethod + def convert_lags_to_dict(ts_tg, ts_pc, ts_fc, lags_tg, lags_pc, lags_fc): + """Convert lags to the dictionary format, assuming the lags are shared across the components""" + lags_as_dict = dict() + for ts_, lags_, name_ in zip( + [ts_tg, ts_pc, ts_fc], + [lags_tg, lags_pc, lags_fc], + ["target", "past", "future"], + ): + single_ts = ts_[0] if isinstance(ts_, Sequence) else ts_ + if single_ts is None or lags_ is None: + lags_as_dict[name_] = None + # already in dict format + elif isinstance(lags_, dict): + lags_as_dict[name_] = lags_ + # from list + elif isinstance(lags_, list): + lags_as_dict[name_] = {c_name: lags_ for c_name in single_ts.components} + else: + raise ValueError( + f"Lags should be `None`, a list or a dictionary. Received {type(lags_)}." + ) + return lags_as_dict + + def helper_create_expected_lagged_data( + self, + target: Optional[Union[TimeSeries, List[TimeSeries]]], + past: Optional[Union[TimeSeries, List[TimeSeries]]], + future: Optional[Union[TimeSeries, List[TimeSeries]]], + lags: Optional[Union[List[int], Dict[str, List[int]]]], + lags_past: Optional[Union[List[int], Dict[str, List[int]]]], + lags_future: Optional[Union[List[int], Dict[str, List[int]]]], + output_chunk_length: int, + output_chunk_shift: int, + multi_models: bool, + max_samples_per_ts: Optional[int], + ) -> Tuple[np.ndarray, np.ndarray, Any]: + """Helper function to create the X and y arrays by building them block by block (one per covariates).""" + feats_times = self.get_feature_times( + target, + past, + future, + lags, + lags_past, + lags_future, + output_chunk_length, + max_samples_per_ts, + output_chunk_shift, + ) + # Construct `X` by constructing each block, then concatenate these + # blocks together along component axis: + X_target = self.construct_X_block(target, feats_times, lags) + X_past = self.construct_X_block(past, feats_times, lags_past) + X_future = self.construct_X_block(future, feats_times, lags_future) + all_X = (X_target, X_past, X_future) + to_concat = [X for X in all_X if X is not None] + expected_X = np.concatenate(to_concat, axis=1) + expected_y = self.create_y( + target, + feats_times, + output_chunk_length, + multi_models, + output_chunk_shift, + ) + if len(expected_X.shape) == 2: + expected_X = expected_X[:, :, np.newaxis] + if len(expected_y.shape) == 2: + expected_y = expected_y[:, :, np.newaxis] + return expected_X, expected_y, feats_times + + def helper_check_lagged_data( + self, + convert_lags_to_dict: bool, + expected_X: np.ndarray, + expected_y: np.ndarray, + expected_times_x, + expected_times_y, + target: Optional[Union[TimeSeries, List[TimeSeries]]], + past_cov: Optional[Union[TimeSeries, List[TimeSeries]]], + future_cov: 
Optional[Union[TimeSeries, List[TimeSeries]]], + lags: Optional[Union[List[int], Dict[str, List[int]]]], + lags_past: Optional[Union[List[int], Dict[str, List[int]]]], + lags_future: Optional[Union[List[int], Dict[str, List[int]]]], + output_chunk_length: int, + output_chunk_shift: int, + use_static_covariates: bool, + multi_models: bool, + max_samples_per_ts: Optional[int], + use_moving_windows: bool, + concatenate: bool, + **kwargs, + ): + """Helper function to call the `create_lagged_training_data()` method with lags argument either in the list + format or the dictionary format (automatically convert them when they are identical across components). + + Assertions are different depending on the value of `concatenate` to account for the output shape. + """ + if convert_lags_to_dict: + lags_as_dict = self.convert_lags_to_dict( + target, + past_cov if lags_past else None, + future_cov if lags_future else None, + lags, + lags_past, + lags_future, + ) + lags_ = lags_as_dict["target"] + lags_past_ = lags_as_dict["past"] + lags_future_ = lags_as_dict["future"] + else: + lags_ = lags + lags_past_ = lags_past + lags_future_ = lags_future + + # convert indexes to list of tuples to simplify processing + expected_times_x = ( + expected_times_x + if isinstance(expected_times_x, Sequence) + else [expected_times_x] + ) + expected_times_y = ( + expected_times_y + if isinstance(expected_times_y, Sequence) + else [expected_times_y] + ) + + X, y, times, _ = create_lagged_training_data( + target_series=target, + output_chunk_length=output_chunk_length, + past_covariates=past_cov if lags_past_ else None, + future_covariates=future_cov if lags_future_ else None, + lags=lags_, + lags_past_covariates=lags_past_, + lags_future_covariates=lags_future_, + uses_static_covariates=use_static_covariates, + multi_models=multi_models, + max_samples_per_ts=max_samples_per_ts, + use_moving_windows=use_moving_windows, + output_chunk_shift=output_chunk_shift, + concatenate=concatenate, + ) + # should have the exact same number of indexes + assert len(times) == len(expected_times_x) == len(expected_times_y) + + # Check that time index(es) match: + for time, exp_time in zip(times, expected_times_x): + assert exp_time.equals(time) + + if concatenate: + # Number of observations should match number of feature times: + data_length = sum(len(time) for time in times) + exp_length_x = sum(len(exp_time) for exp_time in expected_times_x) + exp_length_y = sum(len(exp_time) for exp_time in expected_times_y) + assert exp_length_x == exp_length_y + assert X.shape[0] == exp_length_x == data_length + assert y.shape[0] == exp_length_y == data_length + + # Check that outputs match: + assert X.shape == expected_X.shape + assert np.allclose(expected_X, X) + assert y.shape == expected_y.shape + assert np.allclose(expected_y, y) + else: + # Check the number of observation for each series + for x_, exp_time_x, y_, exp_time_y, time in zip( + X, expected_times_x, y, expected_times_y, times + ): + assert x_.shape[0] == len(time) == len(exp_time_x) + assert y_.shape[0] == len(time) == len(exp_time_y) + + # Check that outputs match: + for x_, y_ in zip(X, y): + assert np.allclose(X, x_) + assert np.allclose(y, y_) + # # Generated Test Cases # + target_with_no_cov = helper_create_multivariate_linear_timeseries( + n_components=1, + components_names=["no_static"], + start_value=0, + end_value=10, + start=2, + length=10, + freq=2, + ) + n_comp = 2 + target_with_static_cov = helper_create_multivariate_linear_timeseries( + n_components=n_comp, + 
components_names=["static_0", "static_1"], + start_value=0, + end_value=10, + start=2, + length=10, + freq=2, + ) + target_with_static_cov = target_with_static_cov.with_static_covariates( + pd.DataFrame({"dummy": [1]}) # leads to "global" static cov component name + ) + target_with_static_cov2 = target_with_static_cov.with_static_covariates( + pd.DataFrame( + {"dummy": [i for i in range(n_comp)]} + ) # leads to sharing target component names + ) + target_with_static_cov3 = target_with_static_cov.with_static_covariates( + pd.DataFrame( + { + "dummy": [i for i in range(n_comp)], + "dummy1": [i for i in range(n_comp)], + } + ) # leads to sharing target component names + ) + + past = helper_create_multivariate_linear_timeseries( + n_components=3, + components_names=["past_0", "past_1", "past_2"], + start_value=10, + end_value=20, + start=2, + length=10, + freq=2, + ) + future = helper_create_multivariate_linear_timeseries( + n_components=4, + components_names=["future_0", "future_1", "future_2", "future_3"], + start_value=20, + end_value=30, + start=2, + length=10, + freq=2, + ) + # Input parameter combinations used to generate test cases: output_chunk_length_combos = (1, 3) output_chunk_shift_combos = (0, 1) multi_models_combos = (False, True) max_samples_per_ts_combos = (1, 2, None) - target_lag_combos = past_lag_combos = (None, [-1, -3], [-3, -1]) - future_lag_combos = (*target_lag_combos, [0], [2, 1], [-1, 1], [0, 2]) + # lags are sorted ascending as done by the models internally + target_lag_combos = past_lag_combos = (None, [-3, -1], [-2, -1]) + future_lag_combos = (*target_lag_combos, [0], [1, 2], [-1, 1], [0, 2]) # minimum series length min_n_ts = 8 + max(output_chunk_shift_combos) @@ -436,7 +666,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): # different start times, different lengths, and different values, but # they're all of the same frequency: if series_type == "integer": - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, @@ -444,7 +674,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): length=self.min_n_ts, freq=2, ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, @@ -452,7 +682,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): length=self.min_n_ts + 1, freq=2, ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, @@ -461,7 +691,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): freq=2, ) else: - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, @@ -469,7 +699,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): length=self.min_n_ts, freq="2d", ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, @@ -477,7 +707,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): length=self.min_n_ts + 1, freq="2d", ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, @@ -509,55 +739,45 @@ def test_lagged_training_data_equal_freq(self, series_type: str): 
lags_is_none = [x is None for x in all_lags] if all(lags_is_none): continue - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length, - past_covariates=past if lags_past else None, - future_covariates=future if lags_future else None, - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - multi_models=multi_models, - max_samples_per_ts=max_samples_per_ts, - use_moving_windows=True, - output_chunk_shift=output_chunk_shift, - ) - feats_times = self.get_feature_times( - target, - past, - future, - lags, - lags_past, - lags_future, - output_chunk_length, - max_samples_per_ts, - output_chunk_shift, - ) - # Construct `X` by constructing each block, then concatenate these - # blocks together along component axis: - X_target = self.construct_X_block(target, feats_times, lags) - X_past = self.construct_X_block(past, feats_times, lags_past) - X_future = self.construct_X_block(future, feats_times, lags_future) - all_X = (X_target, X_past, X_future) - to_concat = [X for X in all_X if X is not None] - expected_X = np.concatenate(to_concat, axis=1) - expected_y = self.create_y( - target, - feats_times, - output_chunk_length, - multi_models, - output_chunk_shift, + + expected_X, expected_y, expected_times = ( + self.helper_create_expected_lagged_data( + target, + past, + future, + lags, + lags_past, + lags_future, + output_chunk_length, + output_chunk_shift, + multi_models, + max_samples_per_ts, + ) ) - # Number of observations should match number of feature times: - assert X.shape[0] == len(feats_times) - assert y.shape[0] == len(feats_times) - assert X.shape[0] == len(times[0]) - assert y.shape[0] == len(times[0]) - # Check that outputs match: - assert np.allclose(expected_X, X[:, :, 0]) - assert np.allclose(expected_y, y[:, :, 0]) - assert feats_times.equals(times[0]) + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": future, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": max_samples_per_ts, + "use_moving_windows": True, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) @pytest.mark.parametrize( "series_type", @@ -581,17 +801,17 @@ def test_lagged_training_data_unequal_freq(self, series_type): # different start times, different lengths, different values, and different # frequencies: if series_type == "integer": - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, start=2, length=20, freq=1 ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, start=4, length=10, freq=2 ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, start=6, length=7, freq=3 ) else: - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, @@ -599,7 +819,7 @@ def 
test_lagged_training_data_unequal_freq(self, series_type): length=20, freq="d", ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, @@ -607,7 +827,7 @@ def test_lagged_training_data_unequal_freq(self, series_type): length=10, freq="2d", ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, @@ -639,55 +859,49 @@ def test_lagged_training_data_unequal_freq(self, series_type): lags_is_none = [x is None for x in all_lags] if all(lags_is_none): continue - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length, - past_covariates=past if lags_past else None, - future_covariates=future if lags_future else None, - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - multi_models=multi_models, - max_samples_per_ts=max_samples_per_ts, - use_moving_windows=False, - output_chunk_shift=output_chunk_shift, + + expected_X, expected_y, expected_times = ( + self.helper_create_expected_lagged_data( + target, + past, + future, + lags, + lags_past, + lags_future, + output_chunk_length, + output_chunk_shift, + multi_models, + max_samples_per_ts, + ) ) - feats_times = self.get_feature_times( - target, - past, - future, - lags, - lags_past, - lags_future, - output_chunk_length, - max_samples_per_ts, - output_chunk_shift, - ) - # Construct `X` by constructing each block, then concatenate these - # blocks together along component axis: - X_target = self.construct_X_block(target, feats_times, lags) - X_past = self.construct_X_block(past, feats_times, lags_past) - X_future = self.construct_X_block(future, feats_times, lags_future) - all_X = (X_target, X_past, X_future) - to_concat = [x for x in all_X if x is not None] - expected_X = np.concatenate(to_concat, axis=1) - expected_y = self.create_y( - target, - feats_times, - output_chunk_length, - multi_models, - output_chunk_shift, + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": future, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": max_samples_per_ts, + "use_moving_windows": False, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" ) - # Number of observations should match number of feature times: - assert X.shape[0] == len(feats_times) - assert y.shape[0] == len(feats_times) - assert X.shape[0] == len(times[0]) - assert y.shape[0] == len(times[0]) - # Check that outputs match: - assert np.allclose(expected_X, X[:, :, 0]) - assert np.allclose(expected_y, y[:, :, 0]) - assert feats_times.equals(times[0]) @pytest.mark.parametrize( "series_type", @@ -708,17 +922,17 @@ def test_lagged_training_data_method_consistency(self, series_type): # different start times, different lengths, different values, and of # different frequencies: if series_type == "integer": - target = 
self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, start=2, length=20, freq=1 ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, start=4, length=10, freq=2 ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, start=6, length=7, freq=3 ) else: - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, @@ -726,7 +940,7 @@ def test_lagged_training_data_method_consistency(self, series_type): end=pd.Timestamp("1/18/2000"), freq="2d", ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, @@ -734,7 +948,7 @@ def test_lagged_training_data_method_consistency(self, series_type): end=pd.Timestamp("1/20/2000"), freq="2d", ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, @@ -841,7 +1055,7 @@ def test_lagged_training_data_single_lag_single_component_same_series(self, conf expected_y = series.all_values(copy=False)[ 3 + output_chunk_shift : 3 + output_chunk_shift + len(expected_times_y), :, - 0, + :, ] # Offset `3:-2` by `-1` lag: expected_X_target = series.all_values(copy=False)[ @@ -855,28 +1069,38 @@ def test_lagged_training_data_single_lag_single_component_same_series(self, conf ] expected_X = np.concatenate( [expected_X_target, expected_X_past, expected_X_future], axis=1 - ) - X, y, times, _ = create_lagged_training_data( - target_series=series, - output_chunk_length=output_chunk_length, - past_covariates=series, - future_covariates=series, - lags=lags, - lags_past_covariates=past_lags, - lags_future_covariates=future_lags, - uses_static_covariates=False, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - # Number of observations should match number of feature times: - assert X.shape[0] == len(expected_times_x) - assert X.shape[0] == len(times[0]) - assert y.shape[0] == len(expected_times_y) - assert y.shape[0] == len(times[0]) - # Check that outputs match: - assert np.allclose(expected_X, X[:, :, 0]) - assert np.allclose(expected_y, y[:, :, 0]) - assert expected_times_x.equals(times[0]) + )[:, :, np.newaxis] + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times_x, + "expected_times_y": expected_times_y, + "target": series, + "past_cov": series, + "future_cov": series, + "lags": lags, + "lags_past": past_lags, + "lags_future": future_lags, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": True, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( 
"config", @@ -946,27 +1170,48 @@ def test_lagged_training_data_extend_past_and_future_covariates(self, config): past.all_values(copy=False)[-1 - output_chunk_shift, :, 0], future.all_values(copy=False)[-1 - output_chunk_shift, :, 0], ] - ).reshape(1, -1) + ).reshape(1, -1, 1) # Label is very last value of `target`: - expected_y = target.all_values(copy=False)[-1, :, 0] + expected_y = target.all_values(copy=False)[-1:, :, :] + + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) + # Check correctness for both 'moving window' method # and 'time intersection' method: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=1, - past_covariates=past, - future_covariates=future, - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - max_samples_per_ts=max_samples_per_ts, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq - assert np.allclose(expected_X, X[:, :, 0]) - assert np.allclose(expected_y, y[:, :, 0]) + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": future, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": True, + "max_samples_per_ts": max_samples_per_ts, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -998,22 +1243,43 @@ def test_lagged_training_data_single_point(self, config): lags = [-1] expected_X = np.zeros((1, 1, 1)) expected_y = np.ones((1, 1, 1)) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Test correctness for 'moving window' and for 'time intersection' methods, as well # as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length, - lags=lags, - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - # Should only have one sample, generated for `t = target.end_time()`: - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": None, + "future_cov": None, + "lags": lags, + "lags_past": None, + "lags_future": None, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": 
use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -1060,25 +1326,45 @@ def test_lagged_training_data_zero_lags(self, config): ) # X comprises of first value of `target` (i.e. 0) and only value in `future`: - expected_X = np.array([0.0, 1.0]).reshape(1, 2, 1) + expected_X = np.array([[[0.0], [1.0]]]) expected_y = np.ones((1, 1, 1)) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Check correctness for 'moving windows' and 'time intersection' methods, as # well as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=1, - future_covariates=future, - lags=[-1], - lags_future_covariates=[0], - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": None, + "future_cov": future, + "lags": [-1], + "lags_past": None, + "lags_future": [0], + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -1142,23 +1428,43 @@ def test_lagged_training_data_no_target_lags_future_covariates(self, config): # X comprises of first value of `target` (i.e. 
0) and only value in `future`: expected_X = future[-1].all_values(copy=False) expected_y = target[-1].all_values(copy=False) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Check correctness for 'moving windows' and 'time intersection' methods, as # well as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=1, - future_covariates=future, - lags=None, - lags_future_covariates=[cov_lag], - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": None, + "future_cov": future, + "lags": None, + "lags_past": None, + "lags_future": [cov_lag], + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -1221,23 +1527,43 @@ def test_lagged_training_data_no_target_lags_past_covariates(self, config): # X comprises of first value of `target` (i.e. 
0) and only value in `future`: expected_X = past[-1].all_values(copy=False) expected_y = target[-1].all_values(copy=False) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Check correctness for 'moving windows' and 'time intersection' methods, as # well as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=1, - past_covariates=past, - lags=None, - lags_past_covariates=[cov_lag], - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": None, + "lags": None, + "lags_past": [cov_lag], + "lags_future": None, + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -1284,25 +1610,184 @@ def test_lagged_training_data_positive_lags(self, config): end_value=2, ) # X comprises of first value of `target` (i.e. 
0) and only value in `future`: - expected_X = np.array([0.0, 1.0]).reshape(1, 2, 1) + expected_X = np.array([[[0.0], [1.0]]]) expected_y = np.ones((1, 1, 1)) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Check correctness for 'moving windows' and 'time intersection' methods, as # well as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": None, + "future_cov": future, + "lags": [-1], + "lags_past": None, + "lags_future": [1], + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) + + @pytest.mark.parametrize( + "config", + itertools.product( + [0, 1, 3], + [1, 2], + [True, False], + ["datetime", "integer"], + ), + ) + def test_lagged_training_data_comp_wise_lags(self, config): + """ + Tests that `create_lagged_training_data` generate the expected values when the + lags are component-specific over multivariate series. + + Note that this is supported only when use_moving_window=True. + """ + output_chunk_shift, output_chunk_length, multi_models, series_type = config + + lags_tg = {"target_0": [-4, -1], "target_1": [-4, -1]} + lags_pc = [-3] + lags_fc = {"future_0": [-1, 0], "future_1": [-2, 1]} + + if series_type == "integer": + start_tg = 0 + start_pc = start_tg + 1 + start_fc = start_tg + 2 + else: + start_tg = pd.Timestamp("2000-01-15") + start_pc = pd.Timestamp("2000-01-16") + start_fc = pd.Timestamp("2000-01-17") + + # length = max lag - min lag + 1 = -1 + 4 + 1 = 4 + target = helper_create_multivariate_linear_timeseries( + n_components=2, + components_names=["target_0", "target_1"], + length=4 + output_chunk_shift + output_chunk_length, + start=start_tg, + ) + # length = max lag - min lag + 1 = -3 + 3 + 1 = 1 + past = ( + helper_create_multivariate_linear_timeseries( + n_components=2, + components_names=["past_0", "past_1"], + length=1, + start=start_pc, + ) + + 100 + ) + # length = max lag - min lag + 1 = 1 + 2 + 1 = 4 + future = ( + helper_create_multivariate_linear_timeseries( + n_components=2, + components_names=["future_0", "future_1"], + length=4 + output_chunk_shift + output_chunk_length, + start=start_fc, + ) + + 200 + ) + + # extremes lags are manually computed, similarly to the model.lags attribute + feats_times = self.get_feature_times( target, - output_chunk_length=1, - future_covariates=future, - lags=[-1], - lags_future_covariates=[1], - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, + past, + future, + [-4, -1], # min, max target lag + [-3], # unique past lag + [-2, 1], # min, max future lag + output_chunk_length, + None, + output_chunk_shift, + ) + + # reorder the features to obtain target_0_lag-4, target_1_lag-4, target_0_lag-1, target_1_lag-1 + X_target 
= [ + self.construct_X_block( + target["target_0"], feats_times, lags_tg["target_0"][0:1] + ), + self.construct_X_block( + target["target_1"], feats_times, lags_tg["target_1"][0:1] + ), + self.construct_X_block( + target["target_0"], feats_times, lags_tg["target_0"][1:2] + ), + self.construct_X_block( + target["target_1"], feats_times, lags_tg["target_1"][1:2] + ), + ] + # single lag for all the components, can be kept as is + X_past = [ + self.construct_X_block(past[name], feats_times, lags_pc) + for name in ["past_0", "past_1"] + ] + # reorder the features to obtain future_1_lag-2, future_0_lag-1, future_0_lag0, future_1_lag1 + X_future = [ + self.construct_X_block( + future["future_1"], feats_times, lags_fc["future_1"][0:1] + ), + self.construct_X_block( + future["future_0"], feats_times, lags_fc["future_0"][0:1] + ), + self.construct_X_block( + future["future_0"], feats_times, lags_fc["future_0"][1:2] + ), + self.construct_X_block( + future["future_1"], feats_times, lags_fc["future_1"][1:2] + ), + ] + all_X = X_target + X_past + X_future + expected_X = np.concatenate(all_X, axis=1)[:, :, np.newaxis] + expected_y = self.create_y( + target, + feats_times, + output_chunk_length, + multi_models, + output_chunk_shift, + )[:, :, np.newaxis] + + # lags are already in dict format + self.helper_check_lagged_data( + convert_lags_to_dict=True, + expected_X=expected_X, + expected_y=expected_y, + expected_times_x=feats_times, + expected_times_y=feats_times, + target=target, + past_cov=past, + future_cov=future, + lags=lags_tg, + lags_past=lags_pc, + lags_future=lags_fc, + output_chunk_length=output_chunk_length, output_chunk_shift=output_chunk_shift, + use_static_covariates=False, + multi_models=multi_models, + max_samples_per_ts=None, + use_moving_windows=True, + concatenate=True, ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq def test_lagged_training_data_sequence_inputs(self): """ @@ -1313,6 +1798,9 @@ def test_lagged_training_data_sequence_inputs(self): # Define two simple tabularization problems: target_1 = past_1 = future_1 = linear_timeseries(start=0, end=5) target_2 = past_2 = future_2 = linear_timeseries(start=6, end=11) + ts_tg = (target_1, target_2) + ts_pc = (past_1, past_2) + ts_fc = (future_1, future_2) lags = lags_past = lags_future = [-1] output_chunk_length = 1 # Expected solution: @@ -1328,45 +1816,41 @@ def test_lagged_training_data_sequence_inputs(self): expected_y = np.concatenate([expected_y_1, expected_y_2], axis=0) expected_times_1 = target_1.time_index[1:] expected_times_2 = target_2.time_index[1:] - # Check when `concatenate = True`: - X, y, times, _ = create_lagged_training_data( - (target_1, target_2), - output_chunk_length=output_chunk_length, - past_covariates=(past_1, past_2), - future_covariates=(future_1, future_2), - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - output_chunk_shift=0, + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": [expected_times_1, expected_times_2], + "expected_times_y": [expected_times_1, expected_times_2], + "target": ts_tg, + "past_cov": ts_pc, + "future_cov": ts_fc, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": 0, + "use_static_covariates": False, + "multi_models": True, + "max_samples_per_ts": None, + 
"use_moving_windows": True, + } + + # concatenate=True + self.helper_check_lagged_data( + convert_lags_to_dict=False, concatenate=True, **kwargs ) - assert np.allclose(X, expected_X) - assert np.allclose(y, expected_y) - assert len(times) == 2 - assert times[0].equals(expected_times_1) - assert times[1].equals(expected_times_2) - # Check when `concatenate = False`: - X, y, times, _ = create_lagged_training_data( - (target_1, target_2), - output_chunk_length=output_chunk_length, - past_covariates=(past_1, past_2), - future_covariates=(future_1, future_2), - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - concatenate=False, - output_chunk_shift=0, + self.helper_check_lagged_data( + convert_lags_to_dict=True, concatenate=True, **kwargs + ) + + # concatenate=False + self.helper_check_lagged_data( + convert_lags_to_dict=False, concatenate=False, **kwargs + ) + self.helper_check_lagged_data( + convert_lags_to_dict=True, concatenate=False, **kwargs ) - assert len(X) == 2 - assert len(y) == 2 - assert np.allclose(X[0], expected_X_1) - assert np.allclose(X[1], expected_X_2) - assert np.allclose(y[0], expected_y_1) - assert np.allclose(y[1], expected_y_2) - assert len(times) == 2 - assert times[0].equals(expected_times_1) - assert times[1].equals(expected_times_2) def test_lagged_training_data_stochastic_series(self): """ @@ -1387,20 +1871,32 @@ def test_lagged_training_data_stochastic_series(self): ) expected_y = target.all_values(copy=False)[1:, :, :] expected_times = target.time_index[1:] - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=output_chunk_length, - past_covariates=past, - future_covariates=future, - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - output_chunk_shift=0, + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": future, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": 0, + "use_static_covariates": False, + "multi_models": True, + "max_samples_per_ts": None, + "use_moving_windows": True, + } + + self.helper_check_lagged_data( + convert_lags_to_dict=False, concatenate=True, **kwargs + ) + self.helper_check_lagged_data( + convert_lags_to_dict=True, concatenate=True, **kwargs ) - assert np.allclose(X, expected_X) - assert np.allclose(y, expected_y) - assert times[0].equals(expected_times) def test_lagged_training_data_no_shared_times_error(self): """ @@ -1622,6 +2118,46 @@ def test_lagged_training_data_invalid_lag_values_error(self): output_chunk_shift=0, ) + def test_lagged_training_data_dict_lags_no_moving_window_error(self): + """ + Tests that `create_lagged_training_data` throws correct error + when `use_moving_window` is set to `False` and lags are provided + as a dict for a multivariate series. 
+ """ + ts = linear_timeseries(start=1, length=20, freq=1, column_name="lin1") + lags = [-1] + lags_dict = {"lin1": [-1]} + # one series, one set of lags are dict + with pytest.raises(ValueError) as err: + create_lagged_training_data( + target_series=ts, + output_chunk_length=1, + lags=lags_dict, + uses_static_covariates=False, + use_moving_windows=False, + output_chunk_shift=0, + ) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags is provided as a dictionary." + ) + # all the series are provided, only one passed as dict + with pytest.raises(ValueError) as err: + create_lagged_training_data( + target_series=ts, + past_covariates=ts, + future_covariates=ts, + output_chunk_length=1, + lags=lags, + lags_past_covariates=lags_dict, + lags_future_covariates=lags, + uses_static_covariates=False, + use_moving_windows=False, + output_chunk_shift=0, + ) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags is provided as a dictionary." + ) + def test_lagged_training_data_unspecified_lag_or_series_warning(self): """ Tests that `create_lagged_training_data` throws correct @@ -1709,295 +2245,375 @@ def test_lagged_training_data_unspecified_lag_or_series_warning(self): ) assert len(w) == 0 - def test_create_lagged_component_names(self): + @pytest.mark.parametrize( + "config", + [ + # target no static covariate + ( + target_with_no_cov, + None, + None, + [-2, -1], + None, + None, + False, + ["no_static_target_lag-2", "no_static_target_lag-1"], + ), + # target with static covariate (but don't use them in feature names) + ( + target_with_static_cov, + None, + None, + [-4, -1], + None, + None, + False, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_0_target_lag-1", + "static_1_target_lag-1", + ], + ), + # target with static covariate (acting on global target components) + ( + target_with_static_cov, + None, + None, + [-4, -1], + None, + None, + True, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_0_target_lag-1", + "static_1_target_lag-1", + "dummy_statcov_target_global_components", + ], + ), + # target with static covariate (component specific) + ( + target_with_static_cov2, + None, + None, + [-4, -1], + None, + None, + True, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_0_target_lag-1", + "static_1_target_lag-1", + "dummy_statcov_target_static_0", + "dummy_statcov_target_static_1", + ], + ), + # target with static covariate (component specific & multivariate) + ( + target_with_static_cov3, + None, + None, + [-4, -1], + None, + None, + True, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_0_target_lag-1", + "static_1_target_lag-1", + "dummy_statcov_target_static_0", + "dummy_statcov_target_static_1", + "dummy1_statcov_target_static_0", + "dummy1_statcov_target_static_1", + ], + ), + # target + past + ( + target_with_no_cov, + past, + None, + [-4, -3], + [-1], + None, + False, + [ + "no_static_target_lag-4", + "no_static_target_lag-3", + "past_0_pastcov_lag-1", + "past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + ], + ), + # target + future + ( + target_with_no_cov, + None, + future, + [-2, -1], + None, + [3], + False, + [ + "no_static_target_lag-2", + "no_static_target_lag-1", + "future_0_futcov_lag3", + "future_1_futcov_lag3", + "future_2_futcov_lag3", + "future_3_futcov_lag3", + ], + ), + # past + future + ( + target_with_no_cov, + past, + future, + None, + [-1], + [2], + False, + [ + "past_0_pastcov_lag-1", + 
"past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + "future_0_futcov_lag2", + "future_1_futcov_lag2", + "future_2_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + # target with static (not used) + past + future + ( + target_with_static_cov, + past, + future, + [-2, -1], + [-1], + [2], + False, + [ + "static_0_target_lag-2", + "static_1_target_lag-2", + "static_0_target_lag-1", + "static_1_target_lag-1", + "past_0_pastcov_lag-1", + "past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + "future_0_futcov_lag2", + "future_1_futcov_lag2", + "future_2_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + # multiple series with same components names, including past/future covariates + ( + [target_with_static_cov, target_with_static_cov], + [past, past], + [future, future], + [-3], + [-1], + [2], + False, + [ + "static_0_target_lag-3", + "static_1_target_lag-3", + "past_0_pastcov_lag-1", + "past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + "future_0_futcov_lag2", + "future_1_futcov_lag2", + "future_2_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + # multiple series with different components will use the first series as reference + ( + [ + target_with_static_cov, + target_with_no_cov.stack(target_with_no_cov), + ], + [past, past], + [future, past.stack(target_with_no_cov)], + [-2, -1], + [-1], + [2], + False, + [ + "static_0_target_lag-2", + "static_1_target_lag-2", + "static_0_target_lag-1", + "static_1_target_lag-1", + "past_0_pastcov_lag-1", + "past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + "future_0_futcov_lag2", + "future_1_futcov_lag2", + "future_2_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + ], + ) + def test_create_lagged_component_names(self, config): """ Tests that `create_lagged_component_names` produces the expected features name depending on the lags, output_chunk_length and covariates. 
- """ - target_with_no_cov = self.create_multivariate_linear_timeseries( - n_components=1, - components_names=["no_static"], - start_value=0, - end_value=10, - start=2, - length=10, - freq=2, - ) - n_comp = 2 - target_with_static_cov = self.create_multivariate_linear_timeseries( - n_components=n_comp, - components_names=["static_0", "static_1"], - start_value=0, - end_value=10, - start=2, - length=10, - freq=2, - ) - target_with_static_cov = target_with_static_cov.with_static_covariates( - pd.DataFrame({"dummy": [1]}) # leads to "global" static cov component name - ) - target_with_static_cov2 = target_with_static_cov.with_static_covariates( - pd.DataFrame( - {"dummy": [i for i in range(n_comp)]} - ) # leads to sharing target component names - ) - target_with_static_cov3 = target_with_static_cov.with_static_covariates( - pd.DataFrame( - { - "dummy": [i for i in range(n_comp)], - "dummy1": [i for i in range(n_comp)], - } - ) # leads to sharing target component names - ) - - past = self.create_multivariate_linear_timeseries( - n_components=3, - components_names=["past_0", "past_1", "past_2"], - start_value=10, - end_value=20, - start=2, - length=10, - freq=2, - ) - future = self.create_multivariate_linear_timeseries( - n_components=4, - components_names=["future_0", "future_1", "future_2", "future_3"], - start_value=20, - end_value=30, - start=2, - length=10, - freq=2, - ) - - # target no static covariate - expected_lagged_features = ["no_static_target_lag-2", "no_static_target_lag-1"] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_no_cov, - past_covariates=None, - future_covariates=None, - lags=[-2, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - use_static_covariates=False, - ) - assert expected_lagged_features == created_lagged_features - - # target with static covariate (but don't use them in feature names) - expected_lagged_features = [ - "static_0_target_lag-4", - "static_1_target_lag-4", - "static_0_target_lag-1", - "static_1_target_lag-1", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov, - past_covariates=None, - future_covariates=None, - lags=[-4, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - use_static_covariates=False, - ) - assert expected_lagged_features == created_lagged_features - # target with static covariate (acting on global target components) - expected_lagged_features = [ - "static_0_target_lag-4", - "static_1_target_lag-4", - "static_0_target_lag-1", - "static_1_target_lag-1", - "dummy_statcov_target_global_components", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov, - past_covariates=None, - future_covariates=None, - lags=[-4, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - use_static_covariates=True, - ) - assert expected_lagged_features == created_lagged_features - - # target with static covariate (component specific) - expected_lagged_features = [ - "static_0_target_lag-4", - "static_1_target_lag-4", - "static_0_target_lag-1", - "static_1_target_lag-1", - "dummy_statcov_target_static_0", - "dummy_statcov_target_static_1", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov2, - past_covariates=None, - future_covariates=None, - lags=[-4, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - 
use_static_covariates=True, - ) - assert expected_lagged_features == created_lagged_features - - # target with static covariate (component specific & multivariate) - expected_lagged_features = [ - "static_0_target_lag-4", - "static_1_target_lag-4", - "static_0_target_lag-1", - "static_1_target_lag-1", - "dummy_statcov_target_static_0", - "dummy_statcov_target_static_1", - "dummy1_statcov_target_static_0", - "dummy1_statcov_target_static_1", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov3, - past_covariates=None, - future_covariates=None, - lags=[-4, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - use_static_covariates=True, - ) - assert expected_lagged_features == created_lagged_features - - # target + past - expected_lagged_features = [ - "no_static_target_lag-4", - "no_static_target_lag-3", - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - "past_2_pastcov_lag-1", - ] + When lags are component-specific, they are identical across all the components. + """ + ( + ts_tg, + ts_pc, + ts_fc, + lags_tg, + lags_pc, + lags_fc, + use_static_cov, + expected_lagged_features, + ) = config + # lags as list created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_no_cov, - past_covariates=past, - future_covariates=None, - lags=[-4, -3], - lags_past_covariates=[-1], - lags_future_covariates=None, + target_series=ts_tg, + past_covariates=ts_pc, + future_covariates=ts_fc, + lags=lags_tg, + lags_past_covariates=lags_pc, + lags_future_covariates=lags_fc, concatenate=False, + use_static_covariates=use_static_cov, ) - assert expected_lagged_features == created_lagged_features - # target + future - expected_lagged_features = [ - "no_static_target_lag-2", - "no_static_target_lag-1", - "future_0_futcov_lag3", - "future_1_futcov_lag3", - "future_2_futcov_lag3", - "future_3_futcov_lag3", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_no_cov, - past_covariates=None, - future_covariates=future, - lags=[-2, -1], - lags_past_covariates=None, - lags_future_covariates=[3], - concatenate=False, + # converts lags to dictionary format + lags_as_dict = self.convert_lags_to_dict( + ts_tg, + ts_pc, + ts_fc, + lags_tg, + lags_pc, + lags_fc, ) - assert expected_lagged_features == created_lagged_features - # past + future - expected_lagged_features = [ - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - "past_2_pastcov_lag-1", - "future_0_futcov_lag2", - "future_1_futcov_lag2", - "future_2_futcov_lag2", - "future_3_futcov_lag2", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_no_cov, - past_covariates=past, - future_covariates=future, - lags=None, - lags_past_covariates=[-1], - lags_future_covariates=[2], + created_lagged_features_dict_lags, _ = create_lagged_component_names( + target_series=ts_tg, + past_covariates=ts_pc, + future_covariates=ts_fc, + lags=lags_as_dict["target"], + lags_past_covariates=lags_as_dict["past"], + lags_future_covariates=lags_as_dict["future"], concatenate=False, + use_static_covariates=use_static_cov, ) assert expected_lagged_features == created_lagged_features + assert expected_lagged_features == created_lagged_features_dict_lags - # target with static + past + future - expected_lagged_features = [ - "static_0_target_lag-2", - "static_1_target_lag-2", - "static_0_target_lag-1", - "static_1_target_lag-1", - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - 
"past_2_pastcov_lag-1", - "future_0_futcov_lag2", - "future_1_futcov_lag2", - "future_2_futcov_lag2", - "future_3_futcov_lag2", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov, - past_covariates=past, - future_covariates=future, - lags=[-2, -1], - lags_past_covariates=[-1], - lags_future_covariates=[2], - concatenate=False, - ) - assert expected_lagged_features == created_lagged_features + @pytest.mark.parametrize( + "config", + [ + # lags have the same minimum + ( + target_with_static_cov, + None, + None, + {"static_0": [-4, -2], "static_1": [-4, -3]}, + None, + None, + False, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_1_target_lag-3", + "static_0_target_lag-2", + ], + ), + # lags are not overlapping + ( + target_with_static_cov, + None, + None, + {"static_0": [-4, -1], "static_1": [-3, -2]}, + None, + None, + False, + [ + "static_0_target_lag-4", + "static_1_target_lag-3", + "static_1_target_lag-2", + "static_0_target_lag-1", + ], + ), + # default lags for target, overlapping lags for past covariates + ( + target_with_static_cov, + past, + None, + {"static_0": [-3], "static_1": [-3]}, + {"past_0": [-4, -3], "past_1": [-3, -2], "past_2": [-2]}, + None, + False, + [ + "static_0_target_lag-3", + "static_1_target_lag-3", + "past_0_pastcov_lag-4", + "past_0_pastcov_lag-3", + "past_1_pastcov_lag-3", + "past_1_pastcov_lag-2", + "past_2_pastcov_lag-2", + ], + ), + # no lags for target, future covariates lags are not in the compoments order + ( + target_with_static_cov, + None, + future, + None, + None, + { + "future_3": [-2, 0, 2], + "future_0": [-4, 1], + "future_2": [1], + "future_1": [-2, 2], + }, + False, + [ + "future_0_futcov_lag-4", + "future_1_futcov_lag-2", + "future_3_futcov_lag-2", + "future_3_futcov_lag0", + "future_0_futcov_lag1", + "future_2_futcov_lag1", + "future_1_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + ], + ) + def test_create_lagged_component_names_different_lags(self, config): + """ + Tests that `create_lagged_component_names` when lags are different across components. - # multiple series with same components, including past/future covariates - expected_lagged_features = [ - "static_0_target_lag-3", - "static_1_target_lag-3", - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - "past_2_pastcov_lag-1", - "future_0_futcov_lag2", - "future_1_futcov_lag2", - "future_2_futcov_lag2", - "future_3_futcov_lag2", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=[target_with_static_cov, target_with_static_cov], - past_covariates=[past, past], - future_covariates=[future, future], - lags=[-3], - lags_past_covariates=[-1], - lags_future_covariates=[2], - concatenate=False, - ) - assert expected_lagged_features == created_lagged_features + The lagged features should be sorted by lags, then by components. 
+ """ + ( + ts_tg, + ts_pc, + ts_fc, + lags_tg, + lags_pc, + lags_fc, + use_static_cov, + expected_lagged_features, + ) = config - # multiple series with different components will use the first series as reference - expected_lagged_features = [ - "static_0_target_lag-2", - "static_1_target_lag-2", - "static_0_target_lag-1", - "static_1_target_lag-1", - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - "past_2_pastcov_lag-1", - "future_0_futcov_lag2", - "future_1_futcov_lag2", - "future_2_futcov_lag2", - "future_3_futcov_lag2", - ] created_lagged_features, _ = create_lagged_component_names( - target_series=[ - target_with_static_cov, - target_with_no_cov.stack(target_with_no_cov), - ], - past_covariates=[past, past], - future_covariates=[future, past.stack(target_with_no_cov)], - lags=[-2, -1], - lags_past_covariates=[-1], - lags_future_covariates=[2], + target_series=ts_tg, + past_covariates=ts_pc, + future_covariates=ts_fc, + lags=lags_tg, + lags_past_covariates=lags_pc, + lags_future_covariates=lags_fc, concatenate=False, + use_static_covariates=use_static_cov, ) assert expected_lagged_features == created_lagged_features diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py index 8a8e0e0dcd..8ff22f236e 100644 --- a/darts/utils/data/tabularization.py +++ b/darts/utils/data/tabularization.py @@ -276,18 +276,52 @@ def create_lagged_data( if seq_ts is not None ] seq_ts_lens = set(seq_ts_lens) - raise_if( - len(seq_ts_lens) > 1, - "Must specify the same number of `TimeSeries` for each series input.", + if len(seq_ts_lens) > 1: + raise_log( + ValueError( + "Must specify the same number of `TimeSeries` for each series input." + ), + logger, + ) + lags_passed_as_dict = any( + isinstance(lags_, dict) + for lags_ in [lags, lags_past_covariates, lags_future_covariates] ) + if (not use_moving_windows) and lags_passed_as_dict: + raise_log( + ValueError( + "`use_moving_windows=False` is not supported when any of the lags is provided as a dictionary. " + f"Received: {[lags, lags_past_covariates, lags_future_covariates]}." + ), + logger, + ) + if max_samples_per_ts is None: max_samples_per_ts = inf + + # lags are identical for multiple series: pre-compute lagged features and reordered lagged features + lags_extract, lags_order = _get_lagged_indices( + lags, + lags_past_covariates, + lags_future_covariates, + ) X, y, times = [], [], [] for i in range(max(seq_ts_lens)): target_i = target_series[i] if target_series else None past_i = past_covariates[i] if past_covariates else None future_i = future_covariates[i] if future_covariates else None - if use_moving_windows and _all_equal_freq(target_i, past_i, future_i): + series_equal_freq = _all_equal_freq(target_i, past_i, future_i) + # component-wise lags extraction is not support with times intersection at the moment + if use_moving_windows and lags_passed_as_dict and (not series_equal_freq): + raise_log( + ValueError( + f"Cannot create tabularized data for the {i}th series because target and covariates don't have " + "the same frequency and some of the lags are provided as a dictionary. Either resample the " + "series or change the lags definition." 
+ ), + logger, + ) + if use_moving_windows and series_equal_freq: X_i, y_i, times_i = _create_lagged_data_by_moving_window( target_i, output_chunk_length, @@ -297,6 +331,8 @@ def create_lagged_data( lags, lags_past_covariates, lags_future_covariates, + lags_extract, + lags_order, max_samples_per_ts, multi_models, check_inputs, @@ -715,9 +751,9 @@ def create_lagged_component_names( For `*_lags=[-2,-1]` and `*_series.n_components = 2` (lags shared across all the components), each `lagged_*` has the following structure (grouped by lags): comp0_*_lag-2 | comp1_*_lag-2 | comp0_*_lag_-1 | comp1_*_lag-1 - For `*_lags={'comp0':[-2, -1], 'comp1':[-5, -3]}` and `*_series.n_components = 2` (component- - specific lags), each `lagged_*` has the following structure (grouped by components): - comp0_*_lag-2 | comp0_*_lag-1 | comp1_*_lag_-5 | comp1_*_lag-3 + For `*_lags={'comp0':[-3, -1], 'comp1':[-5, -3]}` and `*_series.n_components = 2` (component- + specific lags), each `lagged_*` has the following structure (sorted by lags, then by components): + comp1_*_lag-5 | comp0_*_lag-3 | comp1_*_lag_-3 | comp0_*_lag-1 and for static covariates (2 static covariates acting on 2 target components): cov0_*_target_comp0 | cov0_*_target_comp1 | cov1_*_target_comp0 | cov1_*_target_comp1 @@ -776,10 +812,32 @@ def create_lagged_component_names( components = get_single_series(variate).components.tolist() if isinstance(variate_lags, dict): + if "default_lags" in variate_lags: + raise_log( + ValueError( + "All the lags must be explicitly defined, 'default_lags' is not allowed in the " + "lags dictionary." + ), + logger, + ) + + # combine all the lags and sort them in ascending order across all the components + comp_lags_reordered = np.concatenate( + [ + np.array(variate_lags[comp_name], dtype=int) + for comp_name in components + ] + ).argsort() + tmp_lagged_feats_names = [] for name in components: - lagged_feature_names += [ + tmp_lagged_feats_names += [ f"{name}_{variate_type}_lag{lag}" for lag in variate_lags[name] ] + + # adding feats names reordered across components + lagged_feature_names += [ + tmp_lagged_feats_names[idx] for idx in comp_lags_reordered + ] else: lagged_feature_names += [ f"{name}_{variate_type}_lag{lag}" @@ -811,6 +869,44 @@ def create_lagged_component_names( return lagged_feature_names, label_feature_names +def _get_lagged_indices( + lags, + lags_past_covariates, + lags_future_covariates, +): + """Computes and returns: + + - the lagged feature indices for extraction from windows + - the reordered indices to apply after the window extraction (in case of component specific lags) + + Assumes that all input series share identical component order. + """ + lags_extract = [] + lags_order = [] + for lags_i in [lags, lags_past_covariates, lags_future_covariates]: + if lags_i is None: + lags_extract.append(None) + lags_order.append(None) + continue + + # Within each window, the `-1` indexed value (i.e. the value at the very end of + # the window) corresponds to time `t - min_lag_i`. 
The negative index of the time + # `t + lag_i` within this window is, therefore, `-1 + lag_i + min_lag_i`: + if isinstance(lags_i, list): + lags_extract_i = np.array(lags_i, dtype=int) + # Feats are already grouped by lags and ordered + lags_order_i = slice(None) + else: + # Assume keys are in the same order as the series components + # Lags are grouped by component, extracted from the same window + lags_extract_i = [np.array(c_lags, dtype=int) for c_lags in lags_i.values()] + # Sort the lags across the components in ascending order + lags_order_i = np.concatenate(lags_extract_i).argsort() + lags_extract.append(lags_extract_i) + lags_order.append(lags_order_i) + return lags_extract, lags_order + + def _create_lagged_data_by_moving_window( target_series: Optional[TimeSeries], output_chunk_length: int, @@ -820,6 +916,8 @@ def _create_lagged_data_by_moving_window( lags: Optional[Union[Sequence[int], Dict[str, List[int]]]], lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]], lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]], + lags_extract: List[Optional[np.ndarray]], + lags_order: List[Optional[np.ndarray]], max_samples_per_ts: Optional[int], multi_models: bool, check_inputs: bool, @@ -837,6 +935,8 @@ def _create_lagged_data_by_moving_window( and `t + output_chunk_length - 1` from the target series. In both cases, the extracted windows can then be reshaped into the correct shape. This approach can only be used if we *can* assume that the specified series are all of the same frequency. + + Assumes that all the lags are sorted in ascending order. """ feature_times, min_lags, max_lags = _get_feature_times( target_series, @@ -880,10 +980,11 @@ def _create_lagged_data_by_moving_window( X = [] start_time_idx = None target_start_time_idx = None - for i, (series_i, lags_i, min_lag_i, max_lag_i) in enumerate( + for i, (series_i, lags_extract_i, lags_order_i, min_lag_i, max_lag_i) in enumerate( zip( [target_series, past_covariates, future_covariates], - [lags, lags_past_covariates, lags_future_covariates], + lags_extract, + lags_order, min_lags, max_lags, ) @@ -936,19 +1037,16 @@ def _create_lagged_data_by_moving_window( windows = strided_moving_window( x=vals, window_len=window_len, stride=1, axis=0, check_inputs=False ) + # Within each window, the `-1` indexed value (i.e. the value at the very end of # the window) corresponds to time `t - min_lag_i`. 
The negative index of the time # `t + lag_i` within this window is, therefore, `-1 + lag_i + min_lag_i`: - if isinstance(lags_i, list): - lags_to_extract = np.array(lags_i, dtype=int) + min_lag_i - 1 - else: - # Lags are grouped by component, extracted from the same window - lags_to_extract = [ - np.array(comp_lags, dtype=int) + min_lag_i - 1 - for comp_lags in lags_i.values() - ] - lagged_vals = _extract_lagged_vals_from_windows(windows, lags_to_extract) - X.append(lagged_vals) + # extract lagged values + lagged_vals = _extract_lagged_vals_from_windows( + windows, lags_extract_i, lags_shift=min_lag_i - 1 + ) + # extract and append the reordered lagged values + X.append(lagged_vals[:, lags_order_i]) # Cache `start_time_idx` for label creation: if is_target_series: target_start_time_idx = start_time_idx @@ -987,6 +1085,7 @@ def _create_lagged_data_by_moving_window( def _extract_lagged_vals_from_windows( windows: np.ndarray, lags_to_extract: Optional[Union[np.ndarray, List[np.ndarray]]] = None, + lags_shift: int = 0, ) -> np.ndarray: """ Helper function called by `_create_lagged_data_by_moving_window` that @@ -1011,7 +1110,7 @@ def _extract_lagged_vals_from_windows( if isinstance(lags_to_extract, list): # iterate over the components-specific lags comp_windows = [ - windows[:, i, :, comp_lags_to_extract] + windows[:, i, :, comp_lags_to_extract + lags_shift] for i, comp_lags_to_extract in enumerate(lags_to_extract) ] # windows.shape = (sum(lags_len) across components, num_windows, num_samples): @@ -1019,7 +1118,7 @@ def _extract_lagged_vals_from_windows( lagged_vals = np.moveaxis(windows, (1, 0, 2), (0, 1, 2)) else: if lags_to_extract is not None: - windows = windows[:, :, :, lags_to_extract] + windows = windows[:, :, :, lags_to_extract + lags_shift] # windows.shape = (num_windows, window_len, num_components, num_samples): windows = np.moveaxis(windows, (0, 3, 1, 2), (0, 1, 2, 3)) # lagged_vals.shape = (num_windows, num_components*window_len, num_samples): @@ -1148,6 +1247,120 @@ def _create_lagged_data_by_intersecting_times( return X, y, shared_times +def _create_lagged_data_autoregression( + target_series: Union[TimeSeries, Sequence[TimeSeries]], + t_pred: int, + shift: int, + last_step_shift: int, + series_matrix: np.ndarray, + covariate_matrices: Dict[str, np.ndarray], + lags: Dict[str, List[int]], + component_lags: Dict[str, Dict[str, List[int]]], + relative_cov_lags: Dict[str, np.ndarray], + uses_static_covariates: bool, + last_static_covariates_shape: Optional[Tuple[int, int]], + num_samples: int, +) -> np.ndarray: + """Extract lagged data from target, past covariates and future covariates for auto-regression + with RegressionModels. 
+ """ + series_length = len(target_series) + X = [] + for series_type in ["target", "past", "future"]: + if series_type not in lags: + continue + + # extract series specific data + values_matrix = ( + series_matrix + if series_type == "target" + else covariate_matrices[series_type] + ) + + if series_type not in component_lags: + # for global lags over all components, directly extract lagged values from the data + if series_type == "target": + relative_lags = [ + lag - (shift + last_step_shift) for lag in lags[series_type] + ] + else: + relative_lags = relative_cov_lags[series_type] + t_pred + + lagged_data = values_matrix[:, relative_lags].reshape( + series_length * num_samples, -1 + ) + else: + # for component-specific lags, sort by lags and components and then extract + tmp_X = _extract_component_lags_autoregression( + series_type=series_type, + values_matrix=values_matrix, + shift=shift, + last_step_shift=last_step_shift, + t_pred=t_pred, + lags=lags, + component_lags=component_lags, + ) + lagged_data = tmp_X.reshape(series_length * num_samples, -1) + X.append(lagged_data) + # concatenate retrieved lags + X = np.concatenate(X, axis=1) + + if not uses_static_covariates: + return X + + # Need to split up `X` into three equally-sized sub-blocks + # corresponding to each timeseries in `series`, so that + # static covariates can be added to each block; valid since + # each block contains same number of observations: + X = np.split(X, series_length, axis=0) + X, _ = add_static_covariates_to_lagged_data( + features=X, + target_series=target_series, + uses_static_covariates=uses_static_covariates, + last_shape=last_static_covariates_shape, + ) + + # concatenate retrieved lags + return np.concatenate(X, axis=0) + + +def _extract_component_lags_autoregression( + series_type: str, + values_matrix: np.ndarray, + shift: int, + last_step_shift: int, + t_pred: int, + lags: Dict[str, List[int]], + component_lags: Dict[str, Dict[str, List[int]]], +) -> np.ndarray: + """Extract, concatenate and reorder component-wise lags to obtain a feature order + identical to tabularization. 
+    """
+    # prepare index to reorder features by lags across components
+    comp_lags_reordered = np.concatenate(
+        [comp_lags for comp_lags in component_lags[series_type].values()]
+    ).argsort()
+
+    # convert relative lags to absolute
+    if series_type == "target":
+        lags_shift = -shift - last_step_shift
+    else:
+        lags_shift = -lags[series_type][0] + t_pred
+
+    # extract features
+    tmp_X = [
+        values_matrix[
+            :,
+            [lag + lags_shift for lag in comp_lags],
+            comp_i,
+        ]
+        for comp_i, comp_lags in enumerate(component_lags[series_type].values())
+    ]
+
+    # concatenate on features dimension and reorder
+    return np.concatenate(tmp_X, axis=1)[:, comp_lags_reordered]
+
+
 # For convenience, define following types for `_get_feature_times`:
 FeatureTimes = Tuple[
     Optional[Union[pd.Index, pd.DatetimeIndex, pd.RangeIndex]],
diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
index 6d39a305bc..061bece96f 100644
--- a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
+++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
@@ -11,6 +11,7 @@
 from darts.logging import get_logger
 from darts.timeseries import TimeSeries
+from darts.utils import _build_tqdm_iterator
 from darts.utils.data.tabularization import create_lagged_prediction_data
 from darts.utils.historical_forecasts.utils import _get_historical_forecast_boundaries
 from darts.utils.utils import generate_index
@@ -30,6 +31,7 @@ def _optimized_historical_forecasts_last_points_only(
     stride: int = 1,
     overlap_end: bool = False,
     show_warnings: bool = True,
+    verbose: bool = False,
     predict_likelihood_parameters: bool = False,
     **kwargs,
 ) -> Union[TimeSeries, Sequence[TimeSeries], Sequence[Sequence[TimeSeries]]]:
@@ -39,7 +41,8 @@ def _optimized_historical_forecasts_last_points_only(
     Rely on _check_optimizable_historical_forecasts() to check that the assumptions are verified.
     """
     forecasts_list = []
-    for idx, series_ in enumerate(series):
+    iterator = _build_tqdm_iterator(series, verbose)
+    for idx, series_ in enumerate(iterator):
         past_covariates_ = past_covariates[idx] if past_covariates is not None else None
         future_covariates_ = (
             future_covariates[idx] if future_covariates is not None else None
@@ -185,6 +188,7 @@ def _optimized_historical_forecasts_all_points(
     stride: int = 1,
     overlap_end: bool = False,
     show_warnings: bool = True,
+    verbose: bool = False,
     predict_likelihood_parameters: bool = False,
     **kwargs,
 ) -> Union[TimeSeries, Sequence[TimeSeries], Sequence[Sequence[TimeSeries]]]:
@@ -194,7 +198,8 @@ def _optimized_historical_forecasts_all_points(
     Rely on _check_optimizable_historical_forecasts() to check that the assumptions are verified.
     """
     forecasts_list = []
-    for idx, series_ in enumerate(series):
+    iterator = _build_tqdm_iterator(series, verbose)
+    for idx, series_ in enumerate(iterator):
         past_covariates_ = past_covariates[idx] if past_covariates is not None else None
         future_covariates_ = (
             future_covariates[idx] if future_covariates is not None else None
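
The reordering applied when lags are component-specific can be checked in isolation. The sketch below only assumes numpy; the lags and the resulting names are the ones used in the future-covariates case of the parametrized naming test above, and `kind="stable"` is used here only to make the tie-breaking by component order explicit (the library code relies on a plain `argsort`).

import numpy as np

# component-specific lags, keyed in the series' component order
lags_fc = {
    "future_0": [-4, 1],
    "future_1": [-2, 2],
    "future_2": [1],
    "future_3": [-2, 0, 2],
}

# names generated component by component, before reordering
names = [
    f"{comp}_futcov_lag{lag}"
    for comp, comp_lags in lags_fc.items()
    for lag in comp_lags
]

# sort all lags across components in ascending order; ties keep component order
order = np.concatenate([np.array(comp_lags) for comp_lags in lags_fc.values()]).argsort(
    kind="stable"
)

print([names[i] for i in order])
# ['future_0_futcov_lag-4', 'future_1_futcov_lag-2', 'future_3_futcov_lag-2',
#  'future_3_futcov_lag0', 'future_0_futcov_lag1', 'future_2_futcov_lag1',
#  'future_1_futcov_lag2', 'future_3_futcov_lag2']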
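
The same ordering rule applies to the extracted values, both in the moving-window path and in `_extract_component_lags_autoregression`: per-component lagged values are gathered first, then reordered so that features are grouped by lag value and then by component. A simplified sketch (single sample, no window machinery; the values encode time and component so the order is visible), matching the `target_0_lag-4, target_1_lag-4, target_0_lag-1, target_1_lag-1` order expected by the component-wise lags test:

import numpy as np

# toy 2-component target: value = 10 * time_step + component index
vals = np.arange(8)[:, None] * 10 + np.arange(2)[None, :]

lags = {"target_0": [-4, -1], "target_1": [-4, -1]}
order = np.concatenate([np.array(l) for l in lags.values()]).argsort(kind="stable")

t = 6  # time step of the label
# gather values component by component, each at its own lags
per_comp = [
    vals[[t + lag for lag in comp_lags], comp_i]
    for comp_i, comp_lags in enumerate(lags.values())
]
features = np.concatenate(per_comp)[order]
print(features)  # [20 21 50 51] -> lag -4 of both components, then lag -1 of both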
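
The window indexing in `_extract_lagged_vals_from_windows` now receives the raw lags plus a `lags_shift` of `min_lag_i - 1`, relying on the invariant described in the comments: the last entry of each window corresponds to time `t - min_lag`, so the value for a given `lag` sits at negative index `lag + min_lag - 1`. A minimal standalone check of that arithmetic, with hypothetical lags and values equal to their time index:

import numpy as np

lags = [-4, -2]            # hypothetical target lags
min_lag = -max(lags)       # smallest lag magnitude -> 2
lags_shift = min_lag - 1

t = 10
# one window covering times t-4 .. t-2; value == time index
window = np.arange(t + min(lags), t - min_lag + 1)

for lag in lags:
    # negative index "-1 + lag + min_lag" == "lag + lags_shift"
    assert window[lag + lags_shift] == t + lag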
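
From the user side, forwarding `verbose` means the optimized historical-forecasts loop can report series-level progress. A hedged usage sketch, assuming the usual darts entry points; the bar is only expected when the optimized path is taken, i.e. `retrain=False` and no autoregression (forecast horizon not exceeding `output_chunk_length`):

from darts.models import LinearRegressionModel
from darts.utils.timeseries_generation import linear_timeseries

series = [linear_timeseries(length=30), linear_timeseries(length=30) + 10]

model = LinearRegressionModel(lags=3, output_chunk_length=1)
model.fit(series)

# forecast_horizon <= output_chunk_length -> no autoregression, optimized path
hf = model.historical_forecasts(
    series=series,
    forecast_horizon=1,
    retrain=False,
    verbose=True,  # displays the per-series progress bar
)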