diff --git a/CHANGELOG.md b/CHANGELOG.md index 37d3f2cf46..3cc71f2009 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,8 @@ but cannot always guarantee backwards compatibility. Changes that may **break co - Moved functions `retain_period_common_to_all()`, `series2seq()`, `seq2series()`, `get_single_series()` from `darts.utils.utils` to `darts.utils.ts_utils`. - Improvements to `ForecastingModel`: [#2269](https://github.com/unit8co/darts/pull/2269) by [Felix Divo](https://github.com/felixdivo). - Renamed the private `_is_probabilistic` property to a public `supports_probabilistic_prediction`. +- Improvements to `RegressionModel`: [#2320](https://github.com/unit8co/darts/pull/2320) by [Felix Divo](https://github.com/felixdivo). + - Added a progress bar when performing optimized historical forecasts (`retrain=False` and no autoregression) to display the series-level progress. - Improvements to `DataTransformer`: [#2267](https://github.com/unit8co/darts/pull/2267) by [Alicja Krzeminska-Sciga](https://github.com/alicjakrzeminska). - `InvertibleDataTransformer` now supports parallelized inverse transformation for `series` being a list of lists of `TimeSeries` (`Sequence[Sequence[TimeSeries]]`). This `series` type represents for example the output from `historical_forecasts()` when using multiple series. - New method `TorchForecastingModel.scale_batch_size()` that helps to find the batch size automatically. [#2318](https://github.com/unit8co/darts/pull/2318) by [Bohdan Bilonoh](https://github.com/BohdanBilonoh) @@ -94,6 +96,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Fixed** - Fixed a bug in `quantile_loss`, where the loss was computed on all samples rather than only on the predicted quantiles. [#2284](https://github.com/unit8co/darts/pull/2284) by [Dennis Bader](https://github.com/dennisbader). - Fixed type hint warning "Unexpected argument" when calling `historical_forecasts()` caused by the `_with_sanity_checks` decorator. The type hinting is now properly configured to expect any input arguments and return the output type of the method for which the sanity checks are performed. [#2286](https://github.com/unit8co/darts/pull/2286) by [Dennis Bader](https://github.com/dennisbader). +- Fixed the order of the features when using component-wise lags so that they are grouped by lag values, then by components (before, they were grouped by components, then by lag values). [#2272](https://github.com/unit8co/darts/pull/2272) by [Antoine Madrona](https://github.com/madtoinou). - Fixed a segmentation fault that some users were facing when importing a `LightGBMModel`. [#2304](https://github.com/unit8co/darts/pull/2304) by [Dennis Bader](https://github.com/dennisbader). - Fixed a bug when using dropout with a `TorchForecastingModel` and pytorch lightning versions >= 2.2.0, where the dropout was not properly activated during training. [#2312](https://github.com/unit8co/darts/pull/2312) by [Dennis Bader](https://github.com/dennisbader).
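The `RegressionModel` changelog entries above are easier to follow with a small sketch. The snippet below is illustrative only (the model, series, and lag choices are assumptions, not code from this PR): it shows component-specific lags passed as a dictionary, the lag-major feature ordering that the fix enforces, and the `verbose` flag that drives the new series-level progress bar for optimized historical forecasts.

```python
from darts.models import LinearRegressionModel
from darts.utils.timeseries_generation import linear_timeseries, sine_timeseries

# Build a two-component target series (component names are arbitrary here).
series = linear_timeseries(length=30, column_name="target_0").stack(
    sine_timeseries(length=30, column_name="target_1")
)

# Component-specific lags: one lag list per target component.
model = LinearRegressionModel(
    lags={"target_0": [-4, -1], "target_1": [-4, -1]},
    output_chunk_length=1,
)
model.fit(series)
# With the fix (#2272), the lagged features are grouped by lag value first,
# then by component, i.e. conceptually:
#   target_0_lag-4, target_1_lag-4, target_0_lag-1, target_1_lag-1
# (previously: target_0_lag-4, target_0_lag-1, target_1_lag-4, target_1_lag-1)

# retrain=False with forecast_horizon <= output_chunk_length uses the optimized
# historical-forecast path without autoregression; verbose=True now shows the
# series-level progress bar added in #2320.
hist_fc = model.historical_forecasts(
    series, start=0.75, forecast_horizon=1, retrain=False, verbose=True
)
```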
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index dd862db6b6..3bfd45b439 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -43,7 +43,7 @@ from darts.models.forecasting.forecasting_model import GlobalForecastingModel from darts.timeseries import TimeSeries from darts.utils.data.tabularization import ( - add_static_covariates_to_lagged_data, + _create_lagged_data_autoregression, create_lagged_component_names, create_lagged_training_data, ) @@ -1019,83 +1019,25 @@ def predict( last_step_shift = t_pred - (n - step) t_pred = n - step - np_X = [] - # retrieve target lags - if "target" in self.lags: - if predictions: - series_matrix = np.concatenate( - [series_matrix, predictions[-1]], axis=1 - ) - # component-wise lags - if "target" in self.component_lags: - tmp_X = [ - series_matrix[ - :, - [lag - (shift + last_step_shift) for lag in comp_lags], - comp_i, - ] - for comp_i, (comp, comp_lags) in enumerate( - self.component_lags["target"].items() - ) - ] - # values are grouped by component - np_X.append( - np.concatenate(tmp_X, axis=1).reshape( - len(series) * num_samples, -1 - ) - ) - else: - # values are grouped by lags - np_X.append( - series_matrix[ - :, - [ - lag - (shift + last_step_shift) - for lag in self.lags["target"] - ], - ].reshape(len(series) * num_samples, -1) - ) - # retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+) - for cov_type in ["past", "future"]: - if cov_type in covariate_matrices: - # component-wise lags - if cov_type in self.component_lags: - tmp_X = [ - covariate_matrices[cov_type][ - :, - np.array(comp_lags) - self.lags[cov_type][0] + t_pred, - comp_i, - ] - for comp_i, (comp, comp_lags) in enumerate( - self.component_lags[cov_type].items() - ) - ] - np_X.append( - np.concatenate(tmp_X, axis=1).reshape( - len(series) * num_samples, -1 - ) - ) - else: - np_X.append( - covariate_matrices[cov_type][ - :, relative_cov_lags[cov_type] + t_pred - ].reshape(len(series) * num_samples, -1) - ) - - # concatenate retrieved lags - X = np.concatenate(np_X, axis=1) - # Need to split up `X` into three equally-sized sub-blocks - # corresponding to each timeseries in `series`, so that - # static covariates can be added to each block; valid since - # each block contains same number of observations: - X_blocks = np.split(X, len(series), axis=0) - X_blocks, _ = add_static_covariates_to_lagged_data( - X_blocks, - series, + # concatenate previous iteration forecasts + if "target" in self.lags and predictions: + series_matrix = np.concatenate([series_matrix, predictions[-1]], axis=1) + + # extract and concatenate lags from target and covariates series + X = _create_lagged_data_autoregression( + target_series=series, + t_pred=t_pred, + shift=shift, + last_step_shift=last_step_shift, + series_matrix=series_matrix, + covariate_matrices=covariate_matrices, + lags=self.lags, + component_lags=self.component_lags, + relative_cov_lags=relative_cov_lags, + num_samples=num_samples, uses_static_covariates=self.uses_static_covariates, - last_shape=self._static_covariates_shape, + last_static_covariates_shape=self._static_covariates_shape, ) - X = np.concatenate(X_blocks, axis=0) # X has shape (n_series * n_samples, n_regression_features) prediction = self._predict_and_sample( @@ -1257,6 +1199,7 @@ def _optimized_historical_forecasts( stride=stride, overlap_end=overlap_end, show_warnings=show_warnings, + verbose=verbose, 
predict_likelihood_parameters=predict_likelihood_parameters, **kwargs, ) @@ -1273,6 +1216,7 @@ def _optimized_historical_forecasts( stride=stride, overlap_end=overlap_end, show_warnings=show_warnings, + verbose=verbose, predict_likelihood_parameters=predict_likelihood_parameters, **kwargs, ) diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 29f3d740ba..307c7eac73 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -157,10 +157,31 @@ class NewCls(cls): return NewCls +xgb_test_params = { + "n_estimators": 1, + "max_depth": 1, + "max_leaves": 1, + "verbose": -1, + "random_state": 42, +} +lgbm_test_params = { + "n_estimators": 1, + "max_depth": 1, + "num_leaves": 2, + "verbosity": -1, + "random_state": 42, +} +cb_test_params = { + "iterations": 1, + "depth": 1, + "verbose": -1, + "random_state": 42, +} + + class TestRegressionModels: np.random.seed(42) - # default regression models models = [ RandomForest, @@ -179,10 +200,16 @@ class TestRegressionModels: LinearRegressionModel, likelihood="poisson", random_state=42 ) PoissonXGBModel = partialclass( - XGBModel, likelihood="poisson", random_state=42, tree_method="exact" + XGBModel, + likelihood="poisson", + tree_method="exact", + **xgb_test_params, ) QuantileXGBModel = partialclass( - XGBModel, likelihood="quantile", random_state=42, tree_method="exact" + XGBModel, + likelihood="quantile", + tree_method="exact", + **xgb_test_params, ) # targets for poisson regression must be positive, so we exclude them for some tests models.extend( @@ -200,8 +227,8 @@ class TestRegressionModels: 1e-13, # RegressionModel 0.8, # QuantileLinearRegressionModel 0.4, # PoissonLinearRegressionModel - 1e-01, # PoissonXGBModel - 0.5, # QuantileXGBModel + 0.75, # PoissonXGBModel + 0.75, # QuantileXGBModel ] multivariate_accuracies = [ 0.3, # RandomForest @@ -209,8 +236,8 @@ class TestRegressionModels: 1e-13, # RegressionModel 0.8, # QuantileLinearRegressionModel 0.4, # PoissonLinearRegressionModel - 0.15, # PoissonXGBModel - 0.4, # QuantileXGBModel + 0.75, # PoissonXGBModel + 0.75, # QuantileXGBModel ] multivariate_multiseries_accuracies = [ 0.05, # RandomForest @@ -218,23 +245,26 @@ class TestRegressionModels: 1e-13, # RegressionModel 0.8, # QuantileLinearRegressionModel 0.4, # PoissonLinearRegressionModel - 1e-01, # PoissonXGBModel - 0.4, # QuantileXGBModel + 0.85, # PoissonXGBModel + 0.65, # QuantileXGBModel ] lgbm_w_categorical_covariates = NotImportedModule if lgbm_available: + RegularLightGBMModel = partialclass(LightGBMModel, **lgbm_test_params) QuantileLightGBMModel = partialclass( LightGBMModel, likelihood="quantile", quantiles=[0.05, 0.5, 0.95], - random_state=42, + **lgbm_test_params, ) PoissonLightGBMModel = partialclass( - LightGBMModel, likelihood="poisson", random_state=42 + LightGBMModel, + likelihood="poisson", + **lgbm_test_params, ) models += [ - LightGBMModel, + RegularLightGBMModel, QuantileLightGBMModel, PoissonLightGBMModel, ] @@ -247,62 +277,67 @@ class TestRegressionModels: categorical_future_covariates=["fut_cov_promo_mechanism"], categorical_past_covariates=["past_cov_cat_dummy"], categorical_static_covariates=["product_id"], + **lgbm_test_params, ) univariate_accuracies += [ - 0.3, # LightGBMModel - 0.5, # QuantileLightGBMModel - 0.4, # PoissonLightGBMModel + 0.75, # LightGBMModel + 0.75, # QuantileLightGBMModel + 0.75, # PoissonLightGBMModel ] multivariate_accuracies += [ - 
0.4, # LightGBMModel - 0.4, # QuantileLightGBMModel - 0.4, # PoissonLightGBMModel + 0.7, # LightGBMModel + 0.75, # QuantileLightGBMModel + 0.75, # PoissonLightGBMModel ] multivariate_multiseries_accuracies += [ - 0.05, # LightGBMModel - 0.4, # QuantileLightGBMModel - 0.4, # PoissonLightGBMModel + 0.7, # LightGBMModel + 0.7, # QuantileLightGBMModel + 0.75, # PoissonLightGBMModel ] if cb_available: + RegularCatBoostModel = partialclass( + CatBoostModel, + **cb_test_params, + ) QuantileCatBoostModel = partialclass( CatBoostModel, likelihood="quantile", quantiles=[0.05, 0.5, 0.95], - random_state=42, + **cb_test_params, ) PoissonCatBoostModel = partialclass( CatBoostModel, likelihood="poisson", - random_state=42, + **cb_test_params, ) NormalCatBoostModel = partialclass( CatBoostModel, likelihood="gaussian", - random_state=42, + **cb_test_params, ) models += [ - CatBoostModel, + RegularCatBoostModel, QuantileCatBoostModel, PoissonCatBoostModel, NormalCatBoostModel, ] univariate_accuracies += [ 0.75, # CatBoostModel - 1e-03, # QuantileCatBoostModel - 1e-01, # PoissonCatBoostModel - 1e-05, # NormalCatBoostModel + 0.75, # QuantileCatBoostModel + 0.9, # PoissonCatBoostModel + 0.75, # NormalCatBoostModel ] multivariate_accuracies += [ 0.75, # CatBoostModel - 1e-03, # QuantileCatBoostModel - 0.15, # PoissonCatBoostModel - 1e-05, # NormalCatBoostModel + 0.75, # QuantileCatBoostModel + 0.86, # PoissonCatBoostModel + 0.75, # NormalCatBoostModel ] multivariate_multiseries_accuracies += [ 0.75, # CatBoostModel - 1e-03, # QuantileCatBoostModel - 1e-01, # PoissonCatBoostModel - 1e-03, # NormalCatBoostModel + 0.75, # QuantileCatBoostModel + 1.2, # PoissonCatBoostModel + 0.75, # NormalCatBoostModel ] # dummy feature and target TimeSeries instances @@ -1026,7 +1061,6 @@ def test_models_runnability(self, config): prediction = model_instance.predict(n=1) assert len(prediction) == 1 - @pytest.mark.slow @pytest.mark.parametrize( "config", itertools.product( @@ -1036,10 +1070,14 @@ def test_models_runnability(self, config): def test_fit(self, config): # test fitting both on univariate and multivariate timeseries model, mode, series = config + + series = series[:15] + sine_multivariate1 = self.sine_multivariate1[:15] + # auto-regression but past_covariates does not extend enough in the future with pytest.raises(ValueError): model_instance = model(lags=4, lags_past_covariates=4, multi_models=mode) - model_instance.fit(series=series, past_covariates=self.sine_multivariate1) + model_instance.fit(series=series, past_covariates=sine_multivariate1) model_instance.predict(n=10) # inconsistent number of components in series Sequence[TimeSeries] @@ -1072,19 +1110,19 @@ def test_fit(self, config): assert model_instance.lags.get("past") is None model_instance = model(lags=12, lags_past_covariates=12, multi_models=mode) - model_instance.fit(series=series, past_covariates=self.sine_multivariate1) + model_instance.fit(series=series, past_covariates=sine_multivariate1) assert len(model_instance.lags.get("past")) == 12 model_instance = model( lags=12, lags_future_covariates=(0, 1), multi_models=mode ) - model_instance.fit(series=series, future_covariates=self.sine_multivariate1) + model_instance.fit(series=series, future_covariates=sine_multivariate1) assert len(model_instance.lags.get("future")) == 1 model_instance = model( lags=12, lags_past_covariates=[-1, -4, -6], multi_models=mode ) - model_instance.fit(series=series, past_covariates=self.sine_multivariate1) + model_instance.fit(series=series, 
past_covariates=sine_multivariate1) assert len(model_instance.lags.get("past")) == 3 model_instance = model( @@ -1095,8 +1133,8 @@ def test_fit(self, config): ) model_instance.fit( series=series, - past_covariates=self.sine_multivariate1, - future_covariates=self.sine_multivariate1, + past_covariates=sine_multivariate1, + future_covariates=sine_multivariate1, ) assert len(model_instance.lags.get("past")) == 3 @@ -1289,11 +1327,11 @@ def test_multioutput_wrapper(self, config): horizon=0, target_dim=1 ) - model_configs = [(XGBModel, {"tree_method": "exact"})] + model_configs = [(XGBModel, dict({"tree_method": "exact"}, **xgb_test_params))] if lgbm_available: - model_configs += [(LightGBMModel, {})] + model_configs += [(LightGBMModel, lgbm_test_params)] if cb_available: - model_configs += [(CatBoostModel, {})] + model_configs += [(CatBoostModel, cb_test_params)] @pytest.mark.parametrize( "config", itertools.product(model_configs, [1, 2], [True, False]) @@ -1991,7 +2029,7 @@ def test_component_specific_lags(self, config): ) # n > output_chunk_length - model.predict( + pred = model.predict( 7, series=series[0] if multiple_series else None, past_covariates=( @@ -2005,6 +2043,11 @@ def test_component_specific_lags(self, config): else None ), ) + # check that lagged features are properly extracted during auto-regression + if multivar_target: + np.testing.assert_array_almost_equal( + tg.sine_timeseries(length=27)[-7:].values(), pred["sine"].values() + ) @pytest.mark.parametrize( "config", @@ -2303,14 +2346,18 @@ def test_output_shift(self, config): @pytest.mark.parametrize( "config", itertools.product( - [RegressionModel, LinearRegressionModel, XGBModel] - + ([LightGBMModel] if lgbm_available else []), + [ + (RegressionModel, {}), + (LinearRegressionModel, {}), + (XGBModel, xgb_test_params), + ] + + ([(LightGBMModel, lgbm_test_params)] if lgbm_available else []), [True, False], [1, 2], ), ) def test_encoders(self, config): - model_cls, mode, ocl = config + (model_cls, model_kwargs), mode, ocl = config max_past_lag = -4 max_future_lag = 4 # target @@ -2353,18 +2400,21 @@ def test_encoders(self, config): add_encoders=encoder_examples["past"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_fc_valid0 = model_cls( lags=2, add_encoders=encoder_examples["future"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_mixed_valid0 = model_cls( lags=2, add_encoders=encoder_examples["mixed"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) # encoders will not generate covariates without lags @@ -2379,12 +2429,14 @@ def test_encoders(self, config): add_encoders=encoder_examples["past"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_fc_valid0 = model_cls( lags_future_covariates=[-1, 0], add_encoders=encoder_examples["future"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_mixed_valid0 = model_cls( lags_past_covariates=[-2, -1], @@ -2392,6 +2444,7 @@ def test_encoders(self, config): add_encoders=encoder_examples["mixed"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) # check that fit/predict works with model internal covariate requirement checks for model in [model_pc_valid0, model_fc_valid0, model_mixed_valid0]: @@ -2406,6 +2459,7 @@ def test_encoders(self, config): add_encoders=encoder_examples["past"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_fc_valid1 = model_cls( lags=2, @@ -2413,6 +2467,7 @@ def test_encoders(self, config): 
add_encoders=encoder_examples["future"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) model_mixed_valid1 = model_cls( lags=2, @@ -2421,6 +2476,7 @@ def test_encoders(self, config): add_encoders=encoder_examples["mixed"], multi_models=mode, output_chunk_length=ocl, + **model_kwargs, ) for model, ex in zip( @@ -2728,6 +2784,7 @@ def get_model_params(): return { "lags": int(period / 2), "output_chunk_length": int(period / 2), + "verbose": -1, } # test case without using categorical static covariates @@ -2780,6 +2837,7 @@ def get_model_params(): "past_cov_cat_dummy", ], categorical_static_covariates=["product_id"], + **lgbm_test_params, ), LightGBMModel( lags=1, @@ -2789,12 +2847,14 @@ def get_model_params(): "past_cov_cat_dummy", ], categorical_static_covariates=["does_not_exist"], + **lgbm_test_params, ), LightGBMModel( lags=1, lags_past_covariates=1, output_chunk_length=1, categorical_future_covariates=["does_not_exist"], + **lgbm_test_params, ), ] if lgbm_available @@ -3002,8 +3062,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "poisson", - "random_state": 42, "multi_models": True, + **xgb_test_params, }, 0.6, ), @@ -3013,8 +3073,8 @@ class TestProbabilisticRegressionModels: "lags": 2, "likelihood": "quantile", "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9], - "random_state": 42, "multi_models": True, + **xgb_test_params, }, 0.4, ), @@ -3026,8 +3086,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "quantile", - "random_state": 42, "multi_models": True, + **lgbm_test_params, }, 0.4, ), @@ -3037,8 +3097,8 @@ class TestProbabilisticRegressionModels: "lags": 2, "likelihood": "quantile", "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9], - "random_state": 42, "multi_models": True, + **lgbm_test_params, }, 0.4, ), @@ -3047,8 +3107,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "poisson", - "random_state": 42, "multi_models": True, + **lgbm_test_params, }, 0.6, ), @@ -3060,8 +3120,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "quantile", - "random_state": 42, "multi_models": True, + **cb_test_params, }, 0.05, ), @@ -3071,8 +3131,8 @@ class TestProbabilisticRegressionModels: "lags": 2, "likelihood": "quantile", "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9], - "random_state": 42, "multi_models": True, + **cb_test_params, }, 0.05, ), @@ -3081,8 +3141,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "poisson", - "random_state": 42, "multi_models": True, + **cb_test_params, }, 0.6, ), @@ -3091,8 +3151,8 @@ class TestProbabilisticRegressionModels: { "lags": 2, "likelihood": "gaussian", - "random_state": 42, "multi_models": True, + **cb_test_params, }, 0.05, ), @@ -3104,7 +3164,6 @@ class TestProbabilisticRegressionModels: constant_noisy_multivar_ts = constant_noisy_ts.stack(constant_noisy_ts) num_samples = 5 - @pytest.mark.slow @pytest.mark.parametrize( "config", itertools.product(models_cls_kwargs_errs, [True, False]) ) @@ -3126,7 +3185,6 @@ def test_fit_predict_determinism(self, config): pred3 = model.predict(n=10, num_samples=2).values() assert (pred2 != pred3).any() - @pytest.mark.slow @pytest.mark.parametrize( "config", itertools.product(models_cls_kwargs_errs, [True, False]) ) @@ -3141,7 +3199,6 @@ def test_probabilistic_forecast_accuracy_univariate(self, config): self.constant_noisy_ts, ) - @pytest.mark.slow @pytest.mark.parametrize( "config", itertools.product(models_cls_kwargs_errs, [True, False]) ) diff --git 
a/darts/tests/utils/tabularization/test_create_lagged_training_data.py b/darts/tests/utils/tabularization/test_create_lagged_training_data.py index d43f0699fd..54a5fc9a2f 100644 --- a/darts/tests/utils/tabularization/test_create_lagged_training_data.py +++ b/darts/tests/utils/tabularization/test_create_lagged_training_data.py @@ -1,7 +1,7 @@ import itertools import warnings from itertools import product -from typing import Optional, Sequence +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -15,6 +15,26 @@ create_lagged_training_data, ) from darts.utils.timeseries_generation import linear_timeseries +from darts.utils.utils import generate_index + + +def helper_create_multivariate_linear_timeseries( + n_components: int, components_names: Sequence[str] = None, **kwargs +) -> TimeSeries: + """ + Helper function that creates a `linear_timeseries` with a specified number of + components. To help distinguish each component from one another, `i` is added on + to each value of the `i`th component. Any additional keyword arguments are passed + to `linear_timeseries` (`start_value`, `end_value`, `start`, `end`, `length`, etc). + """ + if components_names is None or len(components_names) < n_components: + components_names = [f"lin_ts_{i}" for i in range(n_components)] + timeseries = [] + for i in range(n_components): + # Values of each component is 1 larger than the last: + timeseries_i = linear_timeseries(column_name=components_names[i], **kwargs) + i + timeseries.append(timeseries_i) + return darts_concatenate(timeseries, axis=1) class TestCreateLaggedTrainingData: @@ -40,27 +60,6 @@ class TestCreateLaggedTrainingData: # Helper Functions for Generated Test Cases # - @staticmethod - def create_multivariate_linear_timeseries( - n_components: int, components_names: Sequence[str] = None, **kwargs - ) -> TimeSeries: - """ - Helper function that creates a `linear_timeseries` with a specified number of - components. To help distinguish each component from one another, `i` is added on - to each value of the `i`th component. Any additional keyword arguments are passed - to `linear_timeseries` (`start_value`, `end_value`, `start`, `end`, `length`, etc). 
- """ - timeseries = [] - if components_names is None or len(components_names) < n_components: - components_names = [f"lin_ts_{i}" for i in range(n_components)] - for i in range(n_components): - # Values of each component is 1 larger than the last: - timeseries_i = ( - linear_timeseries(column_name=components_names[i], **kwargs) + i - ) - timeseries.append(timeseries_i) - return darts_concatenate(timeseries, axis=1) - @staticmethod def get_feature_times( target: TimeSeries, @@ -384,7 +383,7 @@ def create_y( timesteps_ahead = ( range(output_chunk_shift, output_chunk_length + output_chunk_shift) if multi_models - else (output_chunk_length + output_chunk_shift - 1,) + else [output_chunk_length + output_chunk_shift - 1] ) y_row = [] for i in timesteps_ahead: @@ -399,17 +398,248 @@ def create_y( y = np.stack(y, axis=0) return y + @staticmethod + def convert_lags_to_dict(ts_tg, ts_pc, ts_fc, lags_tg, lags_pc, lags_fc): + """Convert lags to the dictionary format, assuming the lags are shared across the components""" + lags_as_dict = dict() + for ts_, lags_, name_ in zip( + [ts_tg, ts_pc, ts_fc], + [lags_tg, lags_pc, lags_fc], + ["target", "past", "future"], + ): + single_ts = ts_[0] if isinstance(ts_, Sequence) else ts_ + if single_ts is None or lags_ is None: + lags_as_dict[name_] = None + # already in dict format + elif isinstance(lags_, dict): + lags_as_dict[name_] = lags_ + # from list + elif isinstance(lags_, list): + lags_as_dict[name_] = {c_name: lags_ for c_name in single_ts.components} + else: + raise ValueError( + f"Lags should be `None`, a list or a dictionary. Received {type(lags_)}." + ) + return lags_as_dict + + def helper_create_expected_lagged_data( + self, + target: Optional[Union[TimeSeries, List[TimeSeries]]], + past: Optional[Union[TimeSeries, List[TimeSeries]]], + future: Optional[Union[TimeSeries, List[TimeSeries]]], + lags: Optional[Union[List[int], Dict[str, List[int]]]], + lags_past: Optional[Union[List[int], Dict[str, List[int]]]], + lags_future: Optional[Union[List[int], Dict[str, List[int]]]], + output_chunk_length: int, + output_chunk_shift: int, + multi_models: bool, + max_samples_per_ts: Optional[int], + ) -> Tuple[np.ndarray, np.ndarray, Any]: + """Helper function to create the X and y arrays by building them block by block (one per covariates).""" + feats_times = self.get_feature_times( + target, + past, + future, + lags, + lags_past, + lags_future, + output_chunk_length, + max_samples_per_ts, + output_chunk_shift, + ) + # Construct `X` by constructing each block, then concatenate these + # blocks together along component axis: + X_target = self.construct_X_block(target, feats_times, lags) + X_past = self.construct_X_block(past, feats_times, lags_past) + X_future = self.construct_X_block(future, feats_times, lags_future) + all_X = (X_target, X_past, X_future) + to_concat = [X for X in all_X if X is not None] + expected_X = np.concatenate(to_concat, axis=1) + expected_y = self.create_y( + target, + feats_times, + output_chunk_length, + multi_models, + output_chunk_shift, + ) + if len(expected_X.shape) == 2: + expected_X = expected_X[:, :, np.newaxis] + if len(expected_y.shape) == 2: + expected_y = expected_y[:, :, np.newaxis] + return expected_X, expected_y, feats_times + + def helper_check_lagged_data( + self, + convert_lags_to_dict: bool, + expected_X: np.ndarray, + expected_y: np.ndarray, + expected_times_x, + expected_times_y, + target: Optional[Union[TimeSeries, List[TimeSeries]]], + past_cov: Optional[Union[TimeSeries, List[TimeSeries]]], + future_cov: 
Optional[Union[TimeSeries, List[TimeSeries]]], + lags: Optional[Union[List[int], Dict[str, List[int]]]], + lags_past: Optional[Union[List[int], Dict[str, List[int]]]], + lags_future: Optional[Union[List[int], Dict[str, List[int]]]], + output_chunk_length: int, + output_chunk_shift: int, + use_static_covariates: bool, + multi_models: bool, + max_samples_per_ts: Optional[int], + use_moving_windows: bool, + concatenate: bool, + **kwargs, + ): + """Helper function to call the `create_lagged_training_data()` method with lags argument either in the list + format or the dictionary format (automatically convert them when they are identical across components). + + Assertions are different depending on the value of `concatenate` to account for the output shape. + """ + if convert_lags_to_dict: + lags_as_dict = self.convert_lags_to_dict( + target, + past_cov if lags_past else None, + future_cov if lags_future else None, + lags, + lags_past, + lags_future, + ) + lags_ = lags_as_dict["target"] + lags_past_ = lags_as_dict["past"] + lags_future_ = lags_as_dict["future"] + else: + lags_ = lags + lags_past_ = lags_past + lags_future_ = lags_future + + # convert indexes to list of tuples to simplify processing + expected_times_x = ( + expected_times_x + if isinstance(expected_times_x, Sequence) + else [expected_times_x] + ) + expected_times_y = ( + expected_times_y + if isinstance(expected_times_y, Sequence) + else [expected_times_y] + ) + + X, y, times, _ = create_lagged_training_data( + target_series=target, + output_chunk_length=output_chunk_length, + past_covariates=past_cov if lags_past_ else None, + future_covariates=future_cov if lags_future_ else None, + lags=lags_, + lags_past_covariates=lags_past_, + lags_future_covariates=lags_future_, + uses_static_covariates=use_static_covariates, + multi_models=multi_models, + max_samples_per_ts=max_samples_per_ts, + use_moving_windows=use_moving_windows, + output_chunk_shift=output_chunk_shift, + concatenate=concatenate, + ) + # should have the exact same number of indexes + assert len(times) == len(expected_times_x) == len(expected_times_y) + + # Check that time index(es) match: + for time, exp_time in zip(times, expected_times_x): + assert exp_time.equals(time) + + if concatenate: + # Number of observations should match number of feature times: + data_length = sum(len(time) for time in times) + exp_length_x = sum(len(exp_time) for exp_time in expected_times_x) + exp_length_y = sum(len(exp_time) for exp_time in expected_times_y) + assert exp_length_x == exp_length_y + assert X.shape[0] == exp_length_x == data_length + assert y.shape[0] == exp_length_y == data_length + + # Check that outputs match: + assert X.shape == expected_X.shape + assert np.allclose(expected_X, X) + assert y.shape == expected_y.shape + assert np.allclose(expected_y, y) + else: + # Check the number of observation for each series + for x_, exp_time_x, y_, exp_time_y, time in zip( + X, expected_times_x, y, expected_times_y, times + ): + assert x_.shape[0] == len(time) == len(exp_time_x) + assert y_.shape[0] == len(time) == len(exp_time_y) + + # Check that outputs match: + for x_, y_ in zip(X, y): + assert np.allclose(X, x_) + assert np.allclose(y, y_) + # # Generated Test Cases # + target_with_no_cov = helper_create_multivariate_linear_timeseries( + n_components=1, + components_names=["no_static"], + start_value=0, + end_value=10, + start=2, + length=10, + freq=2, + ) + n_comp = 2 + target_with_static_cov = helper_create_multivariate_linear_timeseries( + n_components=n_comp, + 
components_names=["static_0", "static_1"], + start_value=0, + end_value=10, + start=2, + length=10, + freq=2, + ) + target_with_static_cov = target_with_static_cov.with_static_covariates( + pd.DataFrame({"dummy": [1]}) # leads to "global" static cov component name + ) + target_with_static_cov2 = target_with_static_cov.with_static_covariates( + pd.DataFrame( + {"dummy": [i for i in range(n_comp)]} + ) # leads to sharing target component names + ) + target_with_static_cov3 = target_with_static_cov.with_static_covariates( + pd.DataFrame( + { + "dummy": [i for i in range(n_comp)], + "dummy1": [i for i in range(n_comp)], + } + ) # leads to sharing target component names + ) + + past = helper_create_multivariate_linear_timeseries( + n_components=3, + components_names=["past_0", "past_1", "past_2"], + start_value=10, + end_value=20, + start=2, + length=10, + freq=2, + ) + future = helper_create_multivariate_linear_timeseries( + n_components=4, + components_names=["future_0", "future_1", "future_2", "future_3"], + start_value=20, + end_value=30, + start=2, + length=10, + freq=2, + ) + # Input parameter combinations used to generate test cases: output_chunk_length_combos = (1, 3) output_chunk_shift_combos = (0, 1) multi_models_combos = (False, True) max_samples_per_ts_combos = (1, 2, None) - target_lag_combos = past_lag_combos = (None, [-1, -3], [-3, -1]) - future_lag_combos = (*target_lag_combos, [0], [2, 1], [-1, 1], [0, 2]) + # lags are sorted ascending as done by the models internally + target_lag_combos = past_lag_combos = (None, [-3, -1], [-2, -1]) + future_lag_combos = (*target_lag_combos, [0], [1, 2], [-1, 1], [0, 2]) # minimum series length min_n_ts = 8 + max(output_chunk_shift_combos) @@ -436,7 +666,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): # different start times, different lengths, and different values, but # they're all of the same frequency: if series_type == "integer": - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, @@ -444,7 +674,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): length=self.min_n_ts, freq=2, ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, @@ -452,7 +682,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): length=self.min_n_ts + 1, freq=2, ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, @@ -461,7 +691,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): freq=2, ) else: - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, @@ -469,7 +699,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): length=self.min_n_ts, freq="2d", ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, @@ -477,7 +707,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str): length=self.min_n_ts + 1, freq="2d", ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, @@ -509,55 +739,45 @@ def test_lagged_training_data_equal_freq(self, series_type: str): 
lags_is_none = [x is None for x in all_lags] if all(lags_is_none): continue - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length, - past_covariates=past if lags_past else None, - future_covariates=future if lags_future else None, - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - multi_models=multi_models, - max_samples_per_ts=max_samples_per_ts, - use_moving_windows=True, - output_chunk_shift=output_chunk_shift, - ) - feats_times = self.get_feature_times( - target, - past, - future, - lags, - lags_past, - lags_future, - output_chunk_length, - max_samples_per_ts, - output_chunk_shift, - ) - # Construct `X` by constructing each block, then concatenate these - # blocks together along component axis: - X_target = self.construct_X_block(target, feats_times, lags) - X_past = self.construct_X_block(past, feats_times, lags_past) - X_future = self.construct_X_block(future, feats_times, lags_future) - all_X = (X_target, X_past, X_future) - to_concat = [X for X in all_X if X is not None] - expected_X = np.concatenate(to_concat, axis=1) - expected_y = self.create_y( - target, - feats_times, - output_chunk_length, - multi_models, - output_chunk_shift, + + expected_X, expected_y, expected_times = ( + self.helper_create_expected_lagged_data( + target, + past, + future, + lags, + lags_past, + lags_future, + output_chunk_length, + output_chunk_shift, + multi_models, + max_samples_per_ts, + ) ) - # Number of observations should match number of feature times: - assert X.shape[0] == len(feats_times) - assert y.shape[0] == len(feats_times) - assert X.shape[0] == len(times[0]) - assert y.shape[0] == len(times[0]) - # Check that outputs match: - assert np.allclose(expected_X, X[:, :, 0]) - assert np.allclose(expected_y, y[:, :, 0]) - assert feats_times.equals(times[0]) + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": future, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": max_samples_per_ts, + "use_moving_windows": True, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) @pytest.mark.parametrize( "series_type", @@ -581,17 +801,17 @@ def test_lagged_training_data_unequal_freq(self, series_type): # different start times, different lengths, different values, and different # frequencies: if series_type == "integer": - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, start=2, length=20, freq=1 ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, start=4, length=10, freq=2 ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, start=6, length=7, freq=3 ) else: - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, @@ -599,7 +819,7 @@ def 
test_lagged_training_data_unequal_freq(self, series_type): length=20, freq="d", ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, @@ -607,7 +827,7 @@ def test_lagged_training_data_unequal_freq(self, series_type): length=10, freq="2d", ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, @@ -639,55 +859,49 @@ def test_lagged_training_data_unequal_freq(self, series_type): lags_is_none = [x is None for x in all_lags] if all(lags_is_none): continue - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length, - past_covariates=past if lags_past else None, - future_covariates=future if lags_future else None, - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - multi_models=multi_models, - max_samples_per_ts=max_samples_per_ts, - use_moving_windows=False, - output_chunk_shift=output_chunk_shift, + + expected_X, expected_y, expected_times = ( + self.helper_create_expected_lagged_data( + target, + past, + future, + lags, + lags_past, + lags_future, + output_chunk_length, + output_chunk_shift, + multi_models, + max_samples_per_ts, + ) ) - feats_times = self.get_feature_times( - target, - past, - future, - lags, - lags_past, - lags_future, - output_chunk_length, - max_samples_per_ts, - output_chunk_shift, - ) - # Construct `X` by constructing each block, then concatenate these - # blocks together along component axis: - X_target = self.construct_X_block(target, feats_times, lags) - X_past = self.construct_X_block(past, feats_times, lags_past) - X_future = self.construct_X_block(future, feats_times, lags_future) - all_X = (X_target, X_past, X_future) - to_concat = [x for x in all_X if x is not None] - expected_X = np.concatenate(to_concat, axis=1) - expected_y = self.create_y( - target, - feats_times, - output_chunk_length, - multi_models, - output_chunk_shift, + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": future, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": max_samples_per_ts, + "use_moving_windows": False, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" ) - # Number of observations should match number of feature times: - assert X.shape[0] == len(feats_times) - assert y.shape[0] == len(feats_times) - assert X.shape[0] == len(times[0]) - assert y.shape[0] == len(times[0]) - # Check that outputs match: - assert np.allclose(expected_X, X[:, :, 0]) - assert np.allclose(expected_y, y[:, :, 0]) - assert feats_times.equals(times[0]) @pytest.mark.parametrize( "series_type", @@ -708,17 +922,17 @@ def test_lagged_training_data_method_consistency(self, series_type): # different start times, different lengths, different values, and of # different frequencies: if series_type == "integer": - target = 
self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, start=2, length=20, freq=1 ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, start=4, length=10, freq=2 ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, start=6, length=7, freq=3 ) else: - target = self.create_multivariate_linear_timeseries( + target = helper_create_multivariate_linear_timeseries( n_components=2, start_value=0, end_value=10, @@ -726,7 +940,7 @@ def test_lagged_training_data_method_consistency(self, series_type): end=pd.Timestamp("1/18/2000"), freq="2d", ) - past = self.create_multivariate_linear_timeseries( + past = helper_create_multivariate_linear_timeseries( n_components=3, start_value=10, end_value=20, @@ -734,7 +948,7 @@ def test_lagged_training_data_method_consistency(self, series_type): end=pd.Timestamp("1/20/2000"), freq="2d", ) - future = self.create_multivariate_linear_timeseries( + future = helper_create_multivariate_linear_timeseries( n_components=4, start_value=20, end_value=30, @@ -841,7 +1055,7 @@ def test_lagged_training_data_single_lag_single_component_same_series(self, conf expected_y = series.all_values(copy=False)[ 3 + output_chunk_shift : 3 + output_chunk_shift + len(expected_times_y), :, - 0, + :, ] # Offset `3:-2` by `-1` lag: expected_X_target = series.all_values(copy=False)[ @@ -855,28 +1069,38 @@ def test_lagged_training_data_single_lag_single_component_same_series(self, conf ] expected_X = np.concatenate( [expected_X_target, expected_X_past, expected_X_future], axis=1 - ) - X, y, times, _ = create_lagged_training_data( - target_series=series, - output_chunk_length=output_chunk_length, - past_covariates=series, - future_covariates=series, - lags=lags, - lags_past_covariates=past_lags, - lags_future_covariates=future_lags, - uses_static_covariates=False, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - # Number of observations should match number of feature times: - assert X.shape[0] == len(expected_times_x) - assert X.shape[0] == len(times[0]) - assert y.shape[0] == len(expected_times_y) - assert y.shape[0] == len(times[0]) - # Check that outputs match: - assert np.allclose(expected_X, X[:, :, 0]) - assert np.allclose(expected_y, y[:, :, 0]) - assert expected_times_x.equals(times[0]) + )[:, :, np.newaxis] + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times_x, + "expected_times_y": expected_times_y, + "target": series, + "past_cov": series, + "future_cov": series, + "lags": lags, + "lags_past": past_lags, + "lags_future": future_lags, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": True, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( 
"config", @@ -946,27 +1170,48 @@ def test_lagged_training_data_extend_past_and_future_covariates(self, config): past.all_values(copy=False)[-1 - output_chunk_shift, :, 0], future.all_values(copy=False)[-1 - output_chunk_shift, :, 0], ] - ).reshape(1, -1) + ).reshape(1, -1, 1) # Label is very last value of `target`: - expected_y = target.all_values(copy=False)[-1, :, 0] + expected_y = target.all_values(copy=False)[-1:, :, :] + + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) + # Check correctness for both 'moving window' method # and 'time intersection' method: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=1, - past_covariates=past, - future_covariates=future, - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - max_samples_per_ts=max_samples_per_ts, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq - assert np.allclose(expected_X, X[:, :, 0]) - assert np.allclose(expected_y, y[:, :, 0]) + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": future, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": True, + "max_samples_per_ts": max_samples_per_ts, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -998,22 +1243,43 @@ def test_lagged_training_data_single_point(self, config): lags = [-1] expected_X = np.zeros((1, 1, 1)) expected_y = np.ones((1, 1, 1)) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Test correctness for 'moving window' and for 'time intersection' methods, as well # as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length, - lags=lags, - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - # Should only have one sample, generated for `t = target.end_time()`: - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": None, + "future_cov": None, + "lags": lags, + "lags_past": None, + "lags_future": None, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": 
use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -1060,25 +1326,45 @@ def test_lagged_training_data_zero_lags(self, config): ) # X comprises of first value of `target` (i.e. 0) and only value in `future`: - expected_X = np.array([0.0, 1.0]).reshape(1, 2, 1) + expected_X = np.array([[[0.0], [1.0]]]) expected_y = np.ones((1, 1, 1)) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Check correctness for 'moving windows' and 'time intersection' methods, as # well as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=1, - future_covariates=future, - lags=[-1], - lags_future_covariates=[0], - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": None, + "future_cov": future, + "lags": [-1], + "lags_past": None, + "lags_future": [0], + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -1142,23 +1428,43 @@ def test_lagged_training_data_no_target_lags_future_covariates(self, config): # X comprises of first value of `target` (i.e. 
0) and only value in `future`: expected_X = future[-1].all_values(copy=False) expected_y = target[-1].all_values(copy=False) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Check correctness for 'moving windows' and 'time intersection' methods, as # well as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=1, - future_covariates=future, - lags=None, - lags_future_covariates=[cov_lag], - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": None, + "future_cov": future, + "lags": None, + "lags_past": None, + "lags_future": [cov_lag], + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -1221,23 +1527,43 @@ def test_lagged_training_data_no_target_lags_past_covariates(self, config): # X comprises of first value of `target` (i.e. 
0) and only value in `future`: expected_X = past[-1].all_values(copy=False) expected_y = target[-1].all_values(copy=False) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Check correctness for 'moving windows' and 'time intersection' methods, as # well as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=1, - past_covariates=past, - lags=None, - lags_past_covariates=[cov_lag], - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, - output_chunk_shift=output_chunk_shift, - ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": None, + "lags": None, + "lags_past": [cov_lag], + "lags_future": None, + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) @pytest.mark.parametrize( "config", @@ -1284,25 +1610,184 @@ def test_lagged_training_data_positive_lags(self, config): end_value=2, ) # X comprises of first value of `target` (i.e. 
0) and only value in `future`: - expected_X = np.array([0.0, 1.0]).reshape(1, 2, 1) + expected_X = np.array([[[0.0], [1.0]]]) expected_y = np.ones((1, 1, 1)) + expected_times = generate_index( + start=target.end_time() - output_chunk_shift * target.freq, + length=1, + freq=target.freq, + ) # Check correctness for 'moving windows' and 'time intersection' methods, as # well as for different `multi_models` values: - X, y, times, _ = create_lagged_training_data( + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": None, + "future_cov": future, + "lags": [-1], + "lags_past": None, + "lags_future": [1], + "output_chunk_length": 1, + "output_chunk_shift": output_chunk_shift, + "use_static_covariates": False, + "multi_models": multi_models, + "max_samples_per_ts": None, + "use_moving_windows": use_moving_windows, + "concatenate": True, + } + + self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs) + + if use_moving_windows: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + else: + with pytest.raises(ValueError) as err: + self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags" + ) + + @pytest.mark.parametrize( + "config", + itertools.product( + [0, 1, 3], + [1, 2], + [True, False], + ["datetime", "integer"], + ), + ) + def test_lagged_training_data_comp_wise_lags(self, config): + """ + Tests that `create_lagged_training_data` generate the expected values when the + lags are component-specific over multivariate series. + + Note that this is supported only when use_moving_window=True. + """ + output_chunk_shift, output_chunk_length, multi_models, series_type = config + + lags_tg = {"target_0": [-4, -1], "target_1": [-4, -1]} + lags_pc = [-3] + lags_fc = {"future_0": [-1, 0], "future_1": [-2, 1]} + + if series_type == "integer": + start_tg = 0 + start_pc = start_tg + 1 + start_fc = start_tg + 2 + else: + start_tg = pd.Timestamp("2000-01-15") + start_pc = pd.Timestamp("2000-01-16") + start_fc = pd.Timestamp("2000-01-17") + + # length = max lag - min lag + 1 = -1 + 4 + 1 = 4 + target = helper_create_multivariate_linear_timeseries( + n_components=2, + components_names=["target_0", "target_1"], + length=4 + output_chunk_shift + output_chunk_length, + start=start_tg, + ) + # length = max lag - min lag + 1 = -3 + 3 + 1 = 1 + past = ( + helper_create_multivariate_linear_timeseries( + n_components=2, + components_names=["past_0", "past_1"], + length=1, + start=start_pc, + ) + + 100 + ) + # length = max lag - min lag + 1 = 1 + 2 + 1 = 4 + future = ( + helper_create_multivariate_linear_timeseries( + n_components=2, + components_names=["future_0", "future_1"], + length=4 + output_chunk_shift + output_chunk_length, + start=start_fc, + ) + + 200 + ) + + # extremes lags are manually computed, similarly to the model.lags attribute + feats_times = self.get_feature_times( target, - output_chunk_length=1, - future_covariates=future, - lags=[-1], - lags_future_covariates=[1], - uses_static_covariates=False, - multi_models=multi_models, - use_moving_windows=use_moving_windows, + past, + future, + [-4, -1], # min, max target lag + [-3], # unique past lag + [-2, 1], # min, max future lag + output_chunk_length, + None, + output_chunk_shift, + ) + + # reorder the features to obtain target_0_lag-4, target_1_lag-4, target_0_lag-1, target_1_lag-1 + X_target 
= [ + self.construct_X_block( + target["target_0"], feats_times, lags_tg["target_0"][0:1] + ), + self.construct_X_block( + target["target_1"], feats_times, lags_tg["target_1"][0:1] + ), + self.construct_X_block( + target["target_0"], feats_times, lags_tg["target_0"][1:2] + ), + self.construct_X_block( + target["target_1"], feats_times, lags_tg["target_1"][1:2] + ), + ] + # single lag for all the components, can be kept as is + X_past = [ + self.construct_X_block(past[name], feats_times, lags_pc) + for name in ["past_0", "past_1"] + ] + # reorder the features to obtain future_1_lag-2, future_0_lag-1, future_0_lag0, future_1_lag1 + X_future = [ + self.construct_X_block( + future["future_1"], feats_times, lags_fc["future_1"][0:1] + ), + self.construct_X_block( + future["future_0"], feats_times, lags_fc["future_0"][0:1] + ), + self.construct_X_block( + future["future_0"], feats_times, lags_fc["future_0"][1:2] + ), + self.construct_X_block( + future["future_1"], feats_times, lags_fc["future_1"][1:2] + ), + ] + all_X = X_target + X_past + X_future + expected_X = np.concatenate(all_X, axis=1)[:, :, np.newaxis] + expected_y = self.create_y( + target, + feats_times, + output_chunk_length, + multi_models, + output_chunk_shift, + )[:, :, np.newaxis] + + # lags are already in dict format + self.helper_check_lagged_data( + convert_lags_to_dict=True, + expected_X=expected_X, + expected_y=expected_y, + expected_times_x=feats_times, + expected_times_y=feats_times, + target=target, + past_cov=past, + future_cov=future, + lags=lags_tg, + lags_past=lags_pc, + lags_future=lags_fc, + output_chunk_length=output_chunk_length, output_chunk_shift=output_chunk_shift, + use_static_covariates=False, + multi_models=multi_models, + max_samples_per_ts=None, + use_moving_windows=True, + concatenate=True, ) - assert np.allclose(expected_X, X) - assert np.allclose(expected_y, y) - assert len(times[0]) == 1 - assert times[0][0] == target.end_time() - output_chunk_shift * target.freq def test_lagged_training_data_sequence_inputs(self): """ @@ -1313,6 +1798,9 @@ def test_lagged_training_data_sequence_inputs(self): # Define two simple tabularization problems: target_1 = past_1 = future_1 = linear_timeseries(start=0, end=5) target_2 = past_2 = future_2 = linear_timeseries(start=6, end=11) + ts_tg = (target_1, target_2) + ts_pc = (past_1, past_2) + ts_fc = (future_1, future_2) lags = lags_past = lags_future = [-1] output_chunk_length = 1 # Expected solution: @@ -1328,45 +1816,41 @@ def test_lagged_training_data_sequence_inputs(self): expected_y = np.concatenate([expected_y_1, expected_y_2], axis=0) expected_times_1 = target_1.time_index[1:] expected_times_2 = target_2.time_index[1:] - # Check when `concatenate = True`: - X, y, times, _ = create_lagged_training_data( - (target_1, target_2), - output_chunk_length=output_chunk_length, - past_covariates=(past_1, past_2), - future_covariates=(future_1, future_2), - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - output_chunk_shift=0, + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": [expected_times_1, expected_times_2], + "expected_times_y": [expected_times_1, expected_times_2], + "target": ts_tg, + "past_cov": ts_pc, + "future_cov": ts_fc, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": 0, + "use_static_covariates": False, + "multi_models": True, + "max_samples_per_ts": None, + 
"use_moving_windows": True, + } + + # concatenate=True + self.helper_check_lagged_data( + convert_lags_to_dict=False, concatenate=True, **kwargs ) - assert np.allclose(X, expected_X) - assert np.allclose(y, expected_y) - assert len(times) == 2 - assert times[0].equals(expected_times_1) - assert times[1].equals(expected_times_2) - # Check when `concatenate = False`: - X, y, times, _ = create_lagged_training_data( - (target_1, target_2), - output_chunk_length=output_chunk_length, - past_covariates=(past_1, past_2), - future_covariates=(future_1, future_2), - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - concatenate=False, - output_chunk_shift=0, + self.helper_check_lagged_data( + convert_lags_to_dict=True, concatenate=True, **kwargs + ) + + # concatenate=False + self.helper_check_lagged_data( + convert_lags_to_dict=False, concatenate=False, **kwargs + ) + self.helper_check_lagged_data( + convert_lags_to_dict=True, concatenate=False, **kwargs ) - assert len(X) == 2 - assert len(y) == 2 - assert np.allclose(X[0], expected_X_1) - assert np.allclose(X[1], expected_X_2) - assert np.allclose(y[0], expected_y_1) - assert np.allclose(y[1], expected_y_2) - assert len(times) == 2 - assert times[0].equals(expected_times_1) - assert times[1].equals(expected_times_2) def test_lagged_training_data_stochastic_series(self): """ @@ -1387,20 +1871,32 @@ def test_lagged_training_data_stochastic_series(self): ) expected_y = target.all_values(copy=False)[1:, :, :] expected_times = target.time_index[1:] - X, y, times, _ = create_lagged_training_data( - target, - output_chunk_length=output_chunk_length, - past_covariates=past, - future_covariates=future, - lags=lags, - lags_past_covariates=lags_past, - lags_future_covariates=lags_future, - uses_static_covariates=False, - output_chunk_shift=0, + + kwargs = { + "expected_X": expected_X, + "expected_y": expected_y, + "expected_times_x": expected_times, + "expected_times_y": expected_times, + "target": target, + "past_cov": past, + "future_cov": future, + "lags": lags, + "lags_past": lags_past, + "lags_future": lags_future, + "output_chunk_length": output_chunk_length, + "output_chunk_shift": 0, + "use_static_covariates": False, + "multi_models": True, + "max_samples_per_ts": None, + "use_moving_windows": True, + } + + self.helper_check_lagged_data( + convert_lags_to_dict=False, concatenate=True, **kwargs + ) + self.helper_check_lagged_data( + convert_lags_to_dict=True, concatenate=True, **kwargs ) - assert np.allclose(X, expected_X) - assert np.allclose(y, expected_y) - assert times[0].equals(expected_times) def test_lagged_training_data_no_shared_times_error(self): """ @@ -1622,6 +2118,46 @@ def test_lagged_training_data_invalid_lag_values_error(self): output_chunk_shift=0, ) + def test_lagged_training_data_dict_lags_no_moving_window_error(self): + """ + Tests that `create_lagged_training_data` throws correct error + when `use_moving_window` is set to `False` and lags are provided + as a dict for a multivariate series. 
+ """ + ts = linear_timeseries(start=1, length=20, freq=1, column_name="lin1") + lags = [-1] + lags_dict = {"lin1": [-1]} + # one series, one set of lags are dict + with pytest.raises(ValueError) as err: + create_lagged_training_data( + target_series=ts, + output_chunk_length=1, + lags=lags_dict, + uses_static_covariates=False, + use_moving_windows=False, + output_chunk_shift=0, + ) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags is provided as a dictionary." + ) + # all the series are provided, only one passed as dict + with pytest.raises(ValueError) as err: + create_lagged_training_data( + target_series=ts, + past_covariates=ts, + future_covariates=ts, + output_chunk_length=1, + lags=lags, + lags_past_covariates=lags_dict, + lags_future_covariates=lags, + uses_static_covariates=False, + use_moving_windows=False, + output_chunk_shift=0, + ) + assert str(err.value).startswith( + "`use_moving_windows=False` is not supported when any of the lags is provided as a dictionary." + ) + def test_lagged_training_data_unspecified_lag_or_series_warning(self): """ Tests that `create_lagged_training_data` throws correct @@ -1709,295 +2245,375 @@ def test_lagged_training_data_unspecified_lag_or_series_warning(self): ) assert len(w) == 0 - def test_create_lagged_component_names(self): + @pytest.mark.parametrize( + "config", + [ + # target no static covariate + ( + target_with_no_cov, + None, + None, + [-2, -1], + None, + None, + False, + ["no_static_target_lag-2", "no_static_target_lag-1"], + ), + # target with static covariate (but don't use them in feature names) + ( + target_with_static_cov, + None, + None, + [-4, -1], + None, + None, + False, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_0_target_lag-1", + "static_1_target_lag-1", + ], + ), + # target with static covariate (acting on global target components) + ( + target_with_static_cov, + None, + None, + [-4, -1], + None, + None, + True, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_0_target_lag-1", + "static_1_target_lag-1", + "dummy_statcov_target_global_components", + ], + ), + # target with static covariate (component specific) + ( + target_with_static_cov2, + None, + None, + [-4, -1], + None, + None, + True, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_0_target_lag-1", + "static_1_target_lag-1", + "dummy_statcov_target_static_0", + "dummy_statcov_target_static_1", + ], + ), + # target with static covariate (component specific & multivariate) + ( + target_with_static_cov3, + None, + None, + [-4, -1], + None, + None, + True, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_0_target_lag-1", + "static_1_target_lag-1", + "dummy_statcov_target_static_0", + "dummy_statcov_target_static_1", + "dummy1_statcov_target_static_0", + "dummy1_statcov_target_static_1", + ], + ), + # target + past + ( + target_with_no_cov, + past, + None, + [-4, -3], + [-1], + None, + False, + [ + "no_static_target_lag-4", + "no_static_target_lag-3", + "past_0_pastcov_lag-1", + "past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + ], + ), + # target + future + ( + target_with_no_cov, + None, + future, + [-2, -1], + None, + [3], + False, + [ + "no_static_target_lag-2", + "no_static_target_lag-1", + "future_0_futcov_lag3", + "future_1_futcov_lag3", + "future_2_futcov_lag3", + "future_3_futcov_lag3", + ], + ), + # past + future + ( + target_with_no_cov, + past, + future, + None, + [-1], + [2], + False, + [ + "past_0_pastcov_lag-1", + 
"past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + "future_0_futcov_lag2", + "future_1_futcov_lag2", + "future_2_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + # target with static (not used) + past + future + ( + target_with_static_cov, + past, + future, + [-2, -1], + [-1], + [2], + False, + [ + "static_0_target_lag-2", + "static_1_target_lag-2", + "static_0_target_lag-1", + "static_1_target_lag-1", + "past_0_pastcov_lag-1", + "past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + "future_0_futcov_lag2", + "future_1_futcov_lag2", + "future_2_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + # multiple series with same components names, including past/future covariates + ( + [target_with_static_cov, target_with_static_cov], + [past, past], + [future, future], + [-3], + [-1], + [2], + False, + [ + "static_0_target_lag-3", + "static_1_target_lag-3", + "past_0_pastcov_lag-1", + "past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + "future_0_futcov_lag2", + "future_1_futcov_lag2", + "future_2_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + # multiple series with different components will use the first series as reference + ( + [ + target_with_static_cov, + target_with_no_cov.stack(target_with_no_cov), + ], + [past, past], + [future, past.stack(target_with_no_cov)], + [-2, -1], + [-1], + [2], + False, + [ + "static_0_target_lag-2", + "static_1_target_lag-2", + "static_0_target_lag-1", + "static_1_target_lag-1", + "past_0_pastcov_lag-1", + "past_1_pastcov_lag-1", + "past_2_pastcov_lag-1", + "future_0_futcov_lag2", + "future_1_futcov_lag2", + "future_2_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + ], + ) + def test_create_lagged_component_names(self, config): """ Tests that `create_lagged_component_names` produces the expected features name depending on the lags, output_chunk_length and covariates. 
- """ - target_with_no_cov = self.create_multivariate_linear_timeseries( - n_components=1, - components_names=["no_static"], - start_value=0, - end_value=10, - start=2, - length=10, - freq=2, - ) - n_comp = 2 - target_with_static_cov = self.create_multivariate_linear_timeseries( - n_components=n_comp, - components_names=["static_0", "static_1"], - start_value=0, - end_value=10, - start=2, - length=10, - freq=2, - ) - target_with_static_cov = target_with_static_cov.with_static_covariates( - pd.DataFrame({"dummy": [1]}) # leads to "global" static cov component name - ) - target_with_static_cov2 = target_with_static_cov.with_static_covariates( - pd.DataFrame( - {"dummy": [i for i in range(n_comp)]} - ) # leads to sharing target component names - ) - target_with_static_cov3 = target_with_static_cov.with_static_covariates( - pd.DataFrame( - { - "dummy": [i for i in range(n_comp)], - "dummy1": [i for i in range(n_comp)], - } - ) # leads to sharing target component names - ) - - past = self.create_multivariate_linear_timeseries( - n_components=3, - components_names=["past_0", "past_1", "past_2"], - start_value=10, - end_value=20, - start=2, - length=10, - freq=2, - ) - future = self.create_multivariate_linear_timeseries( - n_components=4, - components_names=["future_0", "future_1", "future_2", "future_3"], - start_value=20, - end_value=30, - start=2, - length=10, - freq=2, - ) - - # target no static covariate - expected_lagged_features = ["no_static_target_lag-2", "no_static_target_lag-1"] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_no_cov, - past_covariates=None, - future_covariates=None, - lags=[-2, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - use_static_covariates=False, - ) - assert expected_lagged_features == created_lagged_features - - # target with static covariate (but don't use them in feature names) - expected_lagged_features = [ - "static_0_target_lag-4", - "static_1_target_lag-4", - "static_0_target_lag-1", - "static_1_target_lag-1", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov, - past_covariates=None, - future_covariates=None, - lags=[-4, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - use_static_covariates=False, - ) - assert expected_lagged_features == created_lagged_features - # target with static covariate (acting on global target components) - expected_lagged_features = [ - "static_0_target_lag-4", - "static_1_target_lag-4", - "static_0_target_lag-1", - "static_1_target_lag-1", - "dummy_statcov_target_global_components", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov, - past_covariates=None, - future_covariates=None, - lags=[-4, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - use_static_covariates=True, - ) - assert expected_lagged_features == created_lagged_features - - # target with static covariate (component specific) - expected_lagged_features = [ - "static_0_target_lag-4", - "static_1_target_lag-4", - "static_0_target_lag-1", - "static_1_target_lag-1", - "dummy_statcov_target_static_0", - "dummy_statcov_target_static_1", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov2, - past_covariates=None, - future_covariates=None, - lags=[-4, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - 
use_static_covariates=True, - ) - assert expected_lagged_features == created_lagged_features - - # target with static covariate (component specific & multivariate) - expected_lagged_features = [ - "static_0_target_lag-4", - "static_1_target_lag-4", - "static_0_target_lag-1", - "static_1_target_lag-1", - "dummy_statcov_target_static_0", - "dummy_statcov_target_static_1", - "dummy1_statcov_target_static_0", - "dummy1_statcov_target_static_1", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov3, - past_covariates=None, - future_covariates=None, - lags=[-4, -1], - lags_past_covariates=None, - lags_future_covariates=None, - concatenate=False, - use_static_covariates=True, - ) - assert expected_lagged_features == created_lagged_features - - # target + past - expected_lagged_features = [ - "no_static_target_lag-4", - "no_static_target_lag-3", - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - "past_2_pastcov_lag-1", - ] + When lags are component-specific, they are identical across all the components. + """ + ( + ts_tg, + ts_pc, + ts_fc, + lags_tg, + lags_pc, + lags_fc, + use_static_cov, + expected_lagged_features, + ) = config + # lags as list created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_no_cov, - past_covariates=past, - future_covariates=None, - lags=[-4, -3], - lags_past_covariates=[-1], - lags_future_covariates=None, + target_series=ts_tg, + past_covariates=ts_pc, + future_covariates=ts_fc, + lags=lags_tg, + lags_past_covariates=lags_pc, + lags_future_covariates=lags_fc, concatenate=False, + use_static_covariates=use_static_cov, ) - assert expected_lagged_features == created_lagged_features - # target + future - expected_lagged_features = [ - "no_static_target_lag-2", - "no_static_target_lag-1", - "future_0_futcov_lag3", - "future_1_futcov_lag3", - "future_2_futcov_lag3", - "future_3_futcov_lag3", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_no_cov, - past_covariates=None, - future_covariates=future, - lags=[-2, -1], - lags_past_covariates=None, - lags_future_covariates=[3], - concatenate=False, + # converts lags to dictionary format + lags_as_dict = self.convert_lags_to_dict( + ts_tg, + ts_pc, + ts_fc, + lags_tg, + lags_pc, + lags_fc, ) - assert expected_lagged_features == created_lagged_features - # past + future - expected_lagged_features = [ - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - "past_2_pastcov_lag-1", - "future_0_futcov_lag2", - "future_1_futcov_lag2", - "future_2_futcov_lag2", - "future_3_futcov_lag2", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_no_cov, - past_covariates=past, - future_covariates=future, - lags=None, - lags_past_covariates=[-1], - lags_future_covariates=[2], + created_lagged_features_dict_lags, _ = create_lagged_component_names( + target_series=ts_tg, + past_covariates=ts_pc, + future_covariates=ts_fc, + lags=lags_as_dict["target"], + lags_past_covariates=lags_as_dict["past"], + lags_future_covariates=lags_as_dict["future"], concatenate=False, + use_static_covariates=use_static_cov, ) assert expected_lagged_features == created_lagged_features + assert expected_lagged_features == created_lagged_features_dict_lags - # target with static + past + future - expected_lagged_features = [ - "static_0_target_lag-2", - "static_1_target_lag-2", - "static_0_target_lag-1", - "static_1_target_lag-1", - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - 
"past_2_pastcov_lag-1", - "future_0_futcov_lag2", - "future_1_futcov_lag2", - "future_2_futcov_lag2", - "future_3_futcov_lag2", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=target_with_static_cov, - past_covariates=past, - future_covariates=future, - lags=[-2, -1], - lags_past_covariates=[-1], - lags_future_covariates=[2], - concatenate=False, - ) - assert expected_lagged_features == created_lagged_features + @pytest.mark.parametrize( + "config", + [ + # lags have the same minimum + ( + target_with_static_cov, + None, + None, + {"static_0": [-4, -2], "static_1": [-4, -3]}, + None, + None, + False, + [ + "static_0_target_lag-4", + "static_1_target_lag-4", + "static_1_target_lag-3", + "static_0_target_lag-2", + ], + ), + # lags are not overlapping + ( + target_with_static_cov, + None, + None, + {"static_0": [-4, -1], "static_1": [-3, -2]}, + None, + None, + False, + [ + "static_0_target_lag-4", + "static_1_target_lag-3", + "static_1_target_lag-2", + "static_0_target_lag-1", + ], + ), + # default lags for target, overlapping lags for past covariates + ( + target_with_static_cov, + past, + None, + {"static_0": [-3], "static_1": [-3]}, + {"past_0": [-4, -3], "past_1": [-3, -2], "past_2": [-2]}, + None, + False, + [ + "static_0_target_lag-3", + "static_1_target_lag-3", + "past_0_pastcov_lag-4", + "past_0_pastcov_lag-3", + "past_1_pastcov_lag-3", + "past_1_pastcov_lag-2", + "past_2_pastcov_lag-2", + ], + ), + # no lags for target, future covariates lags are not in the compoments order + ( + target_with_static_cov, + None, + future, + None, + None, + { + "future_3": [-2, 0, 2], + "future_0": [-4, 1], + "future_2": [1], + "future_1": [-2, 2], + }, + False, + [ + "future_0_futcov_lag-4", + "future_1_futcov_lag-2", + "future_3_futcov_lag-2", + "future_3_futcov_lag0", + "future_0_futcov_lag1", + "future_2_futcov_lag1", + "future_1_futcov_lag2", + "future_3_futcov_lag2", + ], + ), + ], + ) + def test_create_lagged_component_names_different_lags(self, config): + """ + Tests that `create_lagged_component_names` when lags are different across components. - # multiple series with same components, including past/future covariates - expected_lagged_features = [ - "static_0_target_lag-3", - "static_1_target_lag-3", - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - "past_2_pastcov_lag-1", - "future_0_futcov_lag2", - "future_1_futcov_lag2", - "future_2_futcov_lag2", - "future_3_futcov_lag2", - ] - created_lagged_features, _ = create_lagged_component_names( - target_series=[target_with_static_cov, target_with_static_cov], - past_covariates=[past, past], - future_covariates=[future, future], - lags=[-3], - lags_past_covariates=[-1], - lags_future_covariates=[2], - concatenate=False, - ) - assert expected_lagged_features == created_lagged_features + The lagged features should be sorted by lags, then by components. 
+ """ + ( + ts_tg, + ts_pc, + ts_fc, + lags_tg, + lags_pc, + lags_fc, + use_static_cov, + expected_lagged_features, + ) = config - # multiple series with different components will use the first series as reference - expected_lagged_features = [ - "static_0_target_lag-2", - "static_1_target_lag-2", - "static_0_target_lag-1", - "static_1_target_lag-1", - "past_0_pastcov_lag-1", - "past_1_pastcov_lag-1", - "past_2_pastcov_lag-1", - "future_0_futcov_lag2", - "future_1_futcov_lag2", - "future_2_futcov_lag2", - "future_3_futcov_lag2", - ] created_lagged_features, _ = create_lagged_component_names( - target_series=[ - target_with_static_cov, - target_with_no_cov.stack(target_with_no_cov), - ], - past_covariates=[past, past], - future_covariates=[future, past.stack(target_with_no_cov)], - lags=[-2, -1], - lags_past_covariates=[-1], - lags_future_covariates=[2], + target_series=ts_tg, + past_covariates=ts_pc, + future_covariates=ts_fc, + lags=lags_tg, + lags_past_covariates=lags_pc, + lags_future_covariates=lags_fc, concatenate=False, + use_static_covariates=use_static_cov, ) assert expected_lagged_features == created_lagged_features diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py index 8a8e0e0dcd..8ff22f236e 100644 --- a/darts/utils/data/tabularization.py +++ b/darts/utils/data/tabularization.py @@ -276,18 +276,52 @@ def create_lagged_data( if seq_ts is not None ] seq_ts_lens = set(seq_ts_lens) - raise_if( - len(seq_ts_lens) > 1, - "Must specify the same number of `TimeSeries` for each series input.", + if len(seq_ts_lens) > 1: + raise_log( + ValueError( + "Must specify the same number of `TimeSeries` for each series input." + ), + logger, + ) + lags_passed_as_dict = any( + isinstance(lags_, dict) + for lags_ in [lags, lags_past_covariates, lags_future_covariates] ) + if (not use_moving_windows) and lags_passed_as_dict: + raise_log( + ValueError( + "`use_moving_windows=False` is not supported when any of the lags is provided as a dictionary. " + f"Received: {[lags, lags_past_covariates, lags_future_covariates]}." + ), + logger, + ) + if max_samples_per_ts is None: max_samples_per_ts = inf + + # lags are identical for multiple series: pre-compute lagged features and reordered lagged features + lags_extract, lags_order = _get_lagged_indices( + lags, + lags_past_covariates, + lags_future_covariates, + ) X, y, times = [], [], [] for i in range(max(seq_ts_lens)): target_i = target_series[i] if target_series else None past_i = past_covariates[i] if past_covariates else None future_i = future_covariates[i] if future_covariates else None - if use_moving_windows and _all_equal_freq(target_i, past_i, future_i): + series_equal_freq = _all_equal_freq(target_i, past_i, future_i) + # component-wise lags extraction is not support with times intersection at the moment + if use_moving_windows and lags_passed_as_dict and (not series_equal_freq): + raise_log( + ValueError( + f"Cannot create tabularized data for the {i}th series because target and covariates don't have " + "the same frequency and some of the lags are provided as a dictionary. Either resample the " + "series or change the lags definition." 
+ ), + logger, + ) + if use_moving_windows and series_equal_freq: X_i, y_i, times_i = _create_lagged_data_by_moving_window( target_i, output_chunk_length, @@ -297,6 +331,8 @@ def create_lagged_data( lags, lags_past_covariates, lags_future_covariates, + lags_extract, + lags_order, max_samples_per_ts, multi_models, check_inputs, @@ -715,9 +751,9 @@ def create_lagged_component_names( For `*_lags=[-2,-1]` and `*_series.n_components = 2` (lags shared across all the components), each `lagged_*` has the following structure (grouped by lags): comp0_*_lag-2 | comp1_*_lag-2 | comp0_*_lag_-1 | comp1_*_lag-1 - For `*_lags={'comp0':[-2, -1], 'comp1':[-5, -3]}` and `*_series.n_components = 2` (component- - specific lags), each `lagged_*` has the following structure (grouped by components): - comp0_*_lag-2 | comp0_*_lag-1 | comp1_*_lag_-5 | comp1_*_lag-3 + For `*_lags={'comp0':[-3, -1], 'comp1':[-5, -3]}` and `*_series.n_components = 2` (component- + specific lags), each `lagged_*` has the following structure (sorted by lags, then by components): + comp1_*_lag-5 | comp0_*_lag-3 | comp1_*_lag_-3 | comp0_*_lag-1 and for static covariates (2 static covariates acting on 2 target components): cov0_*_target_comp0 | cov0_*_target_comp1 | cov1_*_target_comp0 | cov1_*_target_comp1 @@ -776,10 +812,32 @@ def create_lagged_component_names( components = get_single_series(variate).components.tolist() if isinstance(variate_lags, dict): + if "default_lags" in variate_lags: + raise_log( + ValueError( + "All the lags must be explicitly defined, 'default_lags' is not allowed in the " + "lags dictionary." + ), + logger, + ) + + # combine all the lags and sort them in ascending order across all the components + comp_lags_reordered = np.concatenate( + [ + np.array(variate_lags[comp_name], dtype=int) + for comp_name in components + ] + ).argsort() + tmp_lagged_feats_names = [] for name in components: - lagged_feature_names += [ + tmp_lagged_feats_names += [ f"{name}_{variate_type}_lag{lag}" for lag in variate_lags[name] ] + + # adding feats names reordered across components + lagged_feature_names += [ + tmp_lagged_feats_names[idx] for idx in comp_lags_reordered + ] else: lagged_feature_names += [ f"{name}_{variate_type}_lag{lag}" @@ -811,6 +869,44 @@ def create_lagged_component_names( return lagged_feature_names, label_feature_names +def _get_lagged_indices( + lags, + lags_past_covariates, + lags_future_covariates, +): + """Computes and returns: + + - the lagged feature indices for extraction from windows + - the reordered indices to apply after the window extraction (in case of component specific lags) + + Assumes that all input series share identical component order. + """ + lags_extract = [] + lags_order = [] + for lags_i in [lags, lags_past_covariates, lags_future_covariates]: + if lags_i is None: + lags_extract.append(None) + lags_order.append(None) + continue + + # Within each window, the `-1` indexed value (i.e. the value at the very end of + # the window) corresponds to time `t - min_lag_i`. 
The negative index of the time + # `t + lag_i` within this window is, therefore, `-1 + lag_i + min_lag_i`: + if isinstance(lags_i, list): + lags_extract_i = np.array(lags_i, dtype=int) + # Feats are already grouped by lags and ordered + lags_order_i = slice(None) + else: + # Assume keys are in the same order as the series components + # Lags are grouped by component, extracted from the same window + lags_extract_i = [np.array(c_lags, dtype=int) for c_lags in lags_i.values()] + # Sort the lags across the components in ascending order + lags_order_i = np.concatenate(lags_extract_i).argsort() + lags_extract.append(lags_extract_i) + lags_order.append(lags_order_i) + return lags_extract, lags_order + + def _create_lagged_data_by_moving_window( target_series: Optional[TimeSeries], output_chunk_length: int, @@ -820,6 +916,8 @@ def _create_lagged_data_by_moving_window( lags: Optional[Union[Sequence[int], Dict[str, List[int]]]], lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]], lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]], + lags_extract: List[Optional[np.ndarray]], + lags_order: List[Optional[np.ndarray]], max_samples_per_ts: Optional[int], multi_models: bool, check_inputs: bool, @@ -837,6 +935,8 @@ def _create_lagged_data_by_moving_window( and `t + output_chunk_length - 1` from the target series. In both cases, the extracted windows can then be reshaped into the correct shape. This approach can only be used if we *can* assume that the specified series are all of the same frequency. + + Assumes that all the lags are sorted in ascending order. """ feature_times, min_lags, max_lags = _get_feature_times( target_series, @@ -880,10 +980,11 @@ def _create_lagged_data_by_moving_window( X = [] start_time_idx = None target_start_time_idx = None - for i, (series_i, lags_i, min_lag_i, max_lag_i) in enumerate( + for i, (series_i, lags_extract_i, lags_order_i, min_lag_i, max_lag_i) in enumerate( zip( [target_series, past_covariates, future_covariates], - [lags, lags_past_covariates, lags_future_covariates], + lags_extract, + lags_order, min_lags, max_lags, ) @@ -936,19 +1037,16 @@ def _create_lagged_data_by_moving_window( windows = strided_moving_window( x=vals, window_len=window_len, stride=1, axis=0, check_inputs=False ) + # Within each window, the `-1` indexed value (i.e. the value at the very end of # the window) corresponds to time `t - min_lag_i`. 
The negative index of the time # `t + lag_i` within this window is, therefore, `-1 + lag_i + min_lag_i`: - if isinstance(lags_i, list): - lags_to_extract = np.array(lags_i, dtype=int) + min_lag_i - 1 - else: - # Lags are grouped by component, extracted from the same window - lags_to_extract = [ - np.array(comp_lags, dtype=int) + min_lag_i - 1 - for comp_lags in lags_i.values() - ] - lagged_vals = _extract_lagged_vals_from_windows(windows, lags_to_extract) - X.append(lagged_vals) + # extract lagged values + lagged_vals = _extract_lagged_vals_from_windows( + windows, lags_extract_i, lags_shift=min_lag_i - 1 + ) + # extract and append the reordered lagged values + X.append(lagged_vals[:, lags_order_i]) # Cache `start_time_idx` for label creation: if is_target_series: target_start_time_idx = start_time_idx @@ -987,6 +1085,7 @@ def _create_lagged_data_by_moving_window( def _extract_lagged_vals_from_windows( windows: np.ndarray, lags_to_extract: Optional[Union[np.ndarray, List[np.ndarray]]] = None, + lags_shift: int = 0, ) -> np.ndarray: """ Helper function called by `_create_lagged_data_by_moving_window` that @@ -1011,7 +1110,7 @@ def _extract_lagged_vals_from_windows( if isinstance(lags_to_extract, list): # iterate over the components-specific lags comp_windows = [ - windows[:, i, :, comp_lags_to_extract] + windows[:, i, :, comp_lags_to_extract + lags_shift] for i, comp_lags_to_extract in enumerate(lags_to_extract) ] # windows.shape = (sum(lags_len) across components, num_windows, num_samples): @@ -1019,7 +1118,7 @@ def _extract_lagged_vals_from_windows( lagged_vals = np.moveaxis(windows, (1, 0, 2), (0, 1, 2)) else: if lags_to_extract is not None: - windows = windows[:, :, :, lags_to_extract] + windows = windows[:, :, :, lags_to_extract + lags_shift] # windows.shape = (num_windows, window_len, num_components, num_samples): windows = np.moveaxis(windows, (0, 3, 1, 2), (0, 1, 2, 3)) # lagged_vals.shape = (num_windows, num_components*window_len, num_samples): @@ -1148,6 +1247,120 @@ def _create_lagged_data_by_intersecting_times( return X, y, shared_times +def _create_lagged_data_autoregression( + target_series: Union[TimeSeries, Sequence[TimeSeries]], + t_pred: int, + shift: int, + last_step_shift: int, + series_matrix: np.ndarray, + covariate_matrices: Dict[str, np.ndarray], + lags: Dict[str, List[int]], + component_lags: Dict[str, Dict[str, List[int]]], + relative_cov_lags: Dict[str, np.ndarray], + uses_static_covariates: bool, + last_static_covariates_shape: Optional[Tuple[int, int]], + num_samples: int, +) -> np.ndarray: + """Extract lagged data from target, past covariates and future covariates for auto-regression + with RegressionModels. 
+ """ + series_length = len(target_series) + X = [] + for series_type in ["target", "past", "future"]: + if series_type not in lags: + continue + + # extract series specific data + values_matrix = ( + series_matrix + if series_type == "target" + else covariate_matrices[series_type] + ) + + if series_type not in component_lags: + # for global lags over all components, directly extract lagged values from the data + if series_type == "target": + relative_lags = [ + lag - (shift + last_step_shift) for lag in lags[series_type] + ] + else: + relative_lags = relative_cov_lags[series_type] + t_pred + + lagged_data = values_matrix[:, relative_lags].reshape( + series_length * num_samples, -1 + ) + else: + # for component-specific lags, sort by lags and components and then extract + tmp_X = _extract_component_lags_autoregression( + series_type=series_type, + values_matrix=values_matrix, + shift=shift, + last_step_shift=last_step_shift, + t_pred=t_pred, + lags=lags, + component_lags=component_lags, + ) + lagged_data = tmp_X.reshape(series_length * num_samples, -1) + X.append(lagged_data) + # concatenate retrieved lags + X = np.concatenate(X, axis=1) + + if not uses_static_covariates: + return X + + # Need to split up `X` into three equally-sized sub-blocks + # corresponding to each timeseries in `series`, so that + # static covariates can be added to each block; valid since + # each block contains same number of observations: + X = np.split(X, series_length, axis=0) + X, _ = add_static_covariates_to_lagged_data( + features=X, + target_series=target_series, + uses_static_covariates=uses_static_covariates, + last_shape=last_static_covariates_shape, + ) + + # concatenate retrieved lags + return np.concatenate(X, axis=0) + + +def _extract_component_lags_autoregression( + series_type: str, + values_matrix: np.ndarray, + shift: int, + last_step_shift: int, + t_pred: int, + lags: Dict[str, List[int]], + component_lags: Dict[str, Dict[str, List[int]]], +) -> np.ndarray: + """Extract, concatenate and reorder component-wise lags to obtain a feature order + identical to tabularization. 
+    """
+    # prepare index to reorder features by lags across components
+    comp_lags_reordered = np.concatenate(
+        [comp_lags for comp_lags in component_lags[series_type].values()]
+    ).argsort()
+
+    # convert relative lags to absolute
+    if series_type == "target":
+        lags_shift = -shift - last_step_shift
+    else:
+        lags_shift = -lags[series_type][0] + t_pred
+
+    # extract features
+    tmp_X = [
+        values_matrix[
+            :,
+            [lag + lags_shift for lag in comp_lags],
+            comp_i,
+        ]
+        for comp_i, comp_lags in enumerate(component_lags[series_type].values())
+    ]
+
+    # concatenate on features dimension and reorder
+    return np.concatenate(tmp_X, axis=1)[:, comp_lags_reordered]
+
+
 # For convenience, define following types for `_get_feature_times`:
 FeatureTimes = Tuple[
     Optional[Union[pd.Index, pd.DatetimeIndex, pd.RangeIndex]],
diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
index 6d39a305bc..061bece96f 100644
--- a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
+++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
@@ -11,6 +11,7 @@
 from darts.logging import get_logger
 from darts.timeseries import TimeSeries
+from darts.utils import _build_tqdm_iterator
 from darts.utils.data.tabularization import create_lagged_prediction_data
 from darts.utils.historical_forecasts.utils import _get_historical_forecast_boundaries
 from darts.utils.utils import generate_index
@@ -30,6 +31,7 @@ def _optimized_historical_forecasts_last_points_only(
     stride: int = 1,
     overlap_end: bool = False,
     show_warnings: bool = True,
+    verbose: bool = False,
     predict_likelihood_parameters: bool = False,
     **kwargs,
 ) -> Union[TimeSeries, Sequence[TimeSeries], Sequence[Sequence[TimeSeries]]]:
@@ -39,7 +41,8 @@ def _optimized_historical_forecasts_last_points_only(
     Rely on _check_optimizable_historical_forecasts() to check that the assumptions are verified.
     """
     forecasts_list = []
-    for idx, series_ in enumerate(series):
+    iterator = _build_tqdm_iterator(series, verbose)
+    for idx, series_ in enumerate(iterator):
         past_covariates_ = past_covariates[idx] if past_covariates is not None else None
         future_covariates_ = (
             future_covariates[idx] if future_covariates is not None else None
@@ -185,6 +188,7 @@ def _optimized_historical_forecasts_all_points(
     stride: int = 1,
     overlap_end: bool = False,
     show_warnings: bool = True,
+    verbose: bool = False,
     predict_likelihood_parameters: bool = False,
     **kwargs,
 ) -> Union[TimeSeries, Sequence[TimeSeries], Sequence[Sequence[TimeSeries]]]:
@@ -194,7 +198,8 @@ def _optimized_historical_forecasts_all_points(
     Rely on _check_optimizable_historical_forecasts() to check that the assumptions are verified.
     """
     forecasts_list = []
-    for idx, series_ in enumerate(series):
+    iterator = _build_tqdm_iterator(series, verbose)
+    for idx, series_ in enumerate(iterator):
         past_covariates_ = past_covariates[idx] if past_covariates is not None else None
         future_covariates_ = (
             future_covariates[idx] if future_covariates is not None else None
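
The reordering applied when lags are component-specific can be checked in isolation. The sketch below only assumes numpy; the lags and the resulting names are the ones used in the future-covariates case of the parametrized naming test above, and `kind="stable"` is used here only to make the tie-breaking by component order explicit (the library code relies on a plain `argsort`).

import numpy as np

# component-specific lags, keyed in the series' component order
lags_fc = {
    "future_0": [-4, 1],
    "future_1": [-2, 2],
    "future_2": [1],
    "future_3": [-2, 0, 2],
}

# names generated component by component, before reordering
names = [
    f"{comp}_futcov_lag{lag}"
    for comp, comp_lags in lags_fc.items()
    for lag in comp_lags
]

# sort all lags across components in ascending order; ties keep component order
order = np.concatenate([np.array(comp_lags) for comp_lags in lags_fc.values()]).argsort(
    kind="stable"
)

print([names[i] for i in order])
# ['future_0_futcov_lag-4', 'future_1_futcov_lag-2', 'future_3_futcov_lag-2',
#  'future_3_futcov_lag0', 'future_0_futcov_lag1', 'future_2_futcov_lag1',
#  'future_1_futcov_lag2', 'future_3_futcov_lag2']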
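
The same ordering rule applies to the extracted values, both in the moving-window path and in `_extract_component_lags_autoregression`: per-component lagged values are gathered first, then reordered so that features are grouped by lag value and then by component. A simplified sketch (single sample, no window machinery; the values encode time and component so the order is visible), matching the `target_0_lag-4, target_1_lag-4, target_0_lag-1, target_1_lag-1` order expected by the component-wise lags test:

import numpy as np

# toy 2-component target: value = 10 * time_step + component index
vals = np.arange(8)[:, None] * 10 + np.arange(2)[None, :]

lags = {"target_0": [-4, -1], "target_1": [-4, -1]}
order = np.concatenate([np.array(l) for l in lags.values()]).argsort(kind="stable")

t = 6  # time step of the label
# gather values component by component, each at its own lags
per_comp = [
    vals[[t + lag for lag in comp_lags], comp_i]
    for comp_i, comp_lags in enumerate(lags.values())
]
features = np.concatenate(per_comp)[order]
print(features)  # [20 21 50 51] -> lag -4 of both components, then lag -1 of both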
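
The window indexing in `_extract_lagged_vals_from_windows` now receives the raw lags plus a `lags_shift` of `min_lag_i - 1`, relying on the invariant described in the comments: the last entry of each window corresponds to time `t - min_lag`, so the value for a given `lag` sits at negative index `lag + min_lag - 1`. A minimal standalone check of that arithmetic, with hypothetical lags and values equal to their time index:

import numpy as np

lags = [-4, -2]            # hypothetical target lags
min_lag = -max(lags)       # smallest lag magnitude -> 2
lags_shift = min_lag - 1

t = 10
# one window covering times t-4 .. t-2; value == time index
window = np.arange(t + min(lags), t - min_lag + 1)

for lag in lags:
    # negative index "-1 + lag + min_lag" == "lag + lags_shift"
    assert window[lag + lags_shift] == t + lag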
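
From the user side, forwarding `verbose` means the optimized historical-forecasts loop can report series-level progress. A hedged usage sketch, assuming the usual darts entry points; the bar is only expected when the optimized path is taken, i.e. `retrain=False` and no autoregression (forecast horizon not exceeding `output_chunk_length`):

from darts.models import LinearRegressionModel
from darts.utils.timeseries_generation import linear_timeseries

series = [linear_timeseries(length=30), linear_timeseries(length=30) + 10]

model = LinearRegressionModel(lags=3, output_chunk_length=1)
model.fit(series)

# forecast_horizon <= output_chunk_length -> no autoregression, optimized path
hf = model.historical_forecasts(
    series=series,
    forecast_horizon=1,
    retrain=False,
    verbose=True,  # displays the per-series progress bar
)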