From cfd381bc6c3e167099697c80099bd5407c1cb433 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 18 Aug 2023 11:41:02 +0200
Subject: [PATCH 01/30] feat: updated lags sanity checks to accept dictionaries

---
 darts/models/forecasting/regression_model.py | 189 ++++++++++++++-----
 1 file changed, 146 insertions(+), 43 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index f51c26e902..cd0d797ee8 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -60,13 +60,18 @@
 
 logger = get_logger(__name__)
 
+LAGS_TYPE = Union[int, List[int], Dict[str, Union[int, List[int]]]]
+FUTURE_LAGS_TYPE = Union[
+    Tuple[int, int], List[int], Dict[str, Union[Tuple[int, int], List[int]]]
+]
+
 
 class RegressionModel(GlobalForecastingModel):
     def __init__(
         self,
-        lags: Union[int, list] = None,
-        lags_past_covariates: Union[int, List[int]] = None,
-        lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+        lags: Optional[LAGS_TYPE] = None,
+        lags_past_covariates: Optional[LAGS_TYPE] = None,
+        lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
         output_chunk_length: int = 1,
         add_encoders: Optional[dict] = None,
         model=None,
@@ -80,7 +85,10 @@ def __init__(
         ----------
         lags
             Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
-            are used (from -1 backward). Otherwise, a list of integers with lags is required (each lag must be < 0).
+            are used (from -1 backward). Otherwise, a list of integers with lags (each lag must be < 0).
+            In order to specify component-wise lags, a dictionnary with the component name or index as key and the
+            lags value can be provided. The number of keys in the dictionnary must match the number of components in
+            the series.
         lags_past_covariates
             Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
             `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
@@ -132,6 +140,7 @@ def __init__(
 
         self.model = model
         self.lags: Dict[str, List[int]] = {}
+        self.component_lags: Dict[str, Dict[str, List[int]]] = {}
         self.input_dim = None
         self.multi_models = multi_models
         self._considers_static_covariates = use_static_covariates
@@ -174,18 +183,18 @@ def __init__(
 
         for _lags, lags_name in lags_type_checks:
             raise_if_not(
-                isinstance(_lags, (int, list)) or _lags is None,
-                f"`{lags_name}` must be of type int or list. Given: {type(_lags)}.",
+                isinstance(_lags, (int, list, dict)) or _lags is None,
+                f"`{lags_name}` must be of type int, list or dict. Given: {type(_lags)}.",
             )
             raise_if(
                 isinstance(_lags, bool),
-                f"`{lags_name}` must be of type int or list, not bool.",
+                f"`{lags_name}` must be of type int, list or dict, not bool.",
             )
 
         raise_if_not(
-            isinstance(lags_future_covariates, (tuple, list))
+            isinstance(lags_future_covariates, (tuple, list, dict))
             or lags_future_covariates is None,
-            f"`lags_future_covariates` must be of type tuple or list. Given: {type(lags_future_covariates)}.",
+            f"`lags_future_covariates` must be of type tuple, list or dict. Given: {type(lags_future_covariates)}.",
         )
 
         if isinstance(lags_future_covariates, tuple):
@@ -202,57 +211,151 @@ def __init__(
             )
 
         # set lags
-        if isinstance(lags, int):
-            raise_if_not(lags > 0, f"`lags` must be strictly positive. Given: {lags}.")
+        def _check_int_lags(lags: int, lags_name: str) -> Optional[List[int]]:
+            raise_if_not(
+                lags > 0, f"{lags_name} must be strictly positive. Given: {lags}."
+            )
             # selecting last `lags` lags, starting from position 1 (skipping current, pos 0, the one we want to predict)
-            self.lags["target"] = list(range(-lags, 0))
-        elif isinstance(lags, list):
+            return list(range(-lags, 0))
+
+        def _check_list_lags(lags: list, lags_name: str) -> Optional[List[int]]:
             for lag in lags:
                 raise_if(
                     not isinstance(lag, int) or (lag >= 0),
-                    f"Every element of `lags` must be a strictly negative integer. Given: {lags}.",
+                    f"Every element of {lags_name} must be a strictly negative integer. Given: {lags}.",
                 )
             if lags:
-                self.lags["target"] = sorted(lags)
+                return sorted(lags)
+
+        def _check_dict_lags(
+            lags: dict, lags_name: str
+        ) -> Optional[Tuple[List[int], Dict[str, List[int]]]]:
+            components_lags = dict()
+            min_lags = None
+            max_lags = None
+            # TODO: use component idx instead of component name for robustness?
+            for comp_idx, (comp_name, comp_lags) in enumerate(lags.items()):
+                if isinstance(comp_lags, int):
+                    components_lags[comp_name] = _check_int_lags(
+                        comp_lags, f"{lags_name} for component {comp_name}"
+                    )
+                elif isinstance(comp_lags, list):
+                    components_lags[comp_name] = _check_list_lags(
+                        comp_lags, f"{lags_name} for component {comp_name}"
+                    )
+                else:
+                    raise_log(
+                        ValueError(
+                            f"when passed as a dictionnary, {lags_name} for component {comp_name} must be either a "
+                            f"strictly positive integer or a list, received : {type(comp_lags)}."
+                        ),
+                        logger,
+                    )
+                min_lags: int = min(components_lags[comp_name])
+                max_lags: int = max(components_lags[comp_name])
+            return [min_lags, max_lags], components_lags
+
+        if isinstance(lags, int):
+            conv_lags = _check_int_lags(lags, "`lags`")
+            if conv_lags:
+                self.lags["target"] = conv_lags
+        elif isinstance(lags, list):
+            conv_lags = _check_list_lags(lags, "`lags`")
+            if conv_lags:
+                self.lags["target"] = conv_lags
+        elif isinstance(lags, dict):
+            conv_lags = _check_dict_lags(lags, "`lags`")
+            if conv_lags:
+                # dummy, used to compute the extreme lags
+                self.lags["target"] = conv_lags[0]
+                # actual lags
+                self.component_lags["target"] = conv_lags[1]
 
         if isinstance(lags_past_covariates, int):
-            raise_if_not(
-                lags_past_covariates > 0,
-                f"`lags_past_covariates` must be an integer > 0. Given: {lags_past_covariates}.",
-            )
-            self.lags["past"] = list(range(-lags_past_covariates, 0))
+            conv_lags = _check_int_lags(lags_past_covariates, "`lags_past_covariates`")
+            if conv_lags:
+                self.lags["past"] = conv_lags
         elif isinstance(lags_past_covariates, list):
-            for lag in lags_past_covariates:
-                raise_if(
-                    not isinstance(lag, int) or (lag >= 0),
-                    f"Every element of `lags_covariates` must be an integer < 0. Given: {lags_past_covariates}.",
-                )
-            if lags_past_covariates:
-                self.lags["past"] = sorted(lags_past_covariates)
-
-        if isinstance(lags_future_covariates, tuple):
+            conv_lags = _check_list_lags(lags_past_covariates, "`lags_past_covariates`")
+            if conv_lags:
+                self.lags["past"] = conv_lags
+        elif isinstance(lags_past_covariates, dict):
+            conv_lags = _check_dict_lags(lags_past_covariates, "`lags_past_covariates`")
+            if conv_lags:
+                # dummy, used to compute the extreme lags
+                self.lags["past"] = conv_lags[0]
+                # actual lags
+                self.component_lags["past"] = conv_lags[1]
+
+        def _check_tuple_future_lags(
+            lags_future_covariates: Tuple[int, int], lags_name: str
+        ):
             raise_if_not(
                 lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
-                f"`lags_future_covariates` tuple must contain integers >= 0. Given: {lags_future_covariates}.",
+                f"{lags_name} tuple must contain integers >= 0. Given: {lags_future_covariates}.",
             )
-            if (
-                lags_future_covariates[0] is not None
-                and lags_future_covariates[1] is not None
-            ):
-                if not (
-                    lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0
-                ):
-                    self.lags["future"] = list(
-                        range(-lags_future_covariates[0], lags_future_covariates[1])
-                    )
-        elif isinstance(lags_future_covariates, list):
+            # TODO: check if it should return None or []
+            if lags_future_covariates[0] + lags_future_covariates[1] == 0:
+                return None
+            else:
+                return list(
+                    range(-lags_future_covariates[0], lags_future_covariates[1])
+                )
+
+        def _check_list_future_lags(lags_future_covariates: List[int], lags_name: str):
             for lag in lags_future_covariates:
                 raise_if(
                     not isinstance(lag, int) or isinstance(lag, bool),
-                    f"Every element of `lags_future_covariates` must be an integer. Given: {lags_future_covariates}.",
+                    f"Every element of {lags_name} must be an integer. Given: {lags_future_covariates}.",
                 )
             if lags_future_covariates:
-                self.lags["future"] = sorted(lags_future_covariates)
+                return sorted(lags_future_covariates)
+
+        def _check_dict_future_lags(
+            lags_future_covariates: Dict[str, Union[Tuple, List]]
+        ):
+            components_lags = dict()
+            # TODO: use component idx instead of component name for robustness?
+            for comp_idx, (comp_name, comp_lags) in enumerate(
+                lags_future_covariates.items()
+            ):
+                if isinstance(comp_lags, tuple):
+                    components_lags[comp_name] = _check_tuple_future_lags(
+                        comp_lags, f"`future_covariates_lags` for {comp_name}"
+                    )
+                elif isinstance(comp_lags, list):
+                    components_lags[comp_name] = _check_list_future_lags(
+                        comp_lags, f"`future_covariates_lags` for {comp_name}"
+                    )
+                else:
+                    raise_log(
+                        ValueError(
+                            f"when passed as a dictionnary, `future_covariates_lags` for component {comp_name} must be "
+                            f"either a strictly positive integer or a list, received : {type(comp_lags)}."
+                        ),
+                        logger,
+                    )
+            return components_lags
+
+        if isinstance(lags_future_covariates, tuple):
+            conv_lags = _check_tuple_future_lags(
+                lags_future_covariates, "`future_covariates_lags`"
+            )
+            if conv_lags:
+                self.lags["future"] = conv_lags
+        elif isinstance(lags_future_covariates, list):
+            conv_lags = _check_list_future_lags(
+                lags_future_covariates, "`future_covariates_lags`"
+            )
+            if conv_lags:
+                self.lags["future"] = conv_lags
+        elif isinstance(lags_future_covariates, dict):
+            conv_lags = _check_dict_future_lags(lags_future_covariates)
+            if conv_lags:
+                # dummy, used to compute the extreme lags
+                self.lags["future"] = conv_lags[0]
+                # actual lags
+                self.component_lags["future"] = conv_lags[1]
 
         self.pred_dim = self.output_chunk_length if self.multi_models else 1
 

From b3ce1f1ee8cebd8303b94d2afbb4707008102ab7 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 18 Aug 2023 13:02:02 +0200
Subject: [PATCH 02/30] fix: better management of corner cases during lags
 checks

---
 darts/models/forecasting/regression_model.py | 71 ++++++++++++++------
 1 file changed, 51 insertions(+), 20 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index cd0d797ee8..218b203ec8 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -211,28 +211,34 @@ def __init__(
             )
 
         # set lags
-        def _check_int_lags(lags: int, lags_name: str) -> Optional[List[int]]:
+        def _check_int_lags(lags: int, lags_name: str) -> List[int]:
             raise_if_not(
                 lags > 0, f"{lags_name} must be strictly positive. Given: {lags}."
             )
             # selecting last `lags` lags, starting from position 1 (skipping current, pos 0, the one we want to predict)
             return list(range(-lags, 0))
 
-        def _check_list_lags(lags: list, lags_name: str) -> Optional[List[int]]:
+        def _check_list_lags(lags: list, lags_name: str) -> List[int]:
             for lag in lags:
                 raise_if(
                     not isinstance(lag, int) or (lag >= 0),
                     f"Every element of {lags_name} must be a strictly negative integer. Given: {lags}.",
                 )
-            if lags:
-                return sorted(lags)
+            return sorted(lags)
 
         def _check_dict_lags(
             lags: dict, lags_name: str
         ) -> Optional[Tuple[List[int], Dict[str, List[int]]]]:
-            components_lags = dict()
+
+            raise_if_not(
+                len(lags) > 0,
+                f"When passed as a dictionnary, {lags_name} must contain at least one key.",
+                logger,
+            )
+
             min_lags = None
             max_lags = None
+            components_lags = dict()
             # TODO: use component idx instead of component name for robustness?
             for comp_idx, (comp_name, comp_lags) in enumerate(lags.items()):
                 if isinstance(comp_lags, int):
@@ -246,13 +252,21 @@ def _check_dict_lags(
                 else:
                     raise_log(
                         ValueError(
-                            f"when passed as a dictionnary, {lags_name} for component {comp_name} must be either a "
+                            f"When passed as a dictionnary, {lags_name} for component {comp_name} must be either a "
                             f"strictly positive integer or a list, received : {type(comp_lags)}."
                         ),
                         logger,
                     )
-                min_lags: int = min(components_lags[comp_name])
-                max_lags: int = max(components_lags[comp_name])
+
+                if min_lags is None:
+                    min_lags = components_lags[comp_name][0]
+                else:
+                    min_lags = min(min_lags, components_lags[comp_name][0])
+
+                if max_lags is None:
+                    max_lags = components_lags[comp_name][-1]
+                else:
+                    max_lags = max(max_lags, components_lags[comp_name][-1])
             return [min_lags, max_lags], components_lags
 
         if isinstance(lags, int):
@@ -289,31 +303,38 @@ def _check_dict_lags(
 
         def _check_tuple_future_lags(
             lags_future_covariates: Tuple[int, int], lags_name: str
-        ):
+        ) -> List[int]:
             raise_if_not(
                 lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
                 f"{lags_name} tuple must contain integers >= 0. Given: {lags_future_covariates}.",
             )
-            # TODO: check if it should return None or []
-            if lags_future_covariates[0] + lags_future_covariates[1] == 0:
-                return None
-            else:
-                return list(
-                    range(-lags_future_covariates[0], lags_future_covariates[1])
-                )
+            raise_if(
+                lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0,
+                f"{lags_name} tuple cannot be (0,0).",
+                logger,
+            )
+            return list(range(-lags_future_covariates[0], lags_future_covariates[1]))
 
-        def _check_list_future_lags(lags_future_covariates: List[int], lags_name: str):
+        def _check_list_future_lags(
+            lags_future_covariates: List[int], lags_name: str
+        ) -> List[int]:
             for lag in lags_future_covariates:
                 raise_if(
                     not isinstance(lag, int) or isinstance(lag, bool),
                     f"Every element of {lags_name} must be an integer. Given: {lags_future_covariates}.",
                 )
-            if lags_future_covariates:
-                return sorted(lags_future_covariates)
+            return sorted(lags_future_covariates)
 
         def _check_dict_future_lags(
             lags_future_covariates: Dict[str, Union[Tuple, List]]
         ):
+            raise_if_not(
+                len(lags) > 0,
+                "When passed as a dictionnary, `lags_future_covariates` must contain at least one key.",
+                logger,
+            )
+            min_lags = None
+            max_lags = None
             components_lags = dict()
             # TODO: use component idx instead of component name for robustness?
             for comp_idx, (comp_name, comp_lags) in enumerate(
@@ -330,11 +351,21 @@ def _check_dict_future_lags(
                 else:
                     raise_log(
                         ValueError(
-                            f"when passed as a dictionnary, `future_covariates_lags` for component {comp_name} must be "
+                            f"When passed as a dictionnary, `future_covariates_lags` for component {comp_name} must be "
                             f"either a strictly positive integer or a list, received : {type(comp_lags)}."
                         ),
                         logger,
                     )
+
+            if min_lags is None:
+                min_lags = components_lags[comp_name][0]
+            else:
+                min_lags = min(min_lags, components_lags[comp_name][0])
+
+            if max_lags is None:
+                max_lags = components_lags[comp_name][-1]
+            else:
+                max_lags = max(max_lags, components_lags[comp_name][-1])
             return components_lags
 
         if isinstance(lags_future_covariates, tuple):

From 2dde70fe8e95255e39a560aef5240ba92ab46ee6 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 18 Aug 2023 13:20:56 +0200
Subject: [PATCH 03/30] fix: improved modularity

---
 darts/models/forecasting/regression_model.py | 157 +++++++++----------
 1 file changed, 74 insertions(+), 83 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 218b203ec8..7d3e1a2b6d 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -210,12 +210,26 @@ def __init__(
                 "`lags_future_covariates` tuple must contain integers, not bool",
             )
 
-        # set lags
+        self._set_lags(
+            lags=lags,
+            lags_past_covariates=lags_past_covariates,
+            lags_future_covariates=lags_future_covariates,
+        )
+
+        self.pred_dim = self.output_chunk_length if self.multi_models else 1
+
+    def _set_lags(
+        self,
+        lags: Optional[LAGS_TYPE],
+        lags_past_covariates: Optional[LAGS_TYPE],
+        lags_future_covariates: Optional[FUTURE_LAGS_TYPE],
+    ):
+        """Based on the type of the argument and the nature of the covariates, convert the lags to a list."""
+
         def _check_int_lags(lags: int, lags_name: str) -> List[int]:
             raise_if_not(
                 lags > 0, f"{lags_name} must be strictly positive. Given: {lags}."
             )
-            # selecting last `lags` lags, starting from position 1 (skipping current, pos 0, the one we want to predict)
             return list(range(-lags, 0))
 
         def _check_list_lags(lags: list, lags_name: str) -> List[int]:
@@ -226,6 +240,30 @@ def _check_list_lags(lags: list, lags_name: str) -> List[int]:
                 )
             return sorted(lags)
 
+        def _check_tuple_future_lags(
+            lags_future_covariates: Tuple[int, int], lags_name: str
+        ) -> List[int]:
+            raise_if_not(
+                lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
+                f"{lags_name} tuple must contain stricly positibe integers. Given: {lags_future_covariates}.",
+            )
+            raise_if(
+                lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0,
+                f"{lags_name} tuple cannot be (0, 0) as it corresponds to an empty list of lags.",
+                logger,
+            )
+            return list(range(-lags_future_covariates[0], lags_future_covariates[1]))
+
+        def _check_list_future_lags(
+            lags_future_covariates: List[int], lags_name: str
+        ) -> List[int]:
+            for lag in lags_future_covariates:
+                raise_if(
+                    not isinstance(lag, int) or isinstance(lag, bool),
+                    f"Every element of {lags_name} must be an integer. Given: {lags_future_covariates}.",
+                )
+            return sorted(lags_future_covariates)
+
         def _check_dict_lags(
             lags: dict, lags_name: str
         ) -> Optional[Tuple[List[int], Dict[str, List[int]]]]:
@@ -236,24 +274,43 @@ def _check_dict_lags(
                 logger,
             )
 
+            invalid_type = False
+            supported_types = ""
             min_lags = None
             max_lags = None
             components_lags = dict()
             # TODO: use component idx instead of component name for robustness?
             for comp_idx, (comp_name, comp_lags) in enumerate(lags.items()):
-                if isinstance(comp_lags, int):
-                    components_lags[comp_name] = _check_int_lags(
-                        comp_lags, f"{lags_name} for component {comp_name}"
-                    )
-                elif isinstance(comp_lags, list):
-                    components_lags[comp_name] = _check_list_lags(
-                        comp_lags, f"{lags_name} for component {comp_name}"
-                    )
+                if lags_name == "lags_future_covariates":
+                    if isinstance(comp_lags, tuple):
+                        components_lags[comp_name] = _check_tuple_future_lags(
+                            comp_lags, f"{lags_name} for component {comp_name}"
+                        )
+                    elif isinstance(comp_lags, list):
+                        components_lags[comp_name] = _check_list_future_lags(
+                            comp_lags, f"{lags_name} for component {comp_name}"
+                        )
+                    else:
+                        invalid_type = True
+                        supported_types = "tuple or a list"
                 else:
+                    if isinstance(comp_lags, int):
+                        components_lags[comp_name] = _check_int_lags(
+                            comp_lags, f"{lags_name} for component {comp_name}"
+                        )
+                    elif isinstance(comp_lags, list):
+                        components_lags[comp_name] = _check_list_lags(
+                            comp_lags, f"{lags_name} for component {comp_name}"
+                        )
+                    else:
+                        invalid_type = True
+                        supported_types = "strictly positive integer or a list"
+
+                if invalid_type:
                     raise_log(
                         ValueError(
                             f"When passed as a dictionnary, {lags_name} for component {comp_name} must be either a "
-                            f"strictly positive integer or a list, received : {type(comp_lags)}."
+                            f"{supported_types}, received : {type(comp_lags)}."
                         ),
                         logger,
                     )
@@ -269,6 +326,7 @@ def _check_dict_lags(
                     max_lags = max(max_lags, components_lags[comp_name][-1])
             return [min_lags, max_lags], components_lags
 
+        # perform the type and sanity checks
         if isinstance(lags, int):
             conv_lags = _check_int_lags(lags, "`lags`")
             if conv_lags:
@@ -301,95 +359,28 @@ def _check_dict_lags(
                 # actual lags
                 self.component_lags["past"] = conv_lags[1]
 
-        def _check_tuple_future_lags(
-            lags_future_covariates: Tuple[int, int], lags_name: str
-        ) -> List[int]:
-            raise_if_not(
-                lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
-                f"{lags_name} tuple must contain integers >= 0. Given: {lags_future_covariates}.",
-            )
-            raise_if(
-                lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0,
-                f"{lags_name} tuple cannot be (0,0).",
-                logger,
-            )
-            return list(range(-lags_future_covariates[0], lags_future_covariates[1]))
-
-        def _check_list_future_lags(
-            lags_future_covariates: List[int], lags_name: str
-        ) -> List[int]:
-            for lag in lags_future_covariates:
-                raise_if(
-                    not isinstance(lag, int) or isinstance(lag, bool),
-                    f"Every element of {lags_name} must be an integer. Given: {lags_future_covariates}.",
-                )
-            return sorted(lags_future_covariates)
-
-        def _check_dict_future_lags(
-            lags_future_covariates: Dict[str, Union[Tuple, List]]
-        ):
-            raise_if_not(
-                len(lags) > 0,
-                "When passed as a dictionnary, `lags_future_covariates` must contain at least one key.",
-                logger,
-            )
-            min_lags = None
-            max_lags = None
-            components_lags = dict()
-            # TODO: use component idx instead of component name for robustness?
-            for comp_idx, (comp_name, comp_lags) in enumerate(
-                lags_future_covariates.items()
-            ):
-                if isinstance(comp_lags, tuple):
-                    components_lags[comp_name] = _check_tuple_future_lags(
-                        comp_lags, f"`future_covariates_lags` for {comp_name}"
-                    )
-                elif isinstance(comp_lags, list):
-                    components_lags[comp_name] = _check_list_future_lags(
-                        comp_lags, f"`future_covariates_lags` for {comp_name}"
-                    )
-                else:
-                    raise_log(
-                        ValueError(
-                            f"When passed as a dictionnary, `future_covariates_lags` for component {comp_name} must be "
-                            f"either a strictly positive integer or a list, received : {type(comp_lags)}."
-                        ),
-                        logger,
-                    )
-
-            if min_lags is None:
-                min_lags = components_lags[comp_name][0]
-            else:
-                min_lags = min(min_lags, components_lags[comp_name][0])
-
-            if max_lags is None:
-                max_lags = components_lags[comp_name][-1]
-            else:
-                max_lags = max(max_lags, components_lags[comp_name][-1])
-            return components_lags
-
         if isinstance(lags_future_covariates, tuple):
             conv_lags = _check_tuple_future_lags(
-                lags_future_covariates, "`future_covariates_lags`"
+                lags_future_covariates, "`lags_future_covariates`"
             )
             if conv_lags:
                 self.lags["future"] = conv_lags
         elif isinstance(lags_future_covariates, list):
             conv_lags = _check_list_future_lags(
-                lags_future_covariates, "`future_covariates_lags`"
+                lags_future_covariates, "`lags_future_covariates`"
             )
             if conv_lags:
                 self.lags["future"] = conv_lags
         elif isinstance(lags_future_covariates, dict):
-            conv_lags = _check_dict_future_lags(lags_future_covariates)
+            conv_lags = _check_dict_lags(
+                lags_future_covariates, "`lags_future_covariates`"
+            )
             if conv_lags:
                 # dummy, used to compute the extreme lags
                 self.lags["future"] = conv_lags[0]
                 # actual lags
                 self.component_lags["future"] = conv_lags[1]
 
-        self.pred_dim = self.output_chunk_length if self.multi_models else 1
-
     @property
     def _model_encoder_settings(
         self,

From 65c82a70ea8aab068987d0f8e7b74b68fc990522 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 18 Aug 2023 15:53:06 +0200
Subject: [PATCH 04/30] fix: simplified the logic a bit

---
 darts/models/forecasting/regression_model.py | 48 ++++++++------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 7d3e1a2b6d..5de580173a 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -328,54 +328,46 @@ def _check_dict_lags(
 
         # perform the type and sanity checks
         if isinstance(lags, int):
-            conv_lags = _check_int_lags(lags, "`lags`")
-            if conv_lags:
-                self.lags["target"] = conv_lags
+            self.lags["target"] = _check_int_lags(lags, "`lags`")
         elif isinstance(lags, list):
-            conv_lags = _check_list_lags(lags, "`lags`")
-            if conv_lags:
-                self.lags["target"] = conv_lags
+            self.lags["target"] = _check_list_lags(lags, "`lags`")
         elif isinstance(lags, dict):
             conv_lags = _check_dict_lags(lags, "`lags`")
-            if conv_lags:
+            if conv_lags is not None:
                 # dummy, used to compute the extreme lags
                 self.lags["target"] = conv_lags[0]
                 # actual lags
                 self.component_lags["target"] = conv_lags[1]
 
         if isinstance(lags_past_covariates, int):
-            conv_lags = _check_int_lags(lags_past_covariates, "`lags_past_covariates`")
-            if conv_lags:
-                self.lags["past"] = conv_lags
+            self.lags["past"] = _check_int_lags(
+                lags_past_covariates, "`lags_past_covariates`"
+            )
         elif isinstance(lags_past_covariates, list):
-            conv_lags = _check_list_lags(lags_past_covariates, "`lags_past_covariates`")
-            if conv_lags:
-                self.lags["past"] = conv_lags
+            self.lags["past"] = _check_list_lags(
+                lags_past_covariates, "`lags_past_covariates`"
+            )
         elif isinstance(lags_past_covariates, dict):
             conv_lags = _check_dict_lags(lags_past_covariates, "`lags_past_covariates`")
-            if conv_lags:
+            if conv_lags is not None:
                 # dummy, used to compute the extreme lags
                 self.lags["past"] = conv_lags[0]
                 # actual lags
                 self.component_lags["past"] = conv_lags[1]
 
         if isinstance(lags_future_covariates, tuple):
-            conv_lags = _check_tuple_future_lags(
+            self.lags["future"] = _check_tuple_future_lags(
                 lags_future_covariates, "`lags_future_covariates`"
             )
-            if conv_lags:
-                self.lags["future"] = conv_lags
         elif isinstance(lags_future_covariates, list):
-            conv_lags = _check_list_future_lags(
+            self.lags["future"] = _check_list_future_lags(
                 lags_future_covariates, "`lags_future_covariates`"
             )
-            if conv_lags:
-                self.lags["future"] = conv_lags
         elif isinstance(lags_future_covariates, dict):
             conv_lags = _check_dict_lags(
                 lags_future_covariates, "`lags_future_covariates`"
             )
-            if conv_lags:
+            if conv_lags is not None:
                 # dummy, used to compute the extreme lags
                 self.lags["future"] = conv_lags[0]
                 # actual lags
@@ -420,16 +412,12 @@ def extreme_lags(
         Optional[int],
         Optional[int],
     ]:
-        min_target_lag = self.lags.get("target")[0] if "target" in self.lags else None
+        min_target_lag = self.lags["target"][0] if "target" in self.lags else None
         max_target_lag = self.output_chunk_length - 1
-        min_past_cov_lag = self.lags.get("past")[0] if "past" in self.lags else None
-        max_past_cov_lag = self.lags.get("past")[-1] if "past" in self.lags else None
-        min_future_cov_lag = (
-            self.lags.get("future")[0] if "future" in self.lags else None
-        )
-        max_future_cov_lag = (
-            self.lags.get("future")[-1] if "future" in self.lags else None
-        )
+        min_past_cov_lag = self.lags["past"][0] if "past" in self.lags else None
+        max_past_cov_lag = self.lags["past"][-1] if "past" in self.lags else None
+        min_future_cov_lag = self.lags["future"][0] if "future" in self.lags else None
+        max_future_cov_lag = self.lags["future"][-1] if "future" in self.lags else None
         return (
             min_target_lag,
             max_target_lag,

From 9c5b312815aa77d5ef44ff2c7f30f9365467529d Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 18 Aug 2023 15:53:47 +0200
Subject: [PATCH 05/30] feat: when generating lagged data, the values can be
 extracted using component-specific lags

---
 darts/utils/data/tabularization.py | 39 ++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py
index 74c1c65ea7..fe825543e1 100644
--- a/darts/utils/data/tabularization.py
+++ b/darts/utils/data/tabularization.py
@@ -891,7 +891,13 @@ def _create_lagged_data_by_moving_window(
             # Within each window, the `-1` indexed value (i.e. the value at the very end of
             # the window) corresponds to time `t - min_lag_i`. The negative index of the time
             # `t + lag_i` within this window is, therefore, `-1 + lag_i + min_lag_i`:
-            lags_to_extract = np.array(lags_i, dtype=int) + min_lag_i - 1
+            if isinstance(lags_i, list):
+                lags_to_extract = np.array(lags_i, dtype=int) + min_lag_i - 1
+            else:
+                lags_to_extract = [
+                    np.array(comp_lags, dtype=int) + min_lag_i - 1
+                    for comp_lags in lags_i
+                ]
             lagged_vals = _extract_lagged_vals_from_windows(windows, lags_to_extract)
             X.append(lagged_vals)
         # Cache `start_time_idx` for label creation:
@@ -928,7 +934,8 @@ def _create_lagged_data_by_moving_window(
 
 
 def _extract_lagged_vals_from_windows(
-    windows: np.ndarray, lags_to_extract: Optional[np.ndarray] = None
+    windows: np.ndarray,
+    lags_to_extract: Optional[Union[np.ndarray, List[np.ndarray]]] = None,
 ) -> np.ndarray:
     """
     Helper function called by `_create_lagged_data_by_moving_window` that
@@ -938,19 +945,31 @@ def _extract_lagged_vals_from_windows(
     is done such that the order of elements along axis 1 matches the pattern
     described in the docstring of `create_lagged_data`.
 
-    If `lags_to_extract` is specified, then only those values within each window that
+    If `lags_to_extract` is not specified, all of the values within each window are extracted.
+    If `lags_to_extract` is specified as an np.ndarray, then only those values within each window that
     are indexed by `lags_to_extract` will be returned. In such cases, the shape of the returned
     lagged values is `(num_windows, num_components * lags_to_extract.size, num_series)`. For example,
     if `lags_to_extract = [-2]`, only the second-to-last values within each window will be extracted.
-    If `lags_to_extract` is not specified, all of the values within each window is extracted.
+    If `lags_to_extract` is specified as a list of np.ndarray, the values will be extracted using the
+    lags provided for each component.
     """
     # windows.shape = (num_windows, num_components, num_samples, window_len):
-    if lags_to_extract is not None:
-        windows = windows[:, :, :, lags_to_extract]
-    # windows.shape = (num_windows, window_len, num_components, num_samples):
-    windows = np.moveaxis(windows, (0, 3, 1, 2), (0, 1, 2, 3))
-    # lagged_vals.shape = (num_windows, num_components*window_len, num_samples):
-    lagged_vals = windows.reshape((windows.shape[0], -1, windows.shape[-1]))
+    if isinstance(lags_to_extract, list):
+        # iterate over the components-specific lags
+        comp_windows = [
+            windows[:, i, :, comp_lags_to_extract]
+            for i, comp_lags_to_extract in enumerate(lags_to_extract)
+        ]
+        # windows.shape = (sum(lags_len) across components, num_windows, num_samples):
+        windows = np.concatenate(comp_windows, axis=0)
+        lagged_vals = np.moveaxis(windows, (1, 0, 2), (0, 1, 2))
+    else:
+        if lags_to_extract is not None:
+            windows = windows[:, :, :, lags_to_extract]
+        # windows.shape = (num_windows, window_len, num_components, num_samples):
+        windows = np.moveaxis(windows, (0, 3, 1, 2), (0, 1, 2, 3))
+        # lagged_vals.shape = (num_windows, num_components*window_len, num_samples):
+        lagged_vals = windows.reshape((windows.shape[0], -1, windows.shape[-1]))
     return lagged_vals
 
 

From 753db5b97698ca6c88e1410d28b6f73566406680 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 18 Aug 2023 16:22:42 +0200
Subject: [PATCH 06/30] feat: raise error if all the ts in target/past/future
 don't have the same number of components

---
 darts/models/forecasting/forecasting_model.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py
index 452d2368cd..a848674570 100644
--- a/darts/models/forecasting/forecasting_model.py
+++ b/darts/models/forecasting/forecasting_model.py
@@ -2078,6 +2078,20 @@ def fit(
             ):
                 self.static_covariates = series.static_covariates
         else:
+            # check that all the ts within one group have the same number of components
+            for ts_sequence, cov_name in zip(
+                [series, past_covariates, future_covariates],
+                ["series", "past_covariates", "future_covariates"],
+            ):
+                raise_if(
+                    ts_sequence is not None
+                    and not all(
+                        [ts_sequence[0].width == ts.width for ts in ts_sequence]
+                    ),
+                    f"All the series in `{cov_name}` should have the same number of components",
+                    logger,
+                )
+
             if past_covariates is not None:
                 self._expect_past_covariates = True
             if future_covariates is not None:

From 0cdeee7e70aa90b66e647e51e9e3a1433739fda1 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 21 Aug 2023 15:37:01 +0200
Subject: [PATCH 07/30] feat: added support for component-specific lags in
 fit() and predict()

---
 darts/models/forecasting/regression_model.py | 122 +++++++++++++++----
 darts/utils/data/tabularization.py           |  47 +++++--
 2 files changed, 130 insertions(+), 39 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 5de580173a..727f45870f 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -140,7 +140,7 @@ def __init__(
 
         self.model = model
         self.lags: Dict[str, List[int]] = {}
-        self.component_lags: Dict[str, Dict[str, List[int]]] = {}
+        self.component_lags: Dict[str, Dict[str, Sequence[int]]] = {}
         self.input_dim = None
         self.multi_models = multi_models
         self._considers_static_covariates = use_static_covariates
@@ -266,7 +266,7 @@ def _check_list_future_lags(
 
         def _check_dict_lags(
             lags: dict, lags_name: str
-        ) -> Optional[Tuple[List[int], Dict[str, List[int]]]]:
+        ) -> Optional[Tuple[List[int], Dict[str, Sequence[int]]]]:
 
             raise_if_not(
                 len(lags) > 0,
@@ -474,10 +474,10 @@ def _get_last_prediction_time(self, series, forecast_horizon, overlap_end):
     def _create_lagged_data(
         self, target_series, past_covariates, future_covariates, max_samples_per_ts
     ):
-        lags = self.lags.get("target")
-        lags_past_covariates = self.lags.get("past")
-        lags_future_covariates = self.lags.get("future")
-
+        """
+        If lags were specified component-wise, they are contained in self.component_lags and the values
+        in self.lags should be ignored.
+        """
         (
             features,
             labels,
@@ -488,9 +488,15 @@ def _create_lagged_data(
             output_chunk_length=self.output_chunk_length,
             past_covariates=past_covariates,
             future_covariates=future_covariates,
-            lags=lags,
-            lags_past_covariates=lags_past_covariates,
-            lags_future_covariates=lags_future_covariates,
+            lags=self.component_lags["target"]
+            if "target" in self.component_lags
+            else self.lags.get("target"),
+            lags_past_covariates=self.component_lags["past"]
+            if "past" in self.component_lags
+            else self.lags.get("past"),
+            lags_future_covariates=self.component_lags["future"]
+            if "future" in self.component_lags
+            else self.lags.get("future"),
             uses_static_covariates=self.uses_static_covariates,
             last_static_covariates_shape=None,
             max_samples_per_ts=max_samples_per_ts,
@@ -538,9 +544,15 @@ def _fit_model(
             target_series=target_series,
             past_covariates=past_covariates,
             future_covariates=future_covariates,
-            lags=self.lags.get("target"),
-            lags_past_covariates=self.lags.get("past"),
-            lags_future_covariates=self.lags.get("future"),
+            lags=self.component_lags["target"]
+            if "target" in self.component_lags
+            else self.lags.get("target"),
+            lags_past_covariates=self.component_lags["past"]
+            if "past" in self.component_lags
+            else self.lags.get("past"),
+            lags_future_covariates=self.component_lags["future"]
+            if "future" in self.component_lags
+            else self.lags.get("future"),
             output_chunk_length=self.output_chunk_length,
             concatenate=False,
             use_static_covariates=self.uses_static_covariates,
@@ -663,6 +675,30 @@ def fit(
             future_covariates=seq2series(future_covariates),
         )
 
+        # TODO: if the keys are string, check if they are indeed in the series?
+        # if provided, component-wise lags must be defined for all the components
+        if "target" in self.component_lags:
+            raise_if(
+                len(self.component_lags["target"]) != self.input_dim["target"],
+                f"The training series contain {self.input_dim['target']} components, "
+                f"{len(self.component_lags['target'])} lags were provided. These two values must exactly match.",
+                logger,
+            )
+        if "past" in self.component_lags and "past" in self.input_dim:
+            raise_if(
+                len(self.component_lags["past"]) != self.input_dim["past"],
+                f"The past covariates series contain {self.input_dim['past']} components, "
+                f"{len(self.component_lags['past'])} lags were provided. These two values must exactly match.",
+                logger,
+            )
+        if "future" in self.component_lags and "future" in self.input_dim:
+            raise_if(
+                len(self.component_lags["future"]) != self.input_dim["future"],
+                f"The future covariates series contain {self.input_dim['future']} components, "
+                f"{len(self.component_lags['future'])} lags were provided. These two values must exactly match.",
+                logger,
+            )
+
         self._fit_model(
             series, past_covariates, future_covariates, max_samples_per_ts, **kwargs
         )
@@ -863,23 +899,57 @@ def predict(
                     series_matrix = np.concatenate(
                         [series_matrix, predictions[-1]], axis=1
                     )
-                np_X.append(
-                    series_matrix[
-                        :,
-                        [
-                            lag - (shift + last_step_shift)
-                            for lag in self.lags["target"]
-                        ],
-                    ].reshape(len(series) * num_samples, -1)
-                )
-            # retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+)
-            for cov_type in ["past", "future"]:
-                if cov_type in covariate_matrices:
+                # component-wise lags
+                if "target" in self.component_lags:
+                    tmp_X = [
+                        series_matrix[
+                            :,
+                            [lag - (shift + last_step_shift) for lag in comp_lags],
+                            comp_i,
+                        ]
+                        for comp_i, (comp, comp_lags) in enumerate(
+                            self.component_lags["target"].items()
+                        )
+                    ]
+                    # values are grouped by component
                     np_X.append(
-                        covariate_matrices[cov_type][
-                            :, relative_cov_lags[cov_type] + t_pred
+                        np.concatenate(tmp_X).reshape(len(series) * num_samples, -1)
+                    )
+                else:
+                    # values are grouped by lags
+                    np_X.append(
+                        series_matrix[
+                            :,
+                            [
+                                lag - (shift + last_step_shift)
+                                for lag in self.lags["target"]
+                            ],
                         ].reshape(len(series) * num_samples, -1)
                     )
+            # retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+)
+            for cov_type in ["past", "future"]:
+                if cov_type in covariate_matrices:
+                    # component-wise lags
+                    if cov_type in self.component_lags:
+                        tmp_X = [
+                            covariate_matrices[cov_type][
+                                :,
+                                np.array(comp_lags) - self.lags[cov_type][0] + t_pred,
+                                comp_i,
+                            ]
+                            for comp_i, (comp, comp_lags) in enumerate(
+                                self.component_lags[cov_type].items()
+                            )
+                        ]
+                        np_X.append(
+                            np.concatenate(tmp_X).reshape(len(series) * num_samples, -1)
+                        )
+                    else:
+                        np_X.append(
+                            covariate_matrices[cov_type][
+                                :, relative_cov_lags[cov_type] + t_pred
+                            ].reshape(len(series) * num_samples, -1)
+                        )
 
             # concatenate retrieved lags
             X = np.concatenate(np_X, axis=1)
diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py
index fe825543e1..d5249ee95f 100644
--- a/darts/utils/data/tabularization.py
+++ b/darts/utils/data/tabularization.py
@@ -1,13 +1,15 @@
 import warnings
 from functools import reduce
 from math import inf
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Tuple, Union
 
 try:
     from typing import Literal
 except ImportError:
     from typing_extensions import Literal
 
+from itertools import chain
+
 import numpy as np
 import pandas as pd
 from numpy.lib.stride_tricks import as_strided
@@ -329,9 +331,13 @@ def create_lagged_training_data(
     output_chunk_length: int,
     past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
-    lags: Optional[Sequence[int]] = None,
-    lags_past_covariates: Optional[Sequence[int]] = None,
-    lags_future_covariates: Optional[Sequence[int]] = None,
+    lags: Optional[Union[Sequence[int], Dict[str, Sequence[int]]]] = None,
+    lags_past_covariates: Optional[
+        Union[Sequence[int], Dict[str, Sequence[int]]]
+    ] = None,
+    lags_future_covariates: Optional[
+        Union[Sequence[int], Dict[str, Sequence[int]]]
+    ] = None,
     uses_static_covariates: bool = True,
     last_static_covariates_shape: Optional[Tuple[int, int]] = None,
     max_samples_per_ts: Optional[int] = None,
@@ -676,9 +682,13 @@ def create_lagged_component_names(
     target_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
-    lags: Optional[Sequence[int]] = None,
-    lags_past_covariates: Optional[Sequence[int]] = None,
-    lags_future_covariates: Optional[Sequence[int]] = None,
+    lags: Optional[Union[Sequence[int], Dict[str, Sequence[int]]]] = None,
+    lags_past_covariates: Optional[
+        Union[Sequence[int], Dict[str, Sequence[int]]]
+    ] = None,
+    lags_future_covariates: Optional[
+        Union[Sequence[int], Dict[str, Sequence[int]]]
+    ] = None,
     output_chunk_length: int = 1,
     concatenate: bool = True,
     use_static_covariates: bool = False,
@@ -743,11 +753,17 @@ def create_lagged_component_names(
             continue
 
         components = get_single_series(variate).components.tolist()
-        lagged_feature_names += [
-            f"{name}_{variate_type}_lag{lag}"
-            for lag in variate_lags
-            for name in components
-        ]
+        if isinstance(variate_lags, dict):
+            for name in components:
+                lagged_feature_names += [
+                    f"{name}_{variate_type}_lag{lag}" for lag in variate_lags[name]
+                ]
+        else:
+            lagged_feature_names += [
+                f"{name}_{variate_type}_lag{lag}"
+                for lag in variate_lags
+                for name in components
+            ]
 
         if variate_type == "target" and lags:
             label_feature_names = [
@@ -894,9 +910,10 @@ def _create_lagged_data_by_moving_window(
             if isinstance(lags_i, list):
                 lags_to_extract = np.array(lags_i, dtype=int) + min_lag_i - 1
             else:
+                # Lags are grouped by component, extracted from the same window
                 lags_to_extract = [
                     np.array(comp_lags, dtype=int) + min_lag_i - 1
-                    for comp_lags in lags_i
+                    for comp_lags in lags_i.values()
                 ]
             lagged_vals = _extract_lagged_vals_from_windows(windows, lags_to_extract)
             X.append(lagged_vals)
@@ -1262,6 +1279,10 @@ def _get_feature_times(
         [target_series, past_covariates, future_covariates],
         [lags, lags_past_covariates, lags_future_covariates],
     ):
+        # TODO: information is available in model.lags, not sure how to make the info get here
+        if isinstance(lags_i, dict):
+            lags_i = list(set(chain(*lags_i.values())))
+
         if check_inputs and (series_i is not None):
             _check_series_length(
                 series_i,

From f24ea84756923cebfcc0d54c948e3cc6e70d3565 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 21 Aug 2023 16:53:42 +0200
Subject: [PATCH 08/30] test: added tests and fix some bug accordingly

---
 darts/models/forecasting/regression_model.py  | 40 ++++----
 .../forecasting/test_regression_models.py     | 98 ++++++++++++++++++-
 2 files changed, 117 insertions(+), 21 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 727f45870f..b0cdcecc11 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -228,7 +228,7 @@ def _set_lags(
 
         def _check_int_lags(lags: int, lags_name: str) -> List[int]:
             raise_if_not(
-                lags > 0, f"{lags_name} must be strictly positive. Given: {lags}."
+                lags > 0, f"`{lags_name}` must be strictly positive. Given: {lags}."
             )
             return list(range(-lags, 0))
 
@@ -236,7 +236,7 @@ def _check_list_lags(lags: list, lags_name: str) -> List[int]:
             for lag in lags:
                 raise_if(
                     not isinstance(lag, int) or (lag >= 0),
-                    f"Every element of {lags_name} must be a strictly negative integer. Given: {lags}.",
+                    f"Every element of `{lags_name}` must be a strictly negative integer. Given: {lags}.",
                 )
             return sorted(lags)
 
@@ -245,11 +245,11 @@ def _check_tuple_future_lags(
         ) -> List[int]:
             raise_if_not(
                 lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
-                f"{lags_name} tuple must contain stricly positibe integers. Given: {lags_future_covariates}.",
+                f"`{lags_name}` tuple must contain stricly positibe integers. Given: {lags_future_covariates}.",
             )
             raise_if(
                 lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0,
-                f"{lags_name} tuple cannot be (0, 0) as it corresponds to an empty list of lags.",
+                f"`{lags_name}` tuple cannot be (0, 0) as it corresponds to an empty list of lags.",
                 logger,
             )
             return list(range(-lags_future_covariates[0], lags_future_covariates[1]))
@@ -260,7 +260,7 @@ def _check_list_future_lags(
             for lag in lags_future_covariates:
                 raise_if(
                     not isinstance(lag, int) or isinstance(lag, bool),
-                    f"Every element of {lags_name} must be an integer. Given: {lags_future_covariates}.",
+                    f"Every element of `{lags_name}` must be an integer. Given: {lags_future_covariates}.",
                 )
             return sorted(lags_future_covariates)
 
@@ -270,7 +270,7 @@ def _check_dict_lags(
 
             raise_if_not(
                 len(lags) > 0,
-                f"When passed as a dictionnary, {lags_name} must contain at least one key.",
+                f"When passed as a dictionnary, `{lags_name}` must contain at least one key.",
                 logger,
             )
 
@@ -284,11 +284,11 @@ def _check_dict_lags(
                 if lags_name == "lags_future_covariates":
                     if isinstance(comp_lags, tuple):
                         components_lags[comp_name] = _check_tuple_future_lags(
-                            comp_lags, f"{lags_name} for component {comp_name}"
+                            comp_lags, f"`{lags_name}` for component {comp_name}"
                         )
                     elif isinstance(comp_lags, list):
                         components_lags[comp_name] = _check_list_future_lags(
-                            comp_lags, f"{lags_name} for component {comp_name}"
+                            comp_lags, f"`{lags_name}` for component {comp_name}"
                         )
                     else:
                         invalid_type = True
@@ -296,11 +296,11 @@ def _check_dict_lags(
                 else:
                     if isinstance(comp_lags, int):
                         components_lags[comp_name] = _check_int_lags(
-                            comp_lags, f"{lags_name} for component {comp_name}"
+                            comp_lags, f"`{lags_name}` for component {comp_name}"
                         )
                     elif isinstance(comp_lags, list):
                         components_lags[comp_name] = _check_list_lags(
-                            comp_lags, f"{lags_name} for component {comp_name}"
+                            comp_lags, f"`{lags_name}` for component {comp_name}"
                         )
                     else:
                         invalid_type = True
@@ -309,7 +309,7 @@ def _check_dict_lags(
                 if invalid_type:
                     raise_log(
                         ValueError(
-                            f"When passed as a dictionnary, {lags_name} for component {comp_name} must be either a "
+                            f"When passed as a dictionnary, `{lags_name}` for component {comp_name} must be either a "
                             f"{supported_types}, received : {type(comp_lags)}."
                         ),
                         logger,
@@ -328,11 +328,11 @@ def _check_dict_lags(
 
         # perform the type and sanity checks
         if isinstance(lags, int):
-            self.lags["target"] = _check_int_lags(lags, "`lags`")
+            self.lags["target"] = _check_int_lags(lags, "lags")
         elif isinstance(lags, list):
-            self.lags["target"] = _check_list_lags(lags, "`lags`")
+            self.lags["target"] = _check_list_lags(lags, "lags")
         elif isinstance(lags, dict):
-            conv_lags = _check_dict_lags(lags, "`lags`")
+            conv_lags = _check_dict_lags(lags, "lags")
             if conv_lags is not None:
                 # dummy, used to compute the extreme lags
                 self.lags["target"] = conv_lags[0]
@@ -341,14 +341,14 @@ def _check_dict_lags(
 
         if isinstance(lags_past_covariates, int):
             self.lags["past"] = _check_int_lags(
-                lags_past_covariates, "`lags_past_covariates`"
+                lags_past_covariates, "lags_past_covariates"
             )
         elif isinstance(lags_past_covariates, list):
             self.lags["past"] = _check_list_lags(
-                lags_past_covariates, "`lags_past_covariates`"
+                lags_past_covariates, "lags_past_covariates"
             )
         elif isinstance(lags_past_covariates, dict):
-            conv_lags = _check_dict_lags(lags_past_covariates, "`lags_past_covariates`")
+            conv_lags = _check_dict_lags(lags_past_covariates, "lags_past_covariates")
             if conv_lags is not None:
                 # dummy, used to compute the extreme lags
                 self.lags["past"] = conv_lags[0]
@@ -357,15 +357,15 @@ def _check_dict_lags(
 
         if isinstance(lags_future_covariates, tuple):
             self.lags["future"] = _check_tuple_future_lags(
-                lags_future_covariates, "`lags_future_covariates`"
+                lags_future_covariates, "lags_future_covariates"
             )
         elif isinstance(lags_future_covariates, list):
             self.lags["future"] = _check_list_future_lags(
-                lags_future_covariates, "`lags_future_covariates`"
+                lags_future_covariates, "lags_future_covariates"
             )
         elif isinstance(lags_future_covariates, dict):
             conv_lags = _check_dict_lags(
-                lags_future_covariates, "`lags_future_covariates`"
+                lags_future_covariates, "lags_future_covariates"
             )
             if conv_lags is not None:
                 # dummy, used to compute the extreme lags
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 1fecded0f4..78c89c0167 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -420,7 +420,9 @@ def test_model_construction(self, config):
         # testing lags_past_covariates
         model_instance = model(lags=None, lags_past_covariates=3, multi_models=mode)
         assert model_instance.lags.get("past") == [-3, -2, -1]
-        # testing lags_future covariates
+        # lags_future_covariates does not support a single int
+
+        # TESTING TUPLE of int, only supported by lags_future_covariates
         model_instance = model(
             lags=None, lags_future_covariates=(3, 5), multi_models=mode
         )
@@ -435,6 +437,25 @@ def test_model_construction(self, config):
         model_instance = model(lags_past_covariates=values, multi_models=mode)
         assert model_instance.lags.get("past") == values
         # testing lags_future_covariates
+        values = [-5, -1, 5]
+        model_instance = model(lags_future_covariates=values, multi_models=mode)
+        assert model_instance.lags.get("future") == values
+
+        # TESTING DICT, lags are specified component-wise
+        # model.lags contains the extreme across the components
+        values = {"comp0": [-4, -2], "comp1": [-5, -3]}
+        model_instance = model(lags=values, multi_models=mode)
+        assert model_instance.lags.get("target") == [-5, -2]
+        assert model_instance.component_lags.get("target") == values
+        # testing lags_past_covariates
+        model_instance = model(lags_past_covariates=values, multi_models=mode)
+        assert model_instance.lags.get("past") == [-5, -2]
+        assert model_instance.component_lags.get("past") == values
+        # testing lags_future_covariates
+        values = {"comp0": [-4, 2], "comp1": [-5, 3]}
+        model_instance = model(lags_future_covariates=values, multi_models=mode)
+        assert model_instance.lags.get("future") == [-5, 3]
+        assert model_instance.component_lags.get("future") == values
 
         with pytest.raises(ValueError):
             model(multi_models=mode)
@@ -464,6 +485,10 @@ def test_model_construction(self, config):
             model(lags=5, lags_future_covariates=(1, True), multi_models=mode)
         with pytest.raises(ValueError):
             model(lags=5, lags_future_covariates=(1, 1.0), multi_models=mode)
+        with pytest.raises(ValueError):
+            model(lags=5, lags_future_covariates={}, multi_models=mode)
+        with pytest.raises(ValueError):
+            model(lags=None, lags_future_covariates={}, multi_models=mode)
 
     @pytest.mark.parametrize("mode", [True, False])
     def test_training_data_creation(self, mode):
@@ -1519,6 +1544,77 @@ def test_integer_indexed_series(self, mode):
         # the time axis returned by the second model should be as expected
         assert all(preds[1].time_index == pd.RangeIndex(start=50, stop=70, step=2))
 
+    @pytest.mark.parametrize(
+        "config",
+        [
+            ({"lags": [-3, -2, -1]}, {"lags": {"gaussian": 3}}),
+            ({"lags": 3}, {"lags": {"gaussian": 3, "sine": 3}}),
+            ({"lags_past_covariates": 2}, {"lags_past_covariates": {"lin_past": 2}}),
+            (
+                {"lags": 5, "lags_future_covariates": [-2, 3]},
+                {
+                    "lags": {
+                        "gaussian": [-5, -4, -3, -2, -1],
+                        "sine": [-5, -4, -3, -2, -1],
+                    },
+                    "lags_future_covariates": {
+                        "lin_future": [-2, 3],
+                        "sine_future": [-2, 3],
+                    },
+                },
+            ),
+        ],
+    )
+    def test_component_specific_lags(self, config):
+        """Verify that the same lags, defined using int/list or dictionnaries yield the same results"""
+        list_lags, dict_lags = config
+        multivar_target = "lags" in dict_lags and len(dict_lags["lags"]) > 1
+        multivar_future_cov = (
+            "lags_future_covariates" in dict_lags
+            and len(dict_lags["lags_future_covariates"]) > 1
+        )
+
+        # create series based on the model parameters
+        series = tg.gaussian_timeseries(length=20, column_name="gaussian")
+        if multivar_target:
+            series = series.stack(tg.sine_timeseries(length=20, column_name="sine"))
+
+        future_cov = tg.linear_timeseries(length=30, column_name="lin_future")
+        if multivar_future_cov:
+            future_cov = future_cov.stack(
+                tg.sine_timeseries(length=30, column_name="sine_future")
+            )
+
+        past_cov = tg.linear_timeseries(length=30, column_name="lin_past")
+
+        # the lags are identical across the components for each series
+        model = LinearRegressionModel(**list_lags)
+        model.fit(
+            series=series,
+            past_covariates=past_cov if model.supports_past_covariates else None,
+            future_covariates=future_cov if model.supports_future_covariates else None,
+        )
+
+        # the lags are specified for each component, individually
+        model2 = LinearRegressionModel(**dict_lags)
+        model2.fit(
+            series=series,
+            past_covariates=past_cov if model2.supports_past_covariates else None,
+            future_covariates=future_cov if model2.supports_future_covariates else None,
+        )
+
+        # n == output_chunk_length
+        pred = model.predict(1)
+        pred2 = model2.predict(1)
+        np.testing.assert_array_almost_equal(pred.values(), pred2.values())
+        assert pred.time_index.equals(pred2.time_index)
+
+        # n > output_chunk_length
+        pred = model.predict(3)
+        pred2 = model2.predict(3)
+        np.testing.assert_array_almost_equal(pred.values(), pred2.values())
+        assert pred.time_index.equals(pred2.time_index)
+
     @pytest.mark.parametrize(
         "config",
         itertools.product(

From 01b8409b024b83720b3401bc93bbc448a9f014a1 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 21 Aug 2023 17:35:48 +0200
Subject: [PATCH 09/30] feat: component-wise lags support encoders, improved
 sanity checks

---
 darts/models/forecasting/regression_model.py | 50 +++++++++++---------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index b0cdcecc11..c5abb800fb 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -675,29 +675,33 @@ def fit(
             future_covariates=seq2series(future_covariates),
         )
 
-        # TODO: if the keys are string, check if they are indeed in the series?
-        # if provided, component-wise lags must be defined for all the components
-        if "target" in self.component_lags:
-            raise_if(
-                len(self.component_lags["target"]) != self.input_dim["target"],
-                f"The training series contain {self.input_dim['target']} components, "
-                f"{len(self.component_lags['target'])} lags were provided. These two values must exactly match.",
-                logger,
-            )
-        if "past" in self.component_lags and "past" in self.input_dim:
-            raise_if(
-                len(self.component_lags["past"]) != self.input_dim["past"],
-                f"The past covariates series contain {self.input_dim['past']} components, "
-                f"{len(self.component_lags['past'])} lags were provided. These two values must exactly match.",
-                logger,
-            )
-        if "future" in self.component_lags and "future" in self.input_dim:
-            raise_if(
-                len(self.component_lags["future"]) != self.input_dim["future"],
-                f"The future covariates series contain {self.input_dim['future']} components, "
-                f"{len(self.component_lags['future'])} lags were provided. These two values must exactly match.",
-                logger,
-            )
+        # if provided, component-wise lags must be defined for all the components of the first series
+        for variate_type, variate in zip(
+            ["target", "past", "future"], [series, past_covariates, future_covariates]
+        ):
+            if variate_type in self.component_lags:
+                provided_components = set(self.component_lags[variate_type].keys())
+                required_components = set(variate[0].components)
+                # lags were specified for unrecognized components
+                wrong_components = list(provided_components - required_components)
+                if len(wrong_components) > 0:
+                    logger.warning(
+                        f"Lags of components not present in the series ({wrong_components}) were ignored."
+                    )
+
+                missing_keys = list(required_components - provided_components)
+                raise_if(
+                    len(missing_keys) > 0,
+                    f"The {variate_type} series contains {self.input_dim[variate_type]} components, lags were "
+                    f"provided for {len(self.component_lags[variate_type])} of them. The lags for the "
+                    f"following components must be provided: {missing_keys}.",
+                    logger,
+                )
+                # reorder the components based on the input series
+                self.component_lags[variate_type] = {
+                    comp_name: self.component_lags[variate_type][comp_name]
+                    for comp_name in variate[0].components
+                }
 
         self._fit_model(
             series, past_covariates, future_covariates, max_samples_per_ts, **kwargs

From a671af875d47834de7856b218b782af52a73b37d Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Wed, 23 Aug 2023 09:26:45 +0200
Subject: [PATCH 10/30] feat: possibility to declare default lags for all the
 not specified components, updated changelog

---
 CHANGELOG.md                                  |  1 +
 darts/models/forecasting/regression_model.py  | 46 +++++++++++++------
 .../forecasting/test_regression_models.py     | 13 ++++++
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7cdc9db33..046e99e8b9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 **Improved**
 - `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou).
 - Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch` that allows to use an integer `start` either as the index position or index value/label for `series` indexed with a `pd.RangeIndex`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou).
+- `RegressionModel` can now be created with different lags for each component of the target and past/future covariates series. [#1962](https://github.com/unit8co/darts/pull/1962) by [Antoine Madrona](https://github.com/madtoinou).
 
 **Fixed**
 - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou).
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index c5abb800fb..5a8ad00b48 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -84,20 +84,31 @@ def __init__(
         Parameters
         ----------
         lags
-            Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
-            are used (from -1 backward). Otherwise, a list of integers with lags (each lag must be < 0).
-            In order to specify component-wise lags, a dictionnary with the component name or index as key and the
-            lags value can be provided. The number of keys in the dictionnary must match the number of components in
-            the series.
+            Lagged target values used to predict the next time step.
+            If an integer is given the last `lags` past lags are used (from -1 backward).
+            If a list of integers, each lag must be < 0.
+            If a dictionnary, the keys must be the components' name  (first series when using multiple series) and
+            the values corresponds to the lags (integer or list of integers). The key 'default_lags' can be used to
+            provide fallback lags values for un-specified components. An error will be raised if some components are
+            missing and the 'default_lags' key is not present in the dictionnary.
         lags_past_covariates
-            Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
-            `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
-            with lags < 0 is required.
+            Number of lagged past_covariates values used to predict the next time step.
+            If an integer is given the last `lags_past_covariates` past lags are used (inclusive, starting from lag -1).
+            If a list of integers, each lag must be < 0.
+            If a dictionnary, the keys must be the components' name  (first series when using multiple series) and
+            the values corresponds to the lags (integer or list of integers). The key 'default_lags' can be used to
+            provide fallback lags values for un-specified components. An error will be raised if some components are
+            missing and the 'default_lags' key is not present in the dictionnary.
         lags_future_covariates
-            Number of lagged future_covariates values used to predict the next time step. If a tuple (past, future) is
-            given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
-            `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
-            of integers with lags is required.
+            Number of lagged future_covariates values used to predict the next time step.
+            If a tuple (past, future) is given the last `past` lags in the past are used (inclusive, starting from
+            lag -1) along with the first `future` future lags (starting from 0 - the prediction time - up to
+            `future - 1` included).
+            If a list of integer, the values will be used as is.
+            If a dictionnary, the keys must be the components' name  (first series when using multiple series) and
+            the values corresponds to the lags (integer or list of integers). The key 'default_lags' can be used to
+            provide fallback lags values for un-specified components. An error will be raised if some components are
+            missing and the 'default_lags' key is not present in the dictionnary.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
@@ -680,10 +691,13 @@ def fit(
             ["target", "past", "future"], [series, past_covariates, future_covariates]
         ):
             if variate_type in self.component_lags:
+                # ignore the fallback lags entry
                 provided_components = set(self.component_lags[variate_type].keys())
                 required_components = set(variate[0].components)
                 # lags were specified for unrecognized components
-                wrong_components = list(provided_components - required_components)
+                wrong_components = list(
+                    provided_components - {"default_lags"} - required_components
+                )
                 if len(wrong_components) > 0:
                     logger.warning(
                         f"Lags of components not present in the series ({wrong_components}) were ignored."
@@ -691,15 +705,17 @@ def fit(
 
                 missing_keys = list(required_components - provided_components)
                 raise_if(
-                    len(missing_keys) > 0,
+                    len(missing_keys) > 0 and "default_lags" not in provided_components,
                     f"The {variate_type} series contains {self.input_dim[variate_type]} components, lags were "
                     f"provided for {len(self.component_lags[variate_type])} of them. The lags for the "
                     f"following components must be provided: {missing_keys}.",
                     logger,
                 )
-                # reorder the components based on the input series
+                # reorder the components based on the input series, insert the default when necessary
                 self.component_lags[variate_type] = {
                     comp_name: self.component_lags[variate_type][comp_name]
+                    if comp_name in self.component_lags[variate_type]
+                    else self.component_lags[variate_type]["default_lags"]
                     for comp_name in variate[0].components
                 }
 
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 78c89c0167..86d02c85c5 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -1563,6 +1563,19 @@ def test_integer_indexed_series(self, mode):
                     },
                 },
             ),
+            (
+                {"lags": 5, "lags_future_covariates": [-2, 3]},
+                {
+                    "lags": {
+                        "gaussian": [-5, -4, -3, -2, -1],
+                        "sine": [-5, -4, -3, -2, -1],
+                    },
+                    "lags_future_covariates": {
+                        "sine_future": [-2, 3],
+                        "default_lags": [-2, 3],
+                    },
+                },
+            ),
         ],
     )
     def test_component_specific_lags(self, config):

From 2aa96a4fa69e7d0f96da08151d18e7602ab7c965 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Wed, 23 Aug 2023 10:54:01 +0200
Subject: [PATCH 11/30] test: adding a test for the lagged data creation

---
 darts/models/forecasting/regression_model.py  |  2 +-
 .../forecasting/test_regression_models.py     | 52 ++++++++++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 5a8ad00b48..444c9c7859 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -256,7 +256,7 @@ def _check_tuple_future_lags(
         ) -> List[int]:
             raise_if_not(
                 lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
-                f"`{lags_name}` tuple must contain stricly positibe integers. Given: {lags_future_covariates}.",
+                f"`{lags_name}` tuple must contain stricly positive integers. Given: {lags_future_covariates}.",
             )
             raise_if(
                 lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0,
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 86d02c85c5..51d1256b00 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -492,7 +492,8 @@ def test_model_construction(self, config):
 
     @pytest.mark.parametrize("mode", [True, False])
     def test_training_data_creation(self, mode):
-        # testing _get_training_data function
+        """testing _get_training_data function"""
+        # lags defined using lists of integers
         model_instance = RegressionModel(
             lags=self.lags_1["target"],
             lags_past_covariates=self.lags_1["past"],
@@ -541,6 +542,55 @@ def test_training_data_creation(self, mode):
         ]
         assert list(training_labels[0]) == [82, 182, 282]
 
+        # lags defined using dictionnaries
+        # cannot use 'default_lags' because it's converted in `fit()`, before calling `_created_lagged_data`
+        model_instance = RegressionModel(
+            lags={"0-trgt-0": [-5, -4], "0-trgt-1": [-3, -2], "0-trgt-2": [-2, -1]},
+            lags_past_covariates={"0-pcov-0": [-10], "0-pvoc-1": [-7]},
+            lags_future_covariates={"0-fcov-0": (2, 2)},
+            multi_models=mode,
+        )
+
+        max_samples_per_ts = 3
+
+        # using only one series of each
+        training_samples, training_labels = model_instance._create_lagged_data(
+            target_series=self.target_series[0],
+            past_covariates=self.past_covariates[0],
+            future_covariates=self.future_covariates[0],
+            max_samples_per_ts=max_samples_per_ts,
+        )
+
+        # checking number of dimensions
+        assert len(training_samples.shape) == 2  # samples, features
+        assert len(training_labels.shape) == 2  # samples, components (multivariate)
+        assert training_samples.shape[0] == training_labels.shape[0]
+        assert training_samples.shape[0] == max_samples_per_ts
+        assert (
+            training_samples.shape[1]
+            == 6  # [-4, -3], [-3, -2], [-2, -1]
+            + 2  # [-10], [-7]
+            + 4  # [-2, -1, 0, 1]
+        )
+
+        # check last sample
+        assert list(training_labels[0]) == [97, 197, 297]
+        # lags are grouped by components instead of lags
+        assert list(training_samples[0, :]) == [
+            92,
+            93,
+            194,
+            195,
+            295,
+            296,  # comp_i = comp_0 + i*100
+            10087,
+            10190,  # past cov; target + 10'000
+            20095,
+            20096,
+            20097,
+            20098,  # future cov; target + 20'000
+        ]
+
     @pytest.mark.parametrize("mode", [True, False])
     def test_prediction_data_creation(self, mode):
         # assigning correct names to variables

From c3133b27eab92b71a421fa55b0fbf1a8e30d8d5f Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Wed, 23 Aug 2023 10:55:09 +0200
Subject: [PATCH 12/30] fix: typo

---
 darts/tests/models/forecasting/test_regression_models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 51d1256b00..eabee4447d 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -545,7 +545,7 @@ def test_training_data_creation(self, mode):
         # lags defined using dictionnaries
         # cannot use 'default_lags' because it's converted in `fit()`, before calling `_created_lagged_data`
         model_instance = RegressionModel(
-            lags={"0-trgt-0": [-5, -4], "0-trgt-1": [-3, -2], "0-trgt-2": [-2, -1]},
+            lags={"0-trgt-0": [-4, -3], "0-trgt-1": [-3, -2], "0-trgt-2": [-2, -1]},
             lags_past_covariates={"0-pcov-0": [-10], "0-pvoc-1": [-7]},
             lags_future_covariates={"0-fcov-0": (2, 2)},
             multi_models=mode,
@@ -577,8 +577,8 @@ def test_training_data_creation(self, mode):
         assert list(training_labels[0]) == [97, 197, 297]
         # lags are grouped by components instead of lags
         assert list(training_samples[0, :]) == [
-            92,
             93,
+            94,
             194,
             195,
             295,

From 646b6716bcc8e9309e0ec70755959ad77fe38dd1 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 25 Aug 2023 16:48:13 +0200
Subject: [PATCH 13/30] fix: addressing review comments

---
 darts/models/forecasting/regression_model.py  | 284 ++++++++++--------
 .../forecasting/test_regression_models.py     |   2 +-
 darts/utils/data/tabularization.py            |  10 +-
 3 files changed, 162 insertions(+), 134 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 444c9c7859..c55d7acca3 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -151,7 +151,7 @@ def __init__(
 
         self.model = model
         self.lags: Dict[str, List[int]] = {}
-        self.component_lags: Dict[str, Dict[str, Sequence[int]]] = {}
+        self.component_lags: Dict[str, Dict[str, List[int]]] = {}
         self.input_dim = None
         self.multi_models = multi_models
         self._considers_static_covariates = use_static_covariates
@@ -187,63 +187,44 @@ def __init__(
             "At least one of `lags`, `lags_future_covariates` or `lags_past_covariates` must be not None.",
         )
 
-        lags_type_checks = [
-            (lags, "lags"),
-            (lags_past_covariates, "lags_past_covariates"),
-        ]
-
-        for _lags, lags_name in lags_type_checks:
-            raise_if_not(
-                isinstance(_lags, (int, list, dict)) or _lags is None,
-                f"`{lags_name}` must be of type int, list or dict. Given: {type(_lags)}.",
-            )
-            raise_if(
-                isinstance(_lags, bool),
-                f"`{lags_name}` must be of type int, list or dict, not bool.",
-            )
-
-        raise_if_not(
-            isinstance(lags_future_covariates, (tuple, list, dict))
-            or lags_future_covariates is None,
-            f"`lags_future_covariates` must be of type tuple, list or dict. Given: {type(lags_future_covariates)}.",
-        )
-
-        if isinstance(lags_future_covariates, tuple):
-            raise_if_not(
-                len(lags_future_covariates) == 2
-                and isinstance(lags_future_covariates[0], int)
-                and isinstance(lags_future_covariates[1], int),
-                "`lags_future_covariates` tuple must be of length 2, and must contain two integers",
-            )
-            raise_if(
-                isinstance(lags_future_covariates[0], bool)
-                or isinstance(lags_future_covariates[1], bool),
-                "`lags_future_covariates` tuple must contain integers, not bool",
-            )
-
-        self._set_lags(
+        # convert lags arguments to list of int
+        processed_lags, processed_component_lags = self._generate_lags(
             lags=lags,
             lags_past_covariates=lags_past_covariates,
             lags_future_covariates=lags_future_covariates,
         )
 
+        self.lags = processed_lags
+        self.component_lags = processed_component_lags
+
         self.pred_dim = self.output_chunk_length if self.multi_models else 1
 
-    def _set_lags(
+    def _generate_lags(
         self,
         lags: Optional[LAGS_TYPE],
         lags_past_covariates: Optional[LAGS_TYPE],
         lags_future_covariates: Optional[FUTURE_LAGS_TYPE],
-    ):
-        """Based on the type of the argument and the nature of the covariates, convert the lags to a list."""
+    ) -> Tuple[Dict[str, List[int]], Dict[str, Dict[str, List[int]]]]:
+        """
+        Based on the type of the argument and the nature of the covariates, perform some sanity checks before
+        converting the lags to a list of integer.
+
+        If lags are provided as a dictionary, the lags values are contained in self.component_lags and the self.lags
+        attributes contain only the extreme values
+        If the lags are provided as integer, list, tuple or dictionary containing only the 'default_lags' keys, the lags
+        values are contained in the self.lags attribute and the self.component_lags is an empty dictionary.
+
+        `lags` and `lags_past_covariates` are processed using the same local functions,
+        `lags_future_covariates` is processed with different local functions
+        """
 
-        def _check_int_lags(lags: int, lags_name: str) -> List[int]:
+        def _process_int_lags(lags: int, lags_name: str) -> List[int]:
             raise_if_not(
                 lags > 0, f"`{lags_name}` must be strictly positive. Given: {lags}."
             )
             return list(range(-lags, 0))
 
-        def _check_list_lags(lags: list, lags_name: str) -> List[int]:
+        def _process_list_lags(lags: list, lags_name: str) -> List[int]:
             for lag in lags:
                 raise_if(
                     not isinstance(lag, int) or (lag >= 0),
@@ -251,12 +232,27 @@ def _check_list_lags(lags: list, lags_name: str) -> List[int]:
                 )
             return sorted(lags)
 
-        def _check_tuple_future_lags(
+        def _process_tuple_future_lags(
             lags_future_covariates: Tuple[int, int], lags_name: str
         ) -> List[int]:
+            raise_if_not(
+                len(lags_future_covariates) == 2
+                and isinstance(lags_future_covariates[0], int)
+                and isinstance(lags_future_covariates[1], int),
+                f"`{lags_name}` tuple must be of length 2, and must contain two integers",
+                logger,
+            )
+
+            raise_if(
+                isinstance(lags_future_covariates[0], bool)
+                or isinstance(lags_future_covariates[1], bool),
+                f"`{lags_name}` tuple must contain integers, not bool",
+                logger,
+            )
+
             raise_if_not(
                 lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
-                f"`{lags_name}` tuple must contain stricly positive integers. Given: {lags_future_covariates}.",
+                f"`{lags_name}` tuple must contain positive integers. Given: {lags_future_covariates}.",
             )
             raise_if(
                 lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0,
@@ -265,7 +261,7 @@ def _check_tuple_future_lags(
             )
             return list(range(-lags_future_covariates[0], lags_future_covariates[1]))
 
-        def _check_list_future_lags(
+        def _process_list_future_lags(
             lags_future_covariates: List[int], lags_name: str
         ) -> List[int]:
             for lag in lags_future_covariates:
@@ -275,13 +271,13 @@ def _check_list_future_lags(
                 )
             return sorted(lags_future_covariates)
 
-        def _check_dict_lags(
+        def _process_dict_lags(
             lags: dict, lags_name: str
-        ) -> Optional[Tuple[List[int], Dict[str, Sequence[int]]]]:
+        ) -> Tuple[List[int], Dict[str, List[int]]]:
 
             raise_if_not(
                 len(lags) > 0,
-                f"When passed as a dictionnary, `{lags_name}` must contain at least one key.",
+                f"When passed as a dictionary, `{lags_name}` must contain at least one key.",
                 logger,
             )
 
@@ -289,16 +285,15 @@ def _check_dict_lags(
             supported_types = ""
             min_lags = None
             max_lags = None
-            components_lags = dict()
-            # TODO: use component idx instead of component name for robustness?
-            for comp_idx, (comp_name, comp_lags) in enumerate(lags.items()):
+            components_lags: Dict[str, List[int]] = dict()
+            for comp_name, comp_lags in lags.items():
                 if lags_name == "lags_future_covariates":
                     if isinstance(comp_lags, tuple):
-                        components_lags[comp_name] = _check_tuple_future_lags(
+                        components_lags[comp_name] = _process_tuple_future_lags(
                             comp_lags, f"`{lags_name}` for component {comp_name}"
                         )
                     elif isinstance(comp_lags, list):
-                        components_lags[comp_name] = _check_list_future_lags(
+                        components_lags[comp_name] = _process_list_future_lags(
                             comp_lags, f"`{lags_name}` for component {comp_name}"
                         )
                     else:
@@ -306,11 +301,11 @@ def _check_dict_lags(
                         supported_types = "tuple or a list"
                 else:
                     if isinstance(comp_lags, int):
-                        components_lags[comp_name] = _check_int_lags(
+                        components_lags[comp_name] = _process_int_lags(
                             comp_lags, f"`{lags_name}` for component {comp_name}"
                         )
                     elif isinstance(comp_lags, list):
-                        components_lags[comp_name] = _check_list_lags(
+                        components_lags[comp_name] = _process_list_lags(
                             comp_lags, f"`{lags_name}` for component {comp_name}"
                         )
                     else:
@@ -320,7 +315,7 @@ def _check_dict_lags(
                 if invalid_type:
                     raise_log(
                         ValueError(
-                            f"When passed as a dictionnary, `{lags_name}` for component {comp_name} must be either a "
+                            f"When passed as a dictionary, `{lags_name}` for component {comp_name} must be either a "
                             f"{supported_types}, received : {type(comp_lags)}."
                         ),
                         logger,
@@ -335,54 +330,94 @@ def _check_dict_lags(
                     max_lags = components_lags[comp_name][-1]
                 else:
                     max_lags = max(max_lags, components_lags[comp_name][-1])
-            return [min_lags, max_lags], components_lags
+
+            # revert to lags shared across components logic
+            if list(components_lags.keys()) == ["default_lags"]:
+                return components_lags["default_lags"], {}
+            else:
+                return [min_lags, max_lags], components_lags
 
         # perform the type and sanity checks
-        if isinstance(lags, int):
-            self.lags["target"] = _check_int_lags(lags, "lags")
+        lags_type_error_msg = []
+        processed_lags: Dict[str, List[int]] = dict()
+        processed_component_lags: Dict[str, Dict[str, List[int]]] = dict()
+        if lags is None:
+            pass
+        elif isinstance(lags, int):
+            processed_lags["target"] = _process_int_lags(lags, "lags")
         elif isinstance(lags, list):
-            self.lags["target"] = _check_list_lags(lags, "lags")
+            processed_lags["target"] = _process_list_lags(lags, "lags")
         elif isinstance(lags, dict):
-            conv_lags = _check_dict_lags(lags, "lags")
-            if conv_lags is not None:
-                # dummy, used to compute the extreme lags
-                self.lags["target"] = conv_lags[0]
-                # actual lags
-                self.component_lags["target"] = conv_lags[1]
-
-        if isinstance(lags_past_covariates, int):
-            self.lags["past"] = _check_int_lags(
+            conv_lags = _process_dict_lags(lags, "lags")
+            # dummy, used to compute the extreme lags
+            processed_lags["target"] = conv_lags[0]
+            # actual lags
+            processed_component_lags["target"] = conv_lags[1]
+        else:
+            lags_type_error_msg.append(
+                f"`lags` must be of type int, list or dict." f"Given: {type(lags)}."
+            )
+
+        if lags_past_covariates is None:
+            pass
+        elif isinstance(lags_past_covariates, int):
+            processed_lags["past"] = _process_int_lags(
                 lags_past_covariates, "lags_past_covariates"
             )
         elif isinstance(lags_past_covariates, list):
-            self.lags["past"] = _check_list_lags(
+            processed_lags["past"] = _process_list_lags(
                 lags_past_covariates, "lags_past_covariates"
             )
         elif isinstance(lags_past_covariates, dict):
-            conv_lags = _check_dict_lags(lags_past_covariates, "lags_past_covariates")
-            if conv_lags is not None:
-                # dummy, used to compute the extreme lags
-                self.lags["past"] = conv_lags[0]
-                # actual lags
-                self.component_lags["past"] = conv_lags[1]
-
-        if isinstance(lags_future_covariates, tuple):
-            self.lags["future"] = _check_tuple_future_lags(
+            conv_lags = _process_dict_lags(lags_past_covariates, "lags_past_covariates")
+            # dummy, used to compute the extreme lags
+            processed_lags["past"] = conv_lags[0]
+            # actual lags
+            processed_component_lags["past"] = conv_lags[1]
+        else:
+            lags_type_error_msg.append(
+                f"`lags_past_covariates` must be of type int, list or dict."
+                f"Given: {type(lags_past_covariates)}."
+            )
+
+        if lags_future_covariates is None:
+            pass
+        elif isinstance(lags_future_covariates, tuple):
+            processed_lags["future"] = _process_tuple_future_lags(
                 lags_future_covariates, "lags_future_covariates"
             )
         elif isinstance(lags_future_covariates, list):
-            self.lags["future"] = _check_list_future_lags(
+            processed_lags["future"] = _process_list_future_lags(
                 lags_future_covariates, "lags_future_covariates"
             )
         elif isinstance(lags_future_covariates, dict):
-            conv_lags = _check_dict_lags(
+            conv_lags = _process_dict_lags(
                 lags_future_covariates, "lags_future_covariates"
             )
-            if conv_lags is not None:
-                # dummy, used to compute the extreme lags
-                self.lags["future"] = conv_lags[0]
-                # actual lags
-                self.component_lags["future"] = conv_lags[1]
+            # dummy, used to compute the extreme lags
+            processed_lags["future"] = conv_lags[0]
+            # actual lags
+            processed_component_lags["future"] = conv_lags[1]
+        else:
+            lags_type_error_msg.append(
+                f"`lags_future_covariates` must be of type tuple, list or dict. "
+                f"Given: {type(lags_future_covariates)}."
+            )
+
+        # error message for all the invalid types
+        if len(lags_type_error_msg) > 0:
+            raise_log(ValueError("\n".join(lags_type_error_msg)), logger)
+        return processed_lags, processed_component_lags
+
+    def _get_lags(self, lags_type: str):
+        """
+        If lags were specified in a component-wise manner, they are contained in self.component_lags and
+        the values in self.lags should be ignored as they correspond only to the extreme values.
+        """
+        if lags_type in self.component_lags:
+            return self.component_lags[lags_type]
+        else:
+            return self.lags.get(lags_type)
 
     @property
     def _model_encoder_settings(
@@ -485,10 +520,6 @@ def _get_last_prediction_time(self, series, forecast_horizon, overlap_end):
     def _create_lagged_data(
         self, target_series, past_covariates, future_covariates, max_samples_per_ts
     ):
-        """
-        If lags were specified component-wise manner, they are contained in self.component_lags and the values
-        in self.lags should be ignored.
-        """
         (
             features,
             labels,
@@ -499,15 +530,9 @@ def _create_lagged_data(
             output_chunk_length=self.output_chunk_length,
             past_covariates=past_covariates,
             future_covariates=future_covariates,
-            lags=self.component_lags["target"]
-            if "target" in self.component_lags
-            else self.lags.get("target"),
-            lags_past_covariates=self.component_lags["past"]
-            if "past" in self.component_lags
-            else self.lags.get("past"),
-            lags_future_covariates=self.component_lags["future"]
-            if "future" in self.component_lags
-            else self.lags.get("future"),
+            lags=self._get_lags("target"),
+            lags_past_covariates=self._get_lags("past"),
+            lags_future_covariates=self._get_lags("future"),
             uses_static_covariates=self.uses_static_covariates,
             last_static_covariates_shape=None,
             max_samples_per_ts=max_samples_per_ts,
@@ -555,15 +580,9 @@ def _fit_model(
             target_series=target_series,
             past_covariates=past_covariates,
             future_covariates=future_covariates,
-            lags=self.component_lags["target"]
-            if "target" in self.component_lags
-            else self.lags.get("target"),
-            lags_past_covariates=self.component_lags["past"]
-            if "past" in self.component_lags
-            else self.lags.get("past"),
-            lags_future_covariates=self.component_lags["future"]
-            if "future" in self.component_lags
-            else self.lags.get("future"),
+            lags=self._get_lags("target"),
+            lags_past_covariates=self._get_lags("past"),
+            lags_future_covariates=self._get_lags("future"),
             output_chunk_length=self.output_chunk_length,
             concatenate=False,
             use_static_covariates=self.uses_static_covariates,
@@ -685,32 +704,41 @@ def fit(
             past_covariates=seq2series(past_covariates),
             future_covariates=seq2series(future_covariates),
         )
+        variate2arg = {
+            "target": "lags",
+            "past": "lags_past_covariates",
+            "future": "lags_future_covariates",
+        }
 
         # if provided, component-wise lags must be defined for all the components of the first series
+        component_lags_error_msg = []
         for variate_type, variate in zip(
             ["target", "past", "future"], [series, past_covariates, future_covariates]
         ):
-            if variate_type in self.component_lags:
-                # ignore the fallback lags entry
-                provided_components = set(self.component_lags[variate_type].keys())
-                required_components = set(variate[0].components)
-                # lags were specified for unrecognized components
-                wrong_components = list(
-                    provided_components - {"default_lags"} - required_components
-                )
-                if len(wrong_components) > 0:
-                    logger.warning(
-                        f"Lags of components not present in the series ({wrong_components}) were ignored."
-                    )
+            if variate_type not in self.component_lags:
+                continue
 
-                missing_keys = list(required_components - provided_components)
-                raise_if(
-                    len(missing_keys) > 0 and "default_lags" not in provided_components,
-                    f"The {variate_type} series contains {self.input_dim[variate_type]} components, lags were "
-                    f"provided for {len(self.component_lags[variate_type])} of them. The lags for the "
-                    f"following components must be provided: {missing_keys}.",
-                    logger,
+            # ignore the fallback lags entry
+            provided_components = set(self.component_lags[variate_type].keys())
+            required_components = set(variate[0].components)
+
+            wrong_components = list(
+                provided_components - {"default_lags"} - required_components
+            )
+            missing_keys = list(required_components - provided_components)
+            # lags were specified for unrecognized components
+            if len(wrong_components) > 0:
+                component_lags_error_msg.append(
+                    f"The `{variate2arg[variate_type]}` dictionary specifies lags for components that are not "
+                    f"present in the series : {wrong_components}. They must be removed to avoid any ambiguity."
+                )
+            elif len(missing_keys) > 0 and "default_lags" not in provided_components:
+                component_lags_error_msg.append(
+                    f"The {variate2arg[variate_type]} dictionary is missing the lags for the following components "
+                    f"present in the series: {missing_keys}. The key 'default_lags' can be used to provide lags for "
+                    f"all the non-explicitely defined components."
                 )
+            else:
                 # reorder the components based on the input series, insert the default when necessary
                 self.component_lags[variate_type] = {
                     comp_name: self.component_lags[variate_type][comp_name]
@@ -719,6 +747,10 @@ def fit(
                     for comp_name in variate[0].components
                 }
 
+        # single error message for all the lags arguments
+        if len(component_lags_error_msg) > 0:
+            raise_log(ValueError("\n".join(component_lags_error_msg)), logger)
+
         self._fit_model(
             series, past_covariates, future_covariates, max_samples_per_ts, **kwargs
         )
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index eabee4447d..9e8426bbe6 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -542,7 +542,7 @@ def test_training_data_creation(self, mode):
         ]
         assert list(training_labels[0]) == [82, 182, 282]
 
-        # lags defined using dictionnaries
+        # lags defined using dictionaries
         # cannot use 'default_lags' because it's converted in `fit()`, before calling `_created_lagged_data`
         model_instance = RegressionModel(
             lags={"0-trgt-0": [-4, -3], "0-trgt-1": [-3, -2], "0-trgt-2": [-2, -1]},
diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py
index d5249ee95f..9078515a2b 100644
--- a/darts/utils/data/tabularization.py
+++ b/darts/utils/data/tabularization.py
@@ -331,13 +331,9 @@ def create_lagged_training_data(
     output_chunk_length: int,
     past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
-    lags: Optional[Union[Sequence[int], Dict[str, Sequence[int]]]] = None,
-    lags_past_covariates: Optional[
-        Union[Sequence[int], Dict[str, Sequence[int]]]
-    ] = None,
-    lags_future_covariates: Optional[
-        Union[Sequence[int], Dict[str, Sequence[int]]]
-    ] = None,
+    lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
     uses_static_covariates: bool = True,
     last_static_covariates_shape: Optional[Tuple[int, int]] = None,
     max_samples_per_ts: Optional[int] = None,

From 3221f867cb2fb40670a2eca6616f6396283ee5c9 Mon Sep 17 00:00:00 2001
From: madtoinou <32447896+madtoinou@users.noreply.github.com>
Date: Fri, 25 Aug 2023 16:52:03 +0200
Subject: [PATCH 14/30] Apply suggestions from code review

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
---
 darts/models/forecasting/regression_model.py | 48 ++++++++++----------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index c55d7acca3..8b990ec369 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -84,31 +84,33 @@ def __init__(
         Parameters
         ----------
         lags
-            Lagged target values used to predict the next time step.
-            If an integer is given the last `lags` past lags are used (from -1 backward).
-            If a list of integers, each lag must be < 0.
-            If a dictionnary, the keys must be the components' name  (first series when using multiple series) and
-            the values corresponds to the lags (integer or list of integers). The key 'default_lags' can be used to
-            provide fallback lags values for un-specified components. An error will be raised if some components are
-            missing and the 'default_lags' key is not present in the dictionnary.
+            Lagged target `series` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `series` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_past_covariates
-            Number of lagged past_covariates values used to predict the next time step.
-            If an integer is given the last `lags_past_covariates` past lags are used (inclusive, starting from lag -1).
-            If a list of integers, each lag must be < 0.
-            If a dictionnary, the keys must be the components' name  (first series when using multiple series) and
-            the values corresponds to the lags (integer or list of integers). The key 'default_lags' can be used to
-            provide fallback lags values for un-specified components. An error will be raised if some components are
-            missing and the 'default_lags' key is not present in the dictionnary.
+            Lagged `past_covariates` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+            where `0` corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_future_covariates
-            Number of lagged future_covariates values used to predict the next time step.
-            If a tuple (past, future) is given the last `past` lags in the past are used (inclusive, starting from
-            lag -1) along with the first `future` future lags (starting from 0 - the prediction time - up to
-            `future - 1` included).
-            If a list of integer, the values will be used as is.
-            If a dictionnary, the keys must be the components' name  (first series when using multiple series) and
-            the values corresponds to the lags (integer or list of integers). The key 'default_lags' can be used to
-            provide fallback lags values for un-specified components. An error will be raised if some components are
-            missing and the 'default_lags' key is not present in the dictionnary.
+            Lagged `future_covariates` values used to predict the next time step/s.
+            If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+            future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, .... future - 1)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+            'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may

From 3254db3f96a87f8052b3c4966bb2ff8c405eb15f Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 28 Aug 2023 10:12:00 +0200
Subject: [PATCH 15/30] refactor: lags arguments are converted to dict before
 running the type check and processing of the values

---
 darts/models/forecasting/regression_model.py | 230 +++++++------------
 1 file changed, 81 insertions(+), 149 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 8b990ec369..a0c5ea4933 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -215,101 +215,102 @@ def _generate_lags(
         attributes contain only the extreme values
         If the lags are provided as integer, list, tuple or dictionary containing only the 'default_lags' keys, the lags
         values are contained in the self.lags attribute and the self.component_lags is an empty dictionary.
-
-        `lags` and `lags_past_covariates` are processed using the same local functions,
-        `lags_future_covariates` is processed with different local functions
         """
+        processed_lags: Dict[str, List[int]] = dict()
+        processed_component_lags: Dict[str, Dict[str, List[int]]] = dict()
+        for lags_values, lags_name, lags_abbrev in zip(
+            [lags, lags_past_covariates, lags_future_covariates],
+            ["lags", "lags_past_covariates", "lags_future_covariates"],
+            ["target", "past", "future"],
+        ):
+            if lags_values is None:
+                continue
 
-        def _process_int_lags(lags: int, lags_name: str) -> List[int]:
-            raise_if_not(
-                lags > 0, f"`{lags_name}` must be strictly positive. Given: {lags}."
-            )
-            return list(range(-lags, 0))
-
-        def _process_list_lags(lags: list, lags_name: str) -> List[int]:
-            for lag in lags:
+            # check type of argument before converting to dictionary
+            if not isinstance(lags_values, dict):
                 raise_if(
-                    not isinstance(lag, int) or (lag >= 0),
-                    f"Every element of `{lags_name}` must be a strictly negative integer. Given: {lags}.",
+                    lags_name == "lags_future_covariates"
+                    and not isinstance(lags_values, (tuple, list)),
+                    f"`lags_future_covariates` must be of type tuple, list or dict."
+                    f"Given: {type(lags_values)}.",
                 )
-            return sorted(lags)
-
-        def _process_tuple_future_lags(
-            lags_future_covariates: Tuple[int, int], lags_name: str
-        ) -> List[int]:
-            raise_if_not(
-                len(lags_future_covariates) == 2
-                and isinstance(lags_future_covariates[0], int)
-                and isinstance(lags_future_covariates[1], int),
-                f"`{lags_name}` tuple must be of length 2, and must contain two integers",
-                logger,
-            )
-
-            raise_if(
-                isinstance(lags_future_covariates[0], bool)
-                or isinstance(lags_future_covariates[1], bool),
-                f"`{lags_name}` tuple must contain integers, not bool",
-                logger,
-            )
 
-            raise_if_not(
-                lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
-                f"`{lags_name}` tuple must contain positive integers. Given: {lags_future_covariates}.",
-            )
-            raise_if(
-                lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0,
-                f"`{lags_name}` tuple cannot be (0, 0) as it corresponds to an empty list of lags.",
-                logger,
-            )
-            return list(range(-lags_future_covariates[0], lags_future_covariates[1]))
-
-        def _process_list_future_lags(
-            lags_future_covariates: List[int], lags_name: str
-        ) -> List[int]:
-            for lag in lags_future_covariates:
                 raise_if(
-                    not isinstance(lag, int) or isinstance(lag, bool),
-                    f"Every element of `{lags_name}` must be an integer. Given: {lags_future_covariates}.",
+                    lags_name in ["lags", "lags_past_covariates"]
+                    and not isinstance(lags_values, (int, list)),
+                    f"`{lags_name}` must be of type int, list or dict."
+                    f"Given: {type(lags_values)}.",
                 )
-            return sorted(lags_future_covariates)
 
-        def _process_dict_lags(
-            lags: dict, lags_name: str
-        ) -> Tuple[List[int], Dict[str, List[int]]]:
+                lags_values = {"default_lags": lags_values}
 
-            raise_if_not(
-                len(lags) > 0,
-                f"When passed as a dictionary, `{lags_name}` must contain at least one key.",
-                logger,
-            )
+            elif len(lags_values) == 0:
+                raise_log(
+                    ValueError(
+                        f"When passed as a dictionary, `{lags_name}` must contain at least one key."
+                    ),
+                    logger,
+                )
 
             invalid_type = False
             supported_types = ""
             min_lags = None
             max_lags = None
-            components_lags: Dict[str, List[int]] = dict()
-            for comp_name, comp_lags in lags.items():
+            tmp_components_lags: Dict[str, List[int]] = dict()
+            for comp_name, comp_lags in lags_values.items():
                 if lags_name == "lags_future_covariates":
                     if isinstance(comp_lags, tuple):
-                        components_lags[comp_name] = _process_tuple_future_lags(
-                            comp_lags, f"`{lags_name}` for component {comp_name}"
+                        raise_if_not(
+                            len(comp_lags) == 2
+                            and isinstance(comp_lags[0], int)
+                            and isinstance(comp_lags[1], int),
+                            f"`{lags_name}` tuple must be of length 2, and must contain two integers",
+                            logger,
                         )
-                    elif isinstance(comp_lags, list):
-                        components_lags[comp_name] = _process_list_future_lags(
-                            comp_lags, f"`{lags_name}` for component {comp_name}"
+
+                        raise_if(
+                            isinstance(comp_lags[0], bool)
+                            or isinstance(comp_lags[1], bool),
+                            f"`{lags_name}` tuple must contain integers, not bool",
+                            logger,
+                        )
+
+                        raise_if_not(
+                            comp_lags[0] >= 0 and comp_lags[1] >= 0,
+                            f"`{lags_name}` tuple must contain positive integers. Given: {comp_lags}.",
+                        )
+                        raise_if(
+                            comp_lags[0] == 0 and comp_lags[1] == 0,
+                            f"`{lags_name}` tuple cannot be (0, 0) as it corresponds to an empty list of lags.",
+                            logger,
                         )
+                        tmp_components_lags[comp_name] = list(
+                            range(-comp_lags[0], comp_lags[1])
+                        )
+                    elif isinstance(comp_lags, list):
+                        for lag in comp_lags:
+                            raise_if(
+                                not isinstance(lag, int) or isinstance(lag, bool),
+                                f"`{lags_name}` list must contain only integers. Given: {comp_lags}.",
+                            )
+                        tmp_components_lags[comp_name] = sorted(comp_lags)
                     else:
                         invalid_type = True
                         supported_types = "tuple or a list"
                 else:
                     if isinstance(comp_lags, int):
-                        components_lags[comp_name] = _process_int_lags(
-                            comp_lags, f"`{lags_name}` for component {comp_name}"
+                        raise_if_not(
+                            comp_lags > 0,
+                            f"`{lags_name}` integer must be strictly positive . Given: {comp_lags}.",
                         )
+                        tmp_components_lags[comp_name] = list(range(-comp_lags, 0))
                     elif isinstance(comp_lags, list):
-                        components_lags[comp_name] = _process_list_lags(
-                            comp_lags, f"`{lags_name}` for component {comp_name}"
-                        )
+                        for lag in comp_lags:
+                            raise_if(
+                                not isinstance(lag, int) or (lag >= 0),
+                                f"`{lags_name}` list must contain only strictly negative integers. Given: {comp_lags}.",
+                            )
+                        tmp_components_lags[comp_name] = sorted(comp_lags)
                     else:
                         invalid_type = True
                         supported_types = "strictly positive integer or a list"
@@ -317,98 +318,29 @@ def _process_dict_lags(
                 if invalid_type:
                     raise_log(
                         ValueError(
-                            f"When passed as a dictionary, `{lags_name}` for component {comp_name} must be either a "
+                            f"When passed in a dictionary, `{lags_name}` for component {comp_name} must be either a "
                             f"{supported_types}, received : {type(comp_lags)}."
                         ),
                         logger,
                     )
 
                 if min_lags is None:
-                    min_lags = components_lags[comp_name][0]
+                    min_lags = tmp_components_lags[comp_name][0]
                 else:
-                    min_lags = min(min_lags, components_lags[comp_name][0])
+                    min_lags = min(min_lags, tmp_components_lags[comp_name][0])
 
                 if max_lags is None:
-                    max_lags = components_lags[comp_name][-1]
+                    max_lags = tmp_components_lags[comp_name][-1]
                 else:
-                    max_lags = max(max_lags, components_lags[comp_name][-1])
+                    max_lags = max(max_lags, tmp_components_lags[comp_name][-1])
 
-            # revert to lags shared across components logic
-            if list(components_lags.keys()) == ["default_lags"]:
-                return components_lags["default_lags"], {}
+            # revert to shared lags logic when applicable
+            if list(tmp_components_lags.keys()) == ["default_lags"]:
+                processed_lags[lags_abbrev] = tmp_components_lags["default_lags"]
             else:
-                return [min_lags, max_lags], components_lags
-
-        # perform the type and sanity checks
-        lags_type_error_msg = []
-        processed_lags: Dict[str, List[int]] = dict()
-        processed_component_lags: Dict[str, Dict[str, List[int]]] = dict()
-        if lags is None:
-            pass
-        elif isinstance(lags, int):
-            processed_lags["target"] = _process_int_lags(lags, "lags")
-        elif isinstance(lags, list):
-            processed_lags["target"] = _process_list_lags(lags, "lags")
-        elif isinstance(lags, dict):
-            conv_lags = _process_dict_lags(lags, "lags")
-            # dummy, used to compute the extreme lags
-            processed_lags["target"] = conv_lags[0]
-            # actual lags
-            processed_component_lags["target"] = conv_lags[1]
-        else:
-            lags_type_error_msg.append(
-                f"`lags` must be of type int, list or dict." f"Given: {type(lags)}."
-            )
-
-        if lags_past_covariates is None:
-            pass
-        elif isinstance(lags_past_covariates, int):
-            processed_lags["past"] = _process_int_lags(
-                lags_past_covariates, "lags_past_covariates"
-            )
-        elif isinstance(lags_past_covariates, list):
-            processed_lags["past"] = _process_list_lags(
-                lags_past_covariates, "lags_past_covariates"
-            )
-        elif isinstance(lags_past_covariates, dict):
-            conv_lags = _process_dict_lags(lags_past_covariates, "lags_past_covariates")
-            # dummy, used to compute the extreme lags
-            processed_lags["past"] = conv_lags[0]
-            # actual lags
-            processed_component_lags["past"] = conv_lags[1]
-        else:
-            lags_type_error_msg.append(
-                f"`lags_past_covariates` must be of type int, list or dict."
-                f"Given: {type(lags_past_covariates)}."
-            )
-
-        if lags_future_covariates is None:
-            pass
-        elif isinstance(lags_future_covariates, tuple):
-            processed_lags["future"] = _process_tuple_future_lags(
-                lags_future_covariates, "lags_future_covariates"
-            )
-        elif isinstance(lags_future_covariates, list):
-            processed_lags["future"] = _process_list_future_lags(
-                lags_future_covariates, "lags_future_covariates"
-            )
-        elif isinstance(lags_future_covariates, dict):
-            conv_lags = _process_dict_lags(
-                lags_future_covariates, "lags_future_covariates"
-            )
-            # dummy, used to compute the extreme lags
-            processed_lags["future"] = conv_lags[0]
-            # actual lags
-            processed_component_lags["future"] = conv_lags[1]
-        else:
-            lags_type_error_msg.append(
-                f"`lags_future_covariates` must be of type tuple, list or dict. "
-                f"Given: {type(lags_future_covariates)}."
-            )
+                processed_lags[lags_abbrev] = [min_lags, max_lags]
+                processed_component_lags[lags_abbrev] = tmp_components_lags
 
-        # error message for all the invalid types
-        if len(lags_type_error_msg) > 0:
-            raise_log(ValueError("\n".join(lags_type_error_msg)), logger)
         return processed_lags, processed_component_lags
 
     def _get_lags(self, lags_type: str):

From 269005e41d2bc0a3d79562abdb4e0a2f1be24a25 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 28 Aug 2023 10:12:34 +0200
Subject: [PATCH 16/30] refactor: lags arguments are converted to dict before
 running the type check and processing of the values

---
 darts/models/forecasting/regression_model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index a0c5ea4933..186688b419 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -324,6 +324,7 @@ def _generate_lags(
                         logger,
                     )
 
+                # extracting min and max lags values
                 if min_lags is None:
                     min_lags = tmp_components_lags[comp_name][0]
                 else:

From bcd44555ccae60cc4eceb3586977b17bf7db3ba3 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 28 Aug 2023 11:27:55 +0200
Subject: [PATCH 17/30] doc: improved documentation of the component-specific
 lags in tabularization

---
 darts/utils/data/tabularization.py | 83 ++++++++++++++++++------------
 1 file changed, 50 insertions(+), 33 deletions(-)

diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py
index 9078515a2b..404452ba74 100644
--- a/darts/utils/data/tabularization.py
+++ b/darts/utils/data/tabularization.py
@@ -27,9 +27,9 @@ def create_lagged_data(
     target_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
-    lags: Optional[Sequence[int]] = None,
-    lags_past_covariates: Optional[Sequence[int]] = None,
-    lags_future_covariates: Optional[Sequence[int]] = None,
+    lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
     output_chunk_length: int = 1,
     uses_static_covariates: bool = True,
     last_static_covariates_shape: Optional[Tuple[int, int]] = None,
@@ -154,15 +154,18 @@ def create_lagged_data(
         Optionally, the lags of the target series to be used as (auto-regressive) features. If not specified,
         auto-regressive features will *not* be added to `X`. Each lag value is assumed to be negative (e.g.
         `lags = [-3, -1]` will extract `target_series` values which are 3 timesteps and 1 timestep away from
-        the current value).
+        the current value). If the lags are provided as a dictionary, the lags values are specific to each
+        component in the target series.
     lags_past_covariates
         Optionally, the lags of `past_covariates` to be used as features. Like `lags`, each lag value is assumed to
-        be less than or equal to -1.
+        be less than or equal to -1. If the lags are provided as a dictionary, the lags values are specific to each
+        component in the past covariates series.
     lags_future_covariates
         Optionally, the lags of `future_covariates` to be used as features. Unlike `lags` and
         `lags_past_covariates`, `lags_future_covariates` values can be positive (i.e. use values *after* time `t`
         to predict target at time `t`), zero (i.e. use values *at* time `t` to predict target at time `t`), and/or
-        negative (i.e. use values *before* time `t` to predict target at time `t`).
+        negative (i.e. use values *before* time `t` to predict target at time `t`). If the lags are provided as
+        a dictionary, the lags values are specific to each component in the future covariates series.
     uses_static_covariates
         Whether the model uses/expects static covariates. If `True`, it enforces that static covariates must
         have identical shapes across all target series.
@@ -372,15 +375,18 @@ def create_lagged_training_data(
         Optionally, the lags of the target series to be used as (auto-regressive) features. If not specified,
         auto-regressive features will *not* be added to `X`. Each lag value is assumed to be negative (e.g.
         `lags = [-3, -1]` will extract `target_series` values which are 3 timesteps and 1 timestep away from
-        the current value).
+        the current value). If the lags are provided as a dictionary, the lags values are specific to each
+        component in the target series.
     lags_past_covariates
         Optionally, the lags of `past_covariates` to be used as features. Like `lags`, each lag value is assumed to
-        be less than or equal to -1.
+        be less than or equal to -1. If the lags are provided as a dictionary, the lags values are specific to each
+        component in the past covariates series.
     lags_future_covariates
         Optionally, the lags of `future_covariates` to be used as features. Unlike `lags` and `lags_past_covariates`,
         `lags_future_covariates` values can be positive (i.e. use values *after* time `t` to predict target at
         time `t`), zero (i.e. use values *at* time `t` to predict target at time `t`), and/or negative (i.e. use values
-        *before* time `t` to predict target at time `t`).
+        *before* time `t` to predict target at time `t`). If the lags are provided as a dictionary, the lags values
+        are specific to each component in the future covariates series.
     uses_static_covariates
         Whether the model uses/expects static covariates. If `True`, it enforces that static covariates must
         have identical shapes across all target series.
@@ -584,11 +590,11 @@ def create_lagged_prediction_data(
 
 
 def add_static_covariates_to_lagged_data(
-    features: Union[np.array, Sequence[np.array]],
+    features: Union[np.ndarray, Sequence[np.ndarray]],
     target_series: Union[TimeSeries, Sequence[TimeSeries]],
     uses_static_covariates: bool = True,
     last_shape: Optional[Tuple[int, int]] = None,
-) -> Union[np.array, Sequence[np.array]]:
+) -> Union[np.ndarray, Sequence[np.ndarray]]:
     """
     Add static covariates to the features' table for RegressionModels.
     If `uses_static_covariates=True`, all target series used in `fit()` and `predict()` must have static
@@ -678,13 +684,9 @@ def create_lagged_component_names(
     target_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
-    lags: Optional[Union[Sequence[int], Dict[str, Sequence[int]]]] = None,
-    lags_past_covariates: Optional[
-        Union[Sequence[int], Dict[str, Sequence[int]]]
-    ] = None,
-    lags_future_covariates: Optional[
-        Union[Sequence[int], Dict[str, Sequence[int]]]
-    ] = None,
+    lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
     output_chunk_length: int = 1,
     concatenate: bool = True,
     use_static_covariates: bool = False,
@@ -693,11 +695,16 @@ def create_lagged_component_names(
     Helper function called to retrieve the name of the features and labels arrays created with
     `create_lagged_data()`. The order of the features is the following:
 
-    Along the `n_lagged_features` axis, `X` has the following structure (for `*_lags=[-2,-1]` and
-    `*_series.n_components = 2`):
+    Along the `n_lagged_features` axis, `X` has the following structure:
         lagged_target | lagged_past_covariates | lagged_future_covariates | static covariates
-    where each `lagged_*` has the following structure:
+
+    For `*_lags=[-2,-1]` and `*_series.n_components = 2` (lags shared across all the components),
+    each `lagged_*` has the following structure (grouped by lags):
         comp0_*_lag-2 | comp1_*_lag-2 | comp0_*_lag_-1 | comp1_*_lag-1
+    For `*_lags={'comp0':[-2, -1], 'comp1':[-5, -3]}` and `*_series.n_components = 2` (component-
+    specific lags), each `lagged_*` has the following structure (grouped by components):
+        comp0_*_lag-2 | comp0_*_lag-1 | comp1_*_lag-5 | comp1_*_lag-3
+
     and for static covariates (2 static covariates acting on 2 target components):
         cov0_*_target_comp0 | cov0_*_target_comp1 | cov1_*_target_comp0 | cov1_*_target_comp1
 
@@ -790,9 +797,9 @@ def _create_lagged_data_by_moving_window(
     output_chunk_length: int,
     past_covariates: Optional[TimeSeries],
     future_covariates: Optional[TimeSeries],
-    lags: Optional[Sequence[int]],
-    lags_past_covariates: Optional[Sequence[int]],
-    lags_future_covariates: Optional[Sequence[int]],
+    lags: Optional[Union[Sequence[int], Dict[str, List[int]]]],
+    lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]],
+    lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]],
     max_samples_per_ts: Optional[int],
     multi_models: bool,
     check_inputs: bool,
@@ -964,7 +971,10 @@ def _extract_lagged_vals_from_windows(
     lagged values is `(num_windows, num_components * lags_to_extract.size, num_series)`. For example,
     if `lags_to_extract = [-2]`, only the second-to-last values within each window will be extracted.
     If `lags_to_extract` is specified as a list of np.ndarray, the values will be extracted using the
-    lags provided for each component.
+    lags provided for each component. In such cases, the shape of the returned lagged values is
+    `(num_windows, sum([comp_lags.size for comp_lags in lags_to_extract]), num_series)`. For example,
+    if `lags_to_extract = [[-2, -1], [-1]]`, the second-to-last and last values of the first component
+    and the last values of the second component within each window will be extracted.
     """
     # windows.shape = (num_windows, num_components, num_samples, window_len):
     if isinstance(lags_to_extract, list):
@@ -1113,9 +1123,9 @@ def _get_feature_times(
     target_series: Optional[TimeSeries] = None,
     past_covariates: Optional[TimeSeries] = None,
     future_covariates: Optional[TimeSeries] = None,
-    lags: Optional[Sequence[int]] = None,
-    lags_past_covariates: Optional[Sequence[int]] = None,
-    lags_future_covariates: Optional[Sequence[int]] = None,
+    lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
     output_chunk_length: int = 1,
     is_training: bool = True,
     return_min_and_max_lags: bool = False,
@@ -1230,6 +1240,9 @@ def _get_feature_times(
         Optionally, specifies whether the largest magnitude lag value for each series should also be returned along with
         the 'eligible' feature times
 
+    Note: if the lags are provided as a dictionary for the target series or any of the covariates series, the
+    component-specific lags are grouped into a single list to compute the corresponding feature time.
+
     Returns
     -------
     feature_times
@@ -1275,7 +1288,7 @@ def _get_feature_times(
         [target_series, past_covariates, future_covariates],
         [lags, lags_past_covariates, lags_future_covariates],
     ):
-        # TODO: information is available in model.lags, not sure how to make the info get here
+        # union of the component-specific lags, unsorted
         if isinstance(lags_i, dict):
             lags_i = list(set(chain(*lags_i.values())))
 
@@ -1627,9 +1640,9 @@ def _all_equal_freq(*series: Union[TimeSeries, None]) -> bool:
 
 
 def _check_lags(
-    lags: Sequence[int],
-    lags_past_covariates: Sequence[int],
-    lags_future_covariates: Sequence[int],
+    lags: Optional[Union[Sequence[int], Dict[str, List[int]]]],
+    lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]],
+    lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]],
 ) -> None:
     """
     Throws `ValueError` if any `lag` values aren't negative OR if no lags have been specified.
@@ -1642,9 +1655,13 @@ def _check_lags(
         if not lags_is_none[-1]:
             is_target_or_past = i < 2
             max_lag = -1 if is_target_or_past else inf
+
+            if isinstance(lags_i, dict):
+                lags_i = list(set(chain(*lags_i.values())))
+
             raise_if(
                 any((lag > max_lag or not isinstance(lag, int)) for lag in lags_i),
-                f"`lags{suffix}` must be a `Sequence` containing only `int` values less than {max_lag + 1}.",
+                f"`lags{suffix}` must be a `Sequence` or `Dict` containing only `int` values less than {max_lag + 1}.",
             )
     raise_if(
         all(lags_is_none),

From b859d9a7885f74ebc747e946b46f455a27384f28 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 28 Aug 2023 13:37:30 +0200
Subject: [PATCH 18/30] test: adding a test for the multivariate scenario

---
 .../forecasting/test_regression_models.py     | 84 +++++++++++--------
 1 file changed, 50 insertions(+), 34 deletions(-)

diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 9e8426bbe6..8786ecbcc1 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -1596,41 +1596,47 @@ def test_integer_indexed_series(self, mode):
 
     @pytest.mark.parametrize(
         "config",
-        [
-            ({"lags": [-3, -2, -1]}, {"lags": {"gaussian": 3}}),
-            ({"lags": 3}, {"lags": {"gaussian": 3, "sine": 3}}),
-            ({"lags_past_covariates": 2}, {"lags_past_covariates": {"lin_past": 2}}),
-            (
-                {"lags": 5, "lags_future_covariates": [-2, 3]},
-                {
-                    "lags": {
-                        "gaussian": [-5, -4, -3, -2, -1],
-                        "sine": [-5, -4, -3, -2, -1],
-                    },
-                    "lags_future_covariates": {
-                        "lin_future": [-2, 3],
-                        "sine_future": [-2, 3],
-                    },
-                },
-            ),
-            (
-                {"lags": 5, "lags_future_covariates": [-2, 3]},
-                {
-                    "lags": {
-                        "gaussian": [-5, -4, -3, -2, -1],
-                        "sine": [-5, -4, -3, -2, -1],
+        itertools.product(
+            [
+                ({"lags": [-3, -2, -1]}, {"lags": {"gaussian": 3}}),
+                ({"lags": 3}, {"lags": {"gaussian": 3, "sine": 3}}),
+                (
+                    {"lags_past_covariates": 2},
+                    {"lags_past_covariates": {"lin_past": 2}},
+                ),
+                (
+                    {"lags": 5, "lags_future_covariates": [-2, 3]},
+                    {
+                        "lags": {
+                            "gaussian": [-5, -4, -3, -2, -1],
+                            "sine": [-5, -4, -3, -2, -1],
+                        },
+                        "lags_future_covariates": {
+                            "lin_future": [-2, 3],
+                            "sine_future": [-2, 3],
+                        },
                     },
-                    "lags_future_covariates": {
-                        "sine_future": [-2, 3],
-                        "default_lags": [-2, 3],
+                ),
+                (
+                    {"lags": 5, "lags_future_covariates": [-2, 3]},
+                    {
+                        "lags": {
+                            "gaussian": [-5, -4, -3, -2, -1],
+                            "sine": [-5, -4, -3, -2, -1],
+                        },
+                        "lags_future_covariates": {
+                            "sine_future": [-2, 3],
+                            "default_lags": [-2, 3],
+                        },
                     },
-                },
-            ),
-        ],
+                ),
+            ],
+            [True, False],
+        ),
     )
     def test_component_specific_lags(self, config):
         """Verify that the same lags, defined using int/list or dictionnaries yield the same results"""
-        list_lags, dict_lags = config
+        (list_lags, dict_lags), multiple_series = config
         multivar_target = "lags" in dict_lags and len(dict_lags["lags"]) > 1
         multivar_future_cov = (
             "lags_future_covariates" in dict_lags
@@ -1641,6 +1647,16 @@ def test_component_specific_lags(self, config):
         series = tg.gaussian_timeseries(length=20, column_name="gaussian")
         if multivar_target:
             series = series.stack(tg.sine_timeseries(length=20, column_name="sine"))
+        if multiple_series:
+            # the second series has different component names
+            series = [
+                series,
+                series.with_columns_renamed(
+                    ["gaussian", "sine"][: series.width],
+                    ["other", "names"][: series.width],
+                )
+                + 10,
+            ]
 
         future_cov = tg.linear_timeseries(length=30, column_name="lin_future")
         if multivar_future_cov:
@@ -1667,14 +1683,14 @@ def test_component_specific_lags(self, config):
         )
 
         # n == output_chunk_length
-        pred = model.predict(1)
-        pred2 = model2.predict(1)
+        pred = model.predict(1, series=series[0] if multiple_series else None)
+        pred2 = model2.predict(1, series=series[0] if multiple_series else None)
         np.testing.assert_array_almost_equal(pred.values(), pred2.values())
         assert pred.time_index.equals(pred2.time_index)
 
         # n > output_chunk_length
-        pred = model.predict(3)
-        pred2 = model2.predict(3)
+        pred = model.predict(3, series=series[0] if multiple_series else None)
+        pred2 = model2.predict(3, series=series[0] if multiple_series else None)
         np.testing.assert_array_almost_equal(pred.values(), pred2.values())
         assert pred.time_index.equals(pred2.time_index)
 

From c0121a549a217d8f10980b46d54211b3b66c0498 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Tue, 29 Aug 2023 15:48:31 +0200
Subject: [PATCH 19/30] test: checking the appropriate lags are extracted by
 the shap explainer

---
 .../explainability/test_shap_explainer.py     | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/darts/tests/explainability/test_shap_explainer.py b/darts/tests/explainability/test_shap_explainer.py
index e526d1b81b..a953c79dd3 100644
--- a/darts/tests/explainability/test_shap_explainer.py
+++ b/darts/tests/explainability/test_shap_explainer.py
@@ -14,7 +14,7 @@
 from darts import TimeSeries
 from darts.dataprocessing.transformers import Scaler
 from darts.explainability.explainability_result import ShapExplainabilityResult
-from darts.explainability.shap_explainer import ShapExplainer
+from darts.explainability.shap_explainer import MIN_BACKGROUND_SAMPLE, ShapExplainer
 from darts.models import (
     CatBoostModel,
     ExponentialSmoothing,
@@ -24,6 +24,7 @@
     RegressionModel,
     XGBModel,
 )
+from darts.utils.timeseries_generation import linear_timeseries
 
 lgbm_available = not isinstance(LightGBMModel, NotImportedModule)
 cb_available = not isinstance(CatBoostModel, NotImportedModule)
@@ -799,3 +800,38 @@ def test_shapley_multiple_series_with_different_static_covs(self):
         for explained_forecast in explanation_results.explained_forecasts:
             comps_out = explained_forecast[1]["price"].columns.tolist()
             assert comps_out[-1] == "type_statcov_target_price"
+
+    def test_shap_regressor_component_specific_lags(self):
+        model = LinearRegressionModel(
+            lags={"price": [-3, -2], "power": [-1]},
+            output_chunk_length=1,
+        )
+        # multivariate ts as short as possible
+        min_ts_length = MIN_BACKGROUND_SAMPLE * np.abs(min(model.lags["target"]))
+        ts = linear_timeseries(
+            start_value=1,
+            end_value=min_ts_length,
+            length=min_ts_length,
+            column_name="price",
+        ).stack(
+            linear_timeseries(
+                start_value=102,
+                end_value=100 + 2 * min_ts_length,
+                length=min_ts_length,
+                column_name="power",
+            )
+        )
+        model.fit(ts)
+        shap_explain = ShapExplainer(model)
+
+        # one column per lag, grouped by components
+        expected_df = pd.DataFrame(
+            data=np.stack(
+                [np.arange(1, 29), np.arange(3, 31), np.arange(106, 161, 2)], axis=1
+            ),
+            columns=["price_target_lag-3", "price_target_lag-2", "power_target_lag-1"],
+        )
+
+        # check that the appropriate lags are extracted
+        assert all(shap_explain.explainers.background_X == expected_df)
+        assert model.lagged_feature_names == list(expected_df.columns)

From d682f1303e11f0ab71ee52da21586cd412646ef0 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Tue, 29 Aug 2023 15:51:10 +0200
Subject: [PATCH 20/30] fix: shapexplainer extract the appropriate lags,
 updated the type hints

---
 darts/explainability/shap_explainer.py        | 12 ++---
 darts/models/forecasting/lgbm.py              | 48 +++++++++++------
 .../forecasting/linear_regression_model.py    | 53 +++++++++++++------
 darts/models/forecasting/random_forest.py     | 48 ++++++++++++-----
 darts/models/forecasting/xgboost.py           | 53 +++++++++++++------
 darts/utils/data/tabularization.py            | 17 +++---
 6 files changed, 157 insertions(+), 74 deletions(-)

diff --git a/darts/explainability/shap_explainer.py b/darts/explainability/shap_explainer.py
index 29be9d5e3d..143ea0d8b9 100644
--- a/darts/explainability/shap_explainer.py
+++ b/darts/explainability/shap_explainer.py
@@ -732,9 +732,9 @@ def _build_explainer_sklearn(
 
     def _create_regression_model_shap_X(
         self,
-        target_series,
-        past_covariates,
-        future_covariates,
+        target_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
+        past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
+        future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
         n_samples=None,
         train=False,
     ) -> pd.DataFrame:
@@ -746,9 +746,9 @@ def _create_regression_model_shap_X(
 
         """
 
-        lags_list = self.model.lags.get("target")
-        lags_past_covariates_list = self.model.lags.get("past")
-        lags_future_covariates_list = self.model.lags.get("future")
+        lags_list = self.model._get_lags("target")
+        lags_past_covariates_list = self.model._get_lags("past")
+        lags_future_covariates_list = self.model._get_lags("future")
 
         X, indexes = create_lagged_prediction_data(
             target_series=target_series if lags_list else None,
diff --git a/darts/models/forecasting/lgbm.py b/darts/models/forecasting/lgbm.py
index 8f85fe3237..b4ef4c020a 100644
--- a/darts/models/forecasting/lgbm.py
+++ b/darts/models/forecasting/lgbm.py
@@ -10,13 +10,15 @@
 https://github.com/unit8co/darts/blob/master/INSTALL.md
 """
 
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Sequence, Union
 
 import lightgbm as lgb
 import numpy as np
 
 from darts.logging import get_logger
 from darts.models.forecasting.regression_model import (
+    FUTURE_LAGS_TYPE,
+    LAGS_TYPE,
     RegressionModelWithCategoricalCovariates,
     _LikelihoodMixin,
 )
@@ -28,13 +30,13 @@
 class LightGBMModel(RegressionModelWithCategoricalCovariates, _LikelihoodMixin):
     def __init__(
         self,
-        lags: Union[int, list] = None,
-        lags_past_covariates: Union[int, List[int]] = None,
-        lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+        lags: Optional[LAGS_TYPE] = None,
+        lags_past_covariates: Optional[LAGS_TYPE] = None,
+        lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
         output_chunk_length: int = 1,
         add_encoders: Optional[dict] = None,
-        likelihood: str = None,
-        quantiles: List[float] = None,
+        likelihood: Optional[str] = None,
+        quantiles: Optional[List[float]] = None,
         random_state: Optional[int] = None,
         multi_models: Optional[bool] = True,
         use_static_covariates: bool = True,
@@ -48,17 +50,33 @@ def __init__(
         Parameters
         ----------
         lags
-            Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
-            are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
+            Lagged target `series` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `series` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_past_covariates
-            Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
-            `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
-            with lags < 0 is required.
+            Lagged `past_covariates` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+            where `0` corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_future_covariates
-            Number of lagged future_covariates values used to predict the next time step. If an tuple (past, future) is
-            given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
-            `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
-            of integers with lags is required.
+            Lagged `future_covariates` values used to predict the next time step/s.
+            If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+            future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, ..., future - 1)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+            'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py
index 778619bae4..4e09a948ed 100644
--- a/darts/models/forecasting/linear_regression_model.py
+++ b/darts/models/forecasting/linear_regression_model.py
@@ -5,14 +5,19 @@
 A forecasting model using a linear regression of some of the target series' lags, as well as optionally some
 covariate series lags in order to obtain a forecast.
 """
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Sequence, Union
 
 import numpy as np
 from scipy.optimize import linprog
 from sklearn.linear_model import LinearRegression, PoissonRegressor, QuantileRegressor
 
 from darts.logging import get_logger
-from darts.models.forecasting.regression_model import RegressionModel, _LikelihoodMixin
+from darts.models.forecasting.regression_model import (
+    FUTURE_LAGS_TYPE,
+    LAGS_TYPE,
+    RegressionModel,
+    _LikelihoodMixin,
+)
 from darts.timeseries import TimeSeries
 
 logger = get_logger(__name__)
@@ -21,13 +26,13 @@
 class LinearRegressionModel(RegressionModel, _LikelihoodMixin):
     def __init__(
         self,
-        lags: Union[int, list] = None,
-        lags_past_covariates: Union[int, List[int]] = None,
-        lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+        lags: Optional[LAGS_TYPE] = None,
+        lags_past_covariates: Optional[LAGS_TYPE] = None,
+        lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
         output_chunk_length: int = 1,
         add_encoders: Optional[dict] = None,
-        likelihood: str = None,
-        quantiles: List[float] = None,
+        likelihood: Optional[str] = None,
+        quantiles: Optional[List[float]] = None,
         random_state: Optional[int] = None,
         multi_models: Optional[bool] = True,
         use_static_covariates: bool = True,
@@ -38,17 +43,33 @@ def __init__(
         Parameters
         ----------
         lags
-            Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
-            are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
+            Lagged target `series` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `series` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_past_covariates
-            Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
-            `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
-            with lags < 0 is required.
+            Lagged `past_covariates` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+            where `0` corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_future_covariates
-            Number of lagged future_covariates values used to predict the next time step. If an tuple (past, future) is
-            given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
-            `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
-            of integers with lags is required.
+            Lagged `future_covariates` values used to predict the next time step/s.
+            If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+            future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, ..., future - 1)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+            'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
diff --git a/darts/models/forecasting/random_forest.py b/darts/models/forecasting/random_forest.py
index 600f307302..fe9a4e4096 100644
--- a/darts/models/forecasting/random_forest.py
+++ b/darts/models/forecasting/random_forest.py
@@ -14,12 +14,16 @@
 ----------
 .. [1] https://en.wikipedia.org/wiki/Random_forest
 """
-from typing import List, Optional, Tuple, Union
+from typing import Optional
 
 from sklearn.ensemble import RandomForestRegressor
 
 from darts.logging import get_logger
-from darts.models.forecasting.regression_model import RegressionModel
+from darts.models.forecasting.regression_model import (
+    FUTURE_LAGS_TYPE,
+    LAGS_TYPE,
+    RegressionModel,
+)
 
 logger = get_logger(__name__)
 
@@ -27,9 +31,9 @@
 class RandomForest(RegressionModel):
     def __init__(
         self,
-        lags: Union[int, list] = None,
-        lags_past_covariates: Union[int, List[int]] = None,
-        lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+        lags: Optional[LAGS_TYPE] = None,
+        lags_past_covariates: Optional[LAGS_TYPE] = None,
+        lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
         output_chunk_length: int = 1,
         add_encoders: Optional[dict] = None,
         n_estimators: Optional[int] = 100,
@@ -43,17 +47,33 @@ def __init__(
         Parameters
         ----------
         lags
-            Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
-            are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
+            Lagged target `series` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `series` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_past_covariates
-            Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
-            `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
-            with lags < 0 is required.
+            Lagged `past_covariates` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+            where `0` corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_future_covariates
-            Number of lagged future_covariates values used to predict the next time step. If an tuple (past, future) is
-            given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
-            `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
-            of integers with lags is required.
+            Lagged `future_covariates` values used to predict the next time step/s.
+            If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+            future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, ..., future - 1)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+            'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
diff --git a/darts/models/forecasting/xgboost.py b/darts/models/forecasting/xgboost.py
index ef693f4723..302e190781 100644
--- a/darts/models/forecasting/xgboost.py
+++ b/darts/models/forecasting/xgboost.py
@@ -8,13 +8,18 @@
 """
 
 from functools import partial
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Sequence, Union
 
 import numpy as np
 import xgboost as xgb
 
 from darts.logging import get_logger
-from darts.models.forecasting.regression_model import RegressionModel, _LikelihoodMixin
+from darts.models.forecasting.regression_model import (
+    FUTURE_LAGS_TYPE,
+    LAGS_TYPE,
+    RegressionModel,
+    _LikelihoodMixin,
+)
 from darts.timeseries import TimeSeries
 from darts.utils.utils import raise_if_not
 
@@ -43,13 +48,13 @@ def xgb_quantile_loss(labels: np.ndarray, preds: np.ndarray, quantile: float):
 class XGBModel(RegressionModel, _LikelihoodMixin):
     def __init__(
         self,
-        lags: Union[int, list] = None,
-        lags_past_covariates: Union[int, List[int]] = None,
-        lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+        lags: Optional[LAGS_TYPE] = None,
+        lags_past_covariates: Optional[LAGS_TYPE] = None,
+        lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
         output_chunk_length: int = 1,
         add_encoders: Optional[dict] = None,
-        likelihood: str = None,
-        quantiles: List[float] = None,
+        likelihood: Optional[str] = None,
+        quantiles: Optional[List[float]] = None,
         random_state: Optional[int] = None,
         multi_models: Optional[bool] = True,
         use_static_covariates: bool = True,
@@ -60,17 +65,33 @@ def __init__(
         Parameters
         ----------
         lags
-            Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
-            are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
+            Lagged target `series` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `series` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_past_covariates
-            Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
-            `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
-            with lags < 0 is required.
+            Lagged `past_covariates` values used to predict the next time step/s.
+            If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
+            where `0` corresponds to the first predicted time step of each sample.
+            If a list of integers, each value must be < 0. Uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (integer or list of integers). The
+            key 'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         lags_future_covariates
-            Number of lagged future_covariates values used to predict the next time step. If a tuple (past, future) is
-            given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
-            `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
-            of integers with lags is required.
+            Lagged `future_covariates` values used to predict the next time step/s.
+            If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
+            future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, ..., future - 1)`, where `0`
+            corresponds to the first predicted time step of each sample.
+            If a list of integers, uses only the specified values as lags.
+            If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
+            using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
+            'default_lags' can be used to provide default lags for un-specified components. Raises an error if some
+            components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py
index 404452ba74..835d793196 100644
--- a/darts/utils/data/tabularization.py
+++ b/darts/utils/data/tabularization.py
@@ -475,9 +475,9 @@ def create_lagged_prediction_data(
     target_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
     future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
-    lags: Optional[Sequence[int]] = None,
-    lags_past_covariates: Optional[Sequence[int]] = None,
-    lags_future_covariates: Optional[Sequence[int]] = None,
+    lags: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_past_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
+    lags_future_covariates: Optional[Union[Sequence[int], Dict[str, List[int]]]] = None,
     uses_static_covariates: bool = True,
     last_static_covariates_shape: Optional[Tuple[int, int]] = None,
     max_samples_per_ts: Optional[int] = None,
@@ -508,15 +508,18 @@ def create_lagged_prediction_data(
         Optionally, the lags of the target series to be used as (auto-regressive) features. If not specified,
         auto-regressive features will *not* be added to `X`. Each lag value is assumed to be negative (e.g.
         `lags = [-3, -1]` will extract `target_series` values which are 3 timesteps and 1 timestep away from
-        the current value).
+        the current value). If the lags are provided as a dictionary, the lags values are specific to each
+        component in the target series.
     lags_past_covariates
         Optionally, the lags of `past_covariates` to be used as features. Like `lags`, each lag value is assumed to
-        be less than or equal to -1.
+        be less than or equal to -1. If the lags are provided as a dictionary, the lags values are specific to each
+        component in the past covariates series.
     lags_future_covariates
         Optionally, the lags of `future_covariates` to be used as features. Unlike `lags` and `lags_past_covariates`,
         `lags_future_covariates` values can be positive (i.e. use values *after* time `t` to predict target at
         time `t`), zero (i.e. use values *at* time `t` to predict target at time `t`), and/or negative (i.e. use
-        values *before* time `t` to predict target at time `t`).
+        values *before* time `t` to predict target at time `t`). If the lags are provided as a dictionary, the lags
+        values are specific to each component in the future covariates series.
     uses_static_covariates
         Whether the model uses/expects static covariates. If `True`, it enforces that static covariates must
         have identical shapes across all target series.
@@ -793,7 +796,7 @@ def create_lagged_component_names(
 
 
 def _create_lagged_data_by_moving_window(
-    target_series: TimeSeries,
+    target_series: Optional[TimeSeries],
     output_chunk_length: int,
     past_covariates: Optional[TimeSeries],
     future_covariates: Optional[TimeSeries],

From 96f1a7f07e4dee03605b577ceaeb70ccc372bf26 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Thu, 31 Aug 2023 10:07:24 +0200
Subject: [PATCH 21/30] fix: passing covariates when trained on multiple series

---
 Dockerfile                                    | 32 +++++-----
 .../forecasting/test_regression_models.py     | 61 ++++++++++++++++---
 2 files changed, 69 insertions(+), 24 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b604f92713..bf07201462 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,24 @@
-FROM jupyter/base-notebook:python-3.9.5
+FROM ubuntu:latest
 
-RUN conda update --all -y --quiet \
- && conda install -c conda-forge ipywidgets -y --quiet \
- && conda clean --all -f -y
+# setup packages
+RUN apt-get update -y
+RUN apt-get install -y python3 python-is-python3 python3-pip default-jre
+RUN pip install --upgrade pip
 
-USER root
+# install python requirements before copying the rest of the files
+# this way we can cache the requirements and not have to reinstall them
+COPY requirements/ /app/requirements/
+RUN pip install -r /app/requirements/dev-all.txt
 
-# to build pystan
-RUN apt-get update \
- && apt-get -y install build-essential \
- && apt-get clean && rm -rf /var/lib/apt/lists/*
+# copy local files
+COPY . /app
 
-USER $NB_USER
+# set work directory
+WORKDIR /app
 
-ADD . /home/jovyan/work
+# install darts
+RUN pip install -e .
 
-WORKDIR /home/jovyan/work
-
-RUN pip install .
+# assuming you are working out of your darts directory:
+# docker build . -t darts-test:latest
+# docker run -it -v $(pwd)/:/app/ darts-test:latest bash
\ No newline at end of file
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 8786ecbcc1..1ef940a6c9 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -1647,6 +1647,15 @@ def test_component_specific_lags(self, config):
         series = tg.gaussian_timeseries(length=20, column_name="gaussian")
         if multivar_target:
             series = series.stack(tg.sine_timeseries(length=20, column_name="sine"))
+
+        future_cov = tg.linear_timeseries(length=30, column_name="lin_future")
+        if multivar_future_cov:
+            future_cov = future_cov.stack(
+                tg.sine_timeseries(length=30, column_name="sine_future")
+            )
+
+        past_cov = tg.linear_timeseries(length=30, column_name="lin_past")
+
         if multiple_series:
             # second series have different component names
             series = [
@@ -1658,13 +1667,9 @@ def test_component_specific_lags(self, config):
                 + 10,
             ]
 
-        future_cov = tg.linear_timeseries(length=30, column_name="lin_future")
-        if multivar_future_cov:
-            future_cov = future_cov.stack(
-                tg.sine_timeseries(length=30, column_name="sine_future")
-            )
+            past_cov = [past_cov, past_cov]
 
-        past_cov = tg.linear_timeseries(length=30, column_name="lin_past")
+            future_cov = [future_cov, future_cov]
 
         # the lags are identical across the components for each series
         model = LinearRegressionModel(**list_lags)
@@ -1683,14 +1688,50 @@ def test_component_specific_lags(self, config):
         )
 
         # n == output_chunk_length
-        pred = model.predict(1, series=series[0] if multiple_series else None)
-        pred2 = model2.predict(1, series=series[0] if multiple_series else None)
+        pred = model.predict(
+            1,
+            series=series[0] if multiple_series else None,
+            past_covariates=past_cov[0]
+            if multiple_series and model.supports_past_covariates
+            else None,
+            future_covariates=future_cov[0]
+            if multiple_series and model.supports_future_covariates
+            else None,
+        )
+        pred2 = model2.predict(
+            1,
+            series=series[0] if multiple_series else None,
+            past_covariates=past_cov[0]
+            if multiple_series and model2.supports_past_covariates
+            else None,
+            future_covariates=future_cov[0]
+            if multiple_series and model2.supports_future_covariates
+            else None,
+        )
         np.testing.assert_array_almost_equal(pred.values(), pred2.values())
         assert pred.time_index.equals(pred2.time_index)
 
         # n > output_chunk_length
-        pred = model.predict(3, series=series[0] if multiple_series else None)
-        pred2 = model2.predict(3, series=series[0] if multiple_series else None)
+        pred = model.predict(
+            3,
+            series=series[0] if multiple_series else None,
+            past_covariates=past_cov[0]
+            if multiple_series and model.supports_past_covariates
+            else None,
+            future_covariates=future_cov[0]
+            if multiple_series and model.supports_future_covariates
+            else None,
+        )
+        pred2 = model2.predict(
+            3,
+            series=series[0] if multiple_series else None,
+            past_covariates=past_cov[0]
+            if multiple_series and model2.supports_past_covariates
+            else None,
+            future_covariates=future_cov[0]
+            if multiple_series and model2.supports_future_covariates
+            else None,
+        )
         np.testing.assert_array_almost_equal(pred.values(), pred2.values())
         assert pred.time_index.equals(pred2.time_index)
 

From d987141279b2188fecea11ec8702f69cc885a8d5 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Thu, 31 Aug 2023 14:17:41 +0200
Subject: [PATCH 22/30] fix: moved the series components consistency to
 create_lagged_data to limit iteration of the series

---
 darts/models/forecasting/forecasting_model.py | 14 ----------
 darts/models/forecasting/regression_model.py  | 26 +++++++++++++++----
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py
index a848674570..452d2368cd 100644
--- a/darts/models/forecasting/forecasting_model.py
+++ b/darts/models/forecasting/forecasting_model.py
@@ -2078,20 +2078,6 @@ def fit(
             ):
                 self.static_covariates = series.static_covariates
         else:
-            # check that all the ts within one group have the same number of components
-            for ts_sequence, cov_name in zip(
-                [series, past_covariates, future_covariates],
-                ["series", "past_covariates", "future_covariates"],
-            ):
-                raise_if(
-                    ts_sequence is not None
-                    and not all(
-                        [ts_sequence[0].width == ts.width for ts in ts_sequence]
-                    ),
-                    f"All the series in `{cov_name}` should have the same number of components",
-                    logger,
-                )
-
             if past_covariates is not None:
                 self._expect_past_covariates = True
             if future_covariates is not None:
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 6b0c52842a..164817c2fa 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -456,7 +456,11 @@ def _get_last_prediction_time(self, series, forecast_horizon, overlap_end):
         return last_valid_pred_time
 
     def _create_lagged_data(
-        self, target_series, past_covariates, future_covariates, max_samples_per_ts
+        self,
+        target_series: Sequence[TimeSeries],
+        past_covariates: Sequence[TimeSeries],
+        future_covariates: Sequence[TimeSeries],
+        max_samples_per_ts: int,
     ):
         (
             features,
@@ -479,7 +483,19 @@ def _create_lagged_data(
             concatenate=False,
         )
 
+        expected_nb_feat = (
+            features[0].shape[1]
+            if isinstance(features, Sequence)
+            else features.shape[1]
+        )
         for i, (X_i, y_i) in enumerate(zip(features, labels)):
+            # number of components inconsistency, cannot determine from which argument without iterating
+            raise_if(
+                expected_nb_feat != X_i.shape[1],
+                "When `series`, `past_covariates` or `future_covariates` is provided as a `Sequence[TimeSeries]`, "
+                "all the `TimeSeries` in the `Sequence` must have the same number of components.",
+                logger,
+            )
             features[i] = X_i[:, :, 0]
             labels[i] = y_i[:, :, 0]
 
@@ -490,10 +506,10 @@ def _create_lagged_data(
 
     def _fit_model(
         self,
-        target_series,
-        past_covariates,
-        future_covariates,
-        max_samples_per_ts,
+        target_series: Sequence[TimeSeries],
+        past_covariates: Sequence[TimeSeries],
+        future_covariates: Sequence[TimeSeries],
+        max_samples_per_ts: int,
         **kwargs,
     ):
         """

From 70467cf7fa4972f230bcb731a86bc8ff9bf011eb Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Thu, 31 Aug 2023 15:27:18 +0200
Subject: [PATCH 23/30] fix: improved the error message for components
 inconsistency, improve tests parametrization

---
 darts/models/forecasting/regression_model.py  | 21 +++++---
 .../forecasting/test_regression_models.py     | 48 ++++++++++++++-----
 2 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 164817c2fa..00310d0ea9 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -489,13 +489,20 @@ def _create_lagged_data(
             else features.shape[1]
         )
         for i, (X_i, y_i) in enumerate(zip(features, labels)):
-            # number of components inconsistency, cannot determine from which argument without iterating
-            raise_if(
-                expected_nb_feat != X_i.shape[1],
-                "When `series`, `past_covariates` or `future_covariates` is provided as a `Sequence[TimeSeries]`, "
-                "all the `TimeSeries` in the `Sequence` must have the same number of components.",
-                logger,
-            )
+            # TODO: account for scenario where two wrong shapes can silently hide the problem
+            if expected_nb_feat != X_i.shape[1]:
+                shape_error_msg = []
+                for ts, cov_name, arg_name in zip(
+                    [target_series, past_covariates, future_covariates],
+                    ["target", "past", "future"],
+                    ["series", "past_covariates", "future_covariates"],
+                ):
+                    if ts is not None and ts[i].width != self.input_dim[cov_name]:
+                        shape_error_msg.append(
+                            f"Expected {self.input_dim[cov_name]} components but received "
+                            f"{target_series[i].width} components at index {i} of `{arg_name}`."
+                        )
+                raise_log(ValueError("\n".join(shape_error_msg)), logger)
             features[i] = X_i[:, :, 0]
             labels[i] = y_i[:, :, 0]
 
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 1ef940a6c9..c6446ae426 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -27,7 +27,6 @@
     RegressionModel,
     XGBModel,
 )
-from darts.models.forecasting.forecasting_model import GlobalForecastingModel
 from darts.utils import timeseries_generation as tg
 from darts.utils.multioutput import MultiOutputRegressor
 
@@ -1019,11 +1018,33 @@ def test_models_runnability(self, config):
     def test_fit(self, config):
         # test fitting both on univariate and multivariate timeseries
         model, mode, series = config
+        # auto-regression but past_covariates does not extend enough in the future
         with pytest.raises(ValueError):
             model_instance = model(lags=4, lags_past_covariates=4, multi_models=mode)
             model_instance.fit(series=series, past_covariates=self.sine_multivariate1)
             model_instance.predict(n=10)
 
+        # inconsistent number of components in series Sequence[TimeSeries]
+        with pytest.raises(ValueError) as err:
+            model_instance = model(lags=4, multi_models=mode)
+            model_instance.fit(series=[series.stack(series + 10), series])
+            assert (
+                str(err.value)
+                == "Expected 2 components but received 1 components at index 1 of `series`"
+            )
+
+        # inconsistent number of components in past_covariates Sequence[TimeSeries]
+        with pytest.raises(ValueError) as err:
+            model_instance = model(lags=4, lags_past_covariates=2, multi_models=mode)
+            model_instance.fit(
+                series=[series, series + 10],
+                past_covariates=[self.sine_univariate1, self.sine_multivariate1],
+            )
+            assert (
+                str(err.value)
+                == "Expected 1 components but received 2 components at index 1 of `past_covariates`"
+            )
+
         model_instance = model(lags=12, multi_models=mode)
         model_instance.fit(series=series)
         assert model_instance.lags.get("past") is None
@@ -2468,29 +2489,34 @@ def test_fit_predict_determinism(self, config):
     @pytest.mark.parametrize(
         "config", itertools.product(models_cls_kwargs_errs, [True, False])
     )
-    def test_probabilistic_forecast_accuracy(self, config):
+    def test_probabilistic_forecast_accuracy_univariate(self, config):
         (model_cls, model_kwargs, err), mode = config
         model_kwargs["multi_models"] = mode
+        model = model_cls(**model_kwargs)
         self.helper_test_probabilistic_forecast_accuracy(
-            model_cls,
-            model_kwargs,
+            model,
             err,
             self.constant_ts,
             self.constant_noisy_ts,
         )
-        if issubclass(model_cls, GlobalForecastingModel):
+
+    @pytest.mark.slow
+    @pytest.mark.parametrize(
+        "config", itertools.product(models_cls_kwargs_errs, [True, False])
+    )
+    def test_probabilistic_forecast_accuracy_multivariate(self, config):
+        (model_cls, model_kwargs, err), mode = config
+        model_kwargs["multi_models"] = mode
+        model = model_cls(**model_kwargs)
+        if model.supports_multivariate:
             self.helper_test_probabilistic_forecast_accuracy(
-                model_cls,
-                model_kwargs,
+                model,
                 err,
                 self.constant_multivar_ts,
                 self.constant_noisy_multivar_ts,
             )
 
-    def helper_test_probabilistic_forecast_accuracy(
-        self, model_cls, model_kwargs, err, ts, noisy_ts
-    ):
-        model = model_cls(**model_kwargs)
+    def helper_test_probabilistic_forecast_accuracy(self, model, err, ts, noisy_ts):
         model.fit(noisy_ts[:100])
         pred = model.predict(n=100, num_samples=100)
 

From f2a9e0802bcd670310873ee3f26c089207f0ec63 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 1 Sep 2023 18:01:52 +0200
Subject: [PATCH 24/30] fix: addressing reviewer comments

---
 darts/models/forecasting/regression_model.py  | 43 ++++++++-----------
 .../explainability/test_shap_explainer.py     | 14 +++++-
 .../forecasting/test_regression_models.py     | 24 ++++++-----
 3 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 00310d0ea9..497de459fb 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -229,24 +229,9 @@ def _generate_lags(
             if lags_values is None:
                 continue
 
-            # check type of argument before converting to dictionary
+            # converting to dictionary to run sanity checks
             if not isinstance(lags_values, dict):
-                raise_if(
-                    lags_name == "lags_future_covariates"
-                    and not isinstance(lags_values, (tuple, list)),
-                    f"`lags_future_covariates` must be of type tuple, list or dict."
-                    f"Given: {type(lags_values)}.",
-                )
-
-                raise_if(
-                    lags_name in ["lags", "lags_past_covariates"]
-                    and not isinstance(lags_values, (int, list)),
-                    f"`{lags_name}` must be of type int, list or dict."
-                    f"Given: {type(lags_values)}.",
-                )
-
                 lags_values = {"default_lags": lags_values}
-
             elif len(lags_values) == 0:
                 raise_log(
                     ValueError(
@@ -267,24 +252,26 @@ def _generate_lags(
                             len(comp_lags) == 2
                             and isinstance(comp_lags[0], int)
                             and isinstance(comp_lags[1], int),
-                            f"`{lags_name}` tuple must be of length 2, and must contain two integers",
+                            f"`{lags_name}` - `{comp_name}`: tuple must be of length 2, and must contain two integers",
                             logger,
                         )
 
                         raise_if(
                             isinstance(comp_lags[0], bool)
                             or isinstance(comp_lags[1], bool),
-                            f"`{lags_name}` tuple must contain integers, not bool",
+                            f"`{lags_name}` - `{comp_name}`: tuple must contain integers, not bool",
                             logger,
                         )
 
                         raise_if_not(
                             comp_lags[0] >= 0 and comp_lags[1] >= 0,
-                            f"`{lags_name}` tuple must contain positive integers. Given: {comp_lags}.",
+                            f"`{lags_name}` - `{comp_name}`: tuple must contain positive integers. Given: {comp_lags}.",
+                            logger,
                         )
                         raise_if(
                             comp_lags[0] == 0 and comp_lags[1] == 0,
-                            f"`{lags_name}` tuple cannot be (0, 0) as it corresponds to an empty list of lags.",
+                            f"`{lags_name}` - `{comp_name}`: tuple cannot be (0, 0) as it corresponds to an empty "
+                            f"list of lags.",
                             logger,
                         )
                         tmp_components_lags[comp_name] = list(
@@ -294,7 +281,8 @@ def _generate_lags(
                         for lag in comp_lags:
                             raise_if(
                                 not isinstance(lag, int) or isinstance(lag, bool),
-                                f"`{lags_name}` list must contain only integers. Given: {comp_lags}.",
+                                f"`{lags_name}` - `{comp_name}`: list must contain only integers. Given: {comp_lags}.",
+                                logger,
                             )
                         tmp_components_lags[comp_name] = sorted(comp_lags)
                     else:
@@ -304,14 +292,17 @@ def _generate_lags(
                     if isinstance(comp_lags, int):
                         raise_if_not(
                             comp_lags > 0,
-                            f"`{lags_name}` integer must be strictly positive . Given: {comp_lags}.",
+                            f"`{lags_name}` - `{comp_name}`: integer must be strictly positive . Given: {comp_lags}.",
+                            logger,
                         )
                         tmp_components_lags[comp_name] = list(range(-comp_lags, 0))
                     elif isinstance(comp_lags, list):
                         for lag in comp_lags:
                             raise_if(
                                 not isinstance(lag, int) or (lag >= 0),
-                                f"`{lags_name}` list must contain only strictly negative integers. Given: {comp_lags}.",
+                                f"`{lags_name}` - `{comp_name}`: list must contain only strictly negative integers. "
+                                f"Given: {comp_lags}.",
+                                logger,
                             )
                         tmp_components_lags[comp_name] = sorted(comp_lags)
                     else:
@@ -321,8 +312,8 @@ def _generate_lags(
                 if invalid_type:
                     raise_log(
                         ValueError(
-                            f"When passed in a dictionary, `{lags_name}` for component {comp_name} must be either a "
-                            f"{supported_types}, received : {type(comp_lags)}."
+                            f"`{lags_name}` - `{comp_name}`: must be either a {supported_types}. "
+                            f"Gived : {type(comp_lags)}."
                         ),
                         logger,
                     )
@@ -500,7 +491,7 @@ def _create_lagged_data(
                     if ts is not None and ts[i].width != self.input_dim[cov_name]:
                         shape_error_msg.append(
                             f"Expected {self.input_dim[cov_name]} components but received "
-                            f"{target_series[i].width} components at index {i} of `{arg_name}`."
+                            f"{ts[i].width} components at index {i} of `{arg_name}`."
                         )
                 raise_log(ValueError("\n".join(shape_error_msg)), logger)
             features[i] = X_i[:, :, 0]
diff --git a/darts/tests/explainability/test_shap_explainer.py b/darts/tests/explainability/test_shap_explainer.py
index 2ea6e61f97..a5e950adb4 100644
--- a/darts/tests/explainability/test_shap_explainer.py
+++ b/darts/tests/explainability/test_shap_explainer.py
@@ -830,13 +830,25 @@ def test_shap_regressor_component_specific_lags(self):
         shap_explain = ShapExplainer(model)
 
         # one column per lag, grouped by components
+        expected_columns = [
+            "price_target_lag-3",
+            "price_target_lag-2",
+            "power_target_lag-1",
+        ]
         expected_df = pd.DataFrame(
             data=np.stack(
                 [np.arange(1, 29), np.arange(3, 31), np.arange(106, 161, 2)], axis=1
             ),
-            columns=["price_target_lag-3", "price_target_lag-2", "power_target_lag-1"],
+            columns=expected_columns,
         )
 
         # check that the appropriate lags are extracted
         assert all(shap_explain.explainers.background_X == expected_df)
         assert model.lagged_feature_names == list(expected_df.columns)
+
+        # check that explain() can be called
+        explanation_results = shap_explain.explain()
+        plt.close()
+        for comp in ts.components:
+            comps_out = explanation_results.explained_forecasts[1][comp].columns
+            assert all(comps_out == expected_columns)
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index c6446ae426..09afd4a773 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -1025,25 +1025,29 @@ def test_fit(self, config):
             model_instance.predict(n=10)
 
         # inconsistent number of components in series Sequence[TimeSeries]
+        training_series = [series.stack(series + 10), series]
         with pytest.raises(ValueError) as err:
             model_instance = model(lags=4, multi_models=mode)
-            model_instance.fit(series=[series.stack(series + 10), series])
-            assert (
-                str(err.value)
-                == "Expected 2 components but received 1 components at index 1 of `series`"
-            )
+            model_instance.fit(series=training_series)
+        assert (
+            str(err.value)
+            == f"Expected {training_series[0].width} components but received {training_series[1].width} "
+            f"components at index 1 of `series`."
+        )
 
         # inconsistent number of components in past_covariates Sequence[TimeSeries]
+        training_past_covs = [series, series.stack(series * 2)]
         with pytest.raises(ValueError) as err:
             model_instance = model(lags=4, lags_past_covariates=2, multi_models=mode)
             model_instance.fit(
                 series=[series, series + 10],
-                past_covariates=[self.sine_univariate1, self.sine_multivariate1],
-            )
-            assert (
-                str(err.value)
-                == "Expected 1 components but received 2 components at index 1 of `past_covariates`"
+                past_covariates=training_past_covs,
             )
+        assert (
+            str(err.value)
+            == f"Expected {training_past_covs[0].width} components but received {training_past_covs[1].width} "
+            f"components at index 1 of `past_covariates`."
+        )
 
         model_instance = model(lags=12, multi_models=mode)
         model_instance.fit(series=series)

From f0967f65b36425fd1d2a9901c755d09392bf3deb Mon Sep 17 00:00:00 2001
From: madtoinou <32447896+madtoinou@users.noreply.github.com>
Date: Fri, 1 Sep 2023 18:04:20 +0200
Subject: [PATCH 25/30] Apply suggestions from code review

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
---
 CHANGELOG.md                                             | 2 +-
 darts/models/forecasting/regression_model.py             | 4 +---
 darts/tests/models/forecasting/test_regression_models.py | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 785147bdff..e9a5186fbd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,7 +14,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 - `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou).
 - Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch` that allows to use an integer `start` either as the index position or index value/label for `series` indexed with a `pd.RangeIndex`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou).
 - Reduced the size of the Darts docker image `unit8/darts:latest`, and included all optional models as well as dev requirements. [#1878](https://github.com/unit8co/darts/pull/1878) by [Alex Colpitts](https://github.com/alexcolpitts96). 
-- `RegressionModel` can now be created with different lags for each component of the target and past/future covariates series. [#1962](https://github.com/unit8co/darts/pull/1962) by [Antoine Madrona](https://github.com/madtoinou).
+- All `RegressionModel`s now support component/column-specific lags for target, past, and future covariates series. [#1962](https://github.com/unit8co/darts/pull/1962) by [Antoine Madrona](https://github.com/madtoinou).
 
 **Fixed**
 - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou).
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 497de459fb..e765201a6b 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -193,14 +193,12 @@ def encode_year(idx):
         )
 
         # convert lags arguments to list of int
-        processed_lags, processed_component_lags = self._generate_lags(
+        self.lags, self.component_lags = self._generate_lags(
             lags=lags,
             lags_past_covariates=lags_past_covariates,
             lags_future_covariates=lags_future_covariates,
         )
 
-        self.lags = processed_lags
-        self.component_lags = processed_component_lags
 
         self.pred_dim = self.output_chunk_length if self.multi_models else 1
 
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 09afd4a773..9149239dc4 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -545,7 +545,7 @@ def test_training_data_creation(self, mode):
         # cannot use 'default_lags' because it's converted in `fit()`, before calling `_created_lagged_data`
         model_instance = RegressionModel(
             lags={"0-trgt-0": [-4, -3], "0-trgt-1": [-3, -2], "0-trgt-2": [-2, -1]},
-            lags_past_covariates={"0-pcov-0": [-10], "0-pvoc-1": [-7]},
+            lags_past_covariates={"0-pcov-0": [-10], "0-pcov-1": [-7]},
             lags_future_covariates={"0-fcov-0": (2, 2)},
             multi_models=mode,
         )

From be536952423c1977f98aa42f06d0b5ce87f2ec11 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 4 Sep 2023 09:18:12 +0200
Subject: [PATCH 26/30] test: checking that the name of the features is
 correctly generated when using dict to define the lags

---
 .../forecasting/test_regression_models.py     | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 9149239dc4..9ce0a4fa85 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -590,6 +590,27 @@ def test_training_data_creation(self, mode):
             20098,  # future cov; target + 20'000
         ]
 
+        # checking the name of the lagged features
+        model_instance.fit(
+            series=self.target_series[0],
+            past_covariates=self.past_covariates[0],
+            future_covariates=self.future_covariates[0],
+        )
+        assert model_instance.lagged_feature_names == [
+            "0-trgt-0_target_lag-4",
+            "0-trgt-0_target_lag-3",
+            "0-trgt-1_target_lag-3",
+            "0-trgt-1_target_lag-2",
+            "0-trgt-2_target_lag-2",
+            "0-trgt-2_target_lag-1",
+            "0-pcov-0_pastcov_lag-10",
+            "0-pcov-1_pastcov_lag-7",
+            "0-fcov-0_futcov_lag-2",
+            "0-fcov-0_futcov_lag-1",
+            "0-fcov-0_futcov_lag0",
+            "0-fcov-0_futcov_lag1",
+        ]
+
     @pytest.mark.parametrize("mode", [True, False])
     def test_prediction_data_creation(self, mode):
         # assigning correct names to variables

From 1b2bd4c481a9a3e438f796930ce79e6d330f9fe3 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 4 Sep 2023 09:19:57 +0200
Subject: [PATCH 27/30] fix: linting

---
 darts/models/forecasting/regression_model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index e765201a6b..539f74bc41 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -199,7 +199,6 @@ def encode_year(idx):
             lags_future_covariates=lags_future_covariates,
         )
 
-
         self.pred_dim = self.output_chunk_length if self.multi_models else 1
 
     def _generate_lags(

From 1ea2c7f0a92885d78062f5e6f763efd0c688188f Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Mon, 4 Sep 2023 11:23:53 +0200
Subject: [PATCH 28/30] fix: updating the error msg

---
 .../tabularization/test_create_lagged_prediction_data.py      | 4 ++--
 .../utils/tabularization/test_create_lagged_training_data.py  | 4 ++--
 darts/tests/utils/tabularization/test_get_feature_times.py    | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/darts/tests/utils/tabularization/test_create_lagged_prediction_data.py b/darts/tests/utils/tabularization/test_create_lagged_prediction_data.py
index 3c46330022..4bff71fbe9 100644
--- a/darts/tests/utils/tabularization/test_create_lagged_prediction_data.py
+++ b/darts/tests/utils/tabularization/test_create_lagged_prediction_data.py
@@ -1396,7 +1396,7 @@ def test_lagged_prediction_data_invalid_lag_values_error(self):
                     use_moving_windows=use_moving_windows,
                 )
             assert (
-                "`lags` must be a `Sequence` containing only `int` values less than 0."
+                "`lags` must be a `Sequence` or `Dict` containing only `int` values less than 0."
             ) == str(err.value)
             # Test invalid `lags_past_covariates` values:
             with pytest.raises(ValueError) as err:
@@ -1407,7 +1407,7 @@ def test_lagged_prediction_data_invalid_lag_values_error(self):
                     use_moving_windows=use_moving_windows,
                 )
             assert (
-                "`lags_past_covariates` must be a `Sequence` containing only `int` values less than 0."
+                "`lags_past_covariates` must be a `Sequence` or `Dict` containing only `int` values less than 0."
             ) == str(err.value)
             # This should *not* throw an error:
             create_lagged_prediction_data(
diff --git a/darts/tests/utils/tabularization/test_create_lagged_training_data.py b/darts/tests/utils/tabularization/test_create_lagged_training_data.py
index b17a3f862c..98f515e545 100644
--- a/darts/tests/utils/tabularization/test_create_lagged_training_data.py
+++ b/darts/tests/utils/tabularization/test_create_lagged_training_data.py
@@ -1695,7 +1695,7 @@ def test_lagged_training_data_invalid_lag_values_error(self):
                     use_moving_windows=use_moving_windows,
                 )
             assert (
-                "`lags` must be a `Sequence` containing only `int` values less than 0."
+                "`lags` must be a `Sequence` or `Dict` containing only `int` values less than 0."
             ) == str(err.value)
             # Test invalid `lags_past_covariates` values:
             with pytest.raises(ValueError) as err:
@@ -1708,7 +1708,7 @@ def test_lagged_training_data_invalid_lag_values_error(self):
                     use_moving_windows=use_moving_windows,
                 )
             assert (
-                "`lags_past_covariates` must be a `Sequence` containing only `int` values less than 0."
+                "`lags_past_covariates` must be a `Sequence` or `Dict` containing only `int` values less than 0."
             ) == str(err.value)
             # Test invalid `lags_future_covariates` values:
             create_lagged_training_data(
diff --git a/darts/tests/utils/tabularization/test_get_feature_times.py b/darts/tests/utils/tabularization/test_get_feature_times.py
index 6402fc2d32..e63a8e4057 100644
--- a/darts/tests/utils/tabularization/test_get_feature_times.py
+++ b/darts/tests/utils/tabularization/test_get_feature_times.py
@@ -1055,7 +1055,7 @@ def test_feature_times_invalid_lag_values_error(self):
         with pytest.raises(ValueError) as err:
             _get_feature_times(target_series=series, lags=[0], is_training=False)
         assert (
-            "`lags` must be a `Sequence` containing only `int` values less than 0."
+            "`lags` must be a `Sequence` or `Dict` containing only `int` values less than 0."
         ) == str(err.value)
         # `lags_past_covariates` not <= -1:
         with pytest.raises(ValueError) as err:
@@ -1063,7 +1063,7 @@ def test_feature_times_invalid_lag_values_error(self):
                 past_covariates=series, lags_past_covariates=[0], is_training=False
             )
         assert (
-            "`lags_past_covariates` must be a `Sequence` containing only `int` values less than 0."
+            "`lags_past_covariates` must be a `Sequence` or `Dict` containing only `int` values less than 0."
         ) == str(err.value)
         # `lags_future_covariates` can be positive, negative, and/or zero - no error should be thrown:
         _get_feature_times(

From 970d8a3952a42d7d7b40c8ac293b7692d4d0d224 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Thu, 14 Sep 2023 10:19:15 +0200
Subject: [PATCH 29/30] fix: bug when the number of lags is different across
 components

---
 darts/models/forecasting/regression_model.py  |  8 +-
 .../forecasting/test_regression_models.py     | 98 ++++++++++++++++++-
 2 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 539f74bc41..2d052eafbf 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -914,7 +914,9 @@ def predict(
                     ]
                     # values are grouped by component
                     np_X.append(
-                        np.concatenate(tmp_X).reshape(len(series) * num_samples, -1)
+                        np.concatenate(tmp_X, axis=1).reshape(
+                            len(series) * num_samples, -1
+                        )
                     )
                 else:
                     # values are grouped by lags
@@ -943,7 +945,9 @@ def predict(
                             )
                         ]
                         np_X.append(
-                            np.concatenate(tmp_X).reshape(len(series) * num_samples, -1)
+                            np.concatenate(tmp_X, axis=1).reshape(
+                                len(series) * num_samples, -1
+                            )
                         )
                     else:
                         np_X.append(
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 9ce0a4fa85..852fa4adec 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -1680,7 +1680,7 @@ def test_integer_indexed_series(self, mode):
             [True, False],
         ),
     )
-    def test_component_specific_lags(self, config):
+    def test_component_specific_lags_forecasts(self, config):
         """Verify that the same lags, defined using int/list or dictionnaries yield the same results"""
         (list_lags, dict_lags), multiple_series = config
         multivar_target = "lags" in dict_lags and len(dict_lags["lags"]) > 1
@@ -1712,9 +1712,7 @@ def test_component_specific_lags(self, config):
                 )
                 + 10,
             ]
-
             past_cov = [past_cov, past_cov]
-
             future_cov = [future_cov, future_cov]
 
         # the lags are identical across the components for each series
@@ -1781,6 +1779,100 @@ def test_component_specific_lags(self, config):
         np.testing.assert_array_almost_equal(pred.values(), pred2.values())
         assert pred.time_index.equals(pred2.time_index)
 
+    @pytest.mark.parametrize(
+        "config",
+        itertools.product(
+            [
+                {"lags": {"gaussian": [-1, -3], "sine": [-2, -4, -6]}},
+                {"lags_past_covariates": {"default_lags": 2}},
+                {
+                    "lags": {
+                        "gaussian": [-5, -2, -1],
+                        "sine": [-2, -1],
+                    },
+                    "lags_future_covariates": {
+                        "lin_future": (1, 4),
+                        "default_lags": (2, 2),
+                    },
+                },
+                {
+                    "lags": {
+                        "default_lags": [-5, -4],
+                    },
+                    "lags_future_covariates": {
+                        "sine_future": (1, 1),
+                        "default_lags": [-2, 4, 6, 7, 8],
+                    },
+                },
+            ],
+            [True, False],
+        ),
+    )
+    def test_component_specific_lags(self, config):
+        """Check various combinations of component-specific lags"""
+        (dict_lags, multiple_series) = config
+        multivar_target = "lags" in dict_lags and len(dict_lags["lags"]) > 1
+        multivar_future_cov = (
+            "lags_future_covariates" in dict_lags
+            and len(dict_lags["lags_future_covariates"]) > 1
+        )
+
+        # create series based on the model parameters
+        series = tg.gaussian_timeseries(length=20, column_name="gaussian")
+        if multivar_target:
+            series = series.stack(tg.sine_timeseries(length=20, column_name="sine"))
+
+        future_cov = tg.linear_timeseries(length=30, column_name="lin_future")
+        if multivar_future_cov:
+            future_cov = future_cov.stack(
+                tg.sine_timeseries(length=30, column_name="sine_future")
+            )
+
+        past_cov = tg.linear_timeseries(length=30, column_name="lin_past")
+
+        if multiple_series:
+            # the second series has different component names
+            series = [
+                series,
+                series.with_columns_renamed(
+                    ["gaussian", "sine"][: series.width],
+                    ["other", "names"][: series.width],
+                )
+                + 10,
+            ]
+            past_cov = [past_cov, past_cov]
+            future_cov = [future_cov, future_cov]
+
+        model = LinearRegressionModel(**dict_lags, output_chunk_length=4)
+        model.fit(
+            series=series,
+            past_covariates=past_cov if model.supports_past_covariates else None,
+            future_covariates=future_cov if model.supports_future_covariates else None,
+        )
+        # n < output_chunk_length
+        model.predict(
+            1,
+            series=series[0] if multiple_series else None,
+            past_covariates=past_cov[0]
+            if multiple_series and model.supports_past_covariates
+            else None,
+            future_covariates=future_cov[0]
+            if multiple_series and model.supports_future_covariates
+            else None,
+        )
+
+        # n > output_chunk_length
+        model.predict(
+            7,
+            series=series[0] if multiple_series else None,
+            past_covariates=past_cov[0]
+            if multiple_series and model.supports_past_covariates
+            else None,
+            future_covariates=future_cov[0]
+            if multiple_series and model.supports_future_covariates
+            else None,
+        )
+
     @pytest.mark.parametrize(
         "config",
         itertools.product(

From edf855461750d750cf2cb65cf5a96b5483a32984 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Thu, 14 Sep 2023 10:30:04 +0200
Subject: [PATCH 30/30] fix: future lags in test

---
 darts/tests/models/forecasting/test_regression_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py
index 6e2566a627..9d5c369526 100644
--- a/darts/tests/models/forecasting/test_regression_models.py
+++ b/darts/tests/models/forecasting/test_regression_models.py
@@ -1811,7 +1811,7 @@ def test_component_specific_lags_forecasts(self, config):
                     },
                     "lags_future_covariates": {
                         "sine_future": (1, 1),
-                        "default_lags": [-2, 4, 6, 7, 8],
+                        "default_lags": [-2, 0, 1, 2],
                     },
                 },
             ],