
Feat/specify lags per component for RegressionModel #1962

Merged Sep 14, 2023 (38 commits)

Changes from 1 commit. Commits in this pull request:
cfd381b
feat: updated lags sanity checks to accept dictionnary
madtoinou Aug 18, 2023
b3ce1f1
fix: better management of corner cases during lags checks
madtoinou Aug 18, 2023
2dde70f
fix: improved modularity
madtoinou Aug 18, 2023
65c82a7
fix: simplified the logic a bit
madtoinou Aug 18, 2023
9c5b312
feat: when generating lagged data, the values can be extracted using …
madtoinou Aug 18, 2023
753db5b
feat: raise error if all the ts in target/past/future don't have the …
madtoinou Aug 18, 2023
0cdeee7
feat: added support for component-specific lags in fit() and predict()
madtoinou Aug 21, 2023
f24ea84
test: added tests and fix some bug accordingly
madtoinou Aug 21, 2023
01b8409
feat: component-wise lags support encoders, improved sanity checks
madtoinou Aug 21, 2023
a671af8
feat: possibility to declare default lags for all the not specified c…
madtoinou Aug 23, 2023
2aa96a4
test: adding a test for the lagged data creation
madtoinou Aug 23, 2023
c3133b2
fix: typo
madtoinou Aug 23, 2023
41f30ec
Merge branch 'master' into feat/lags_per_component
madtoinou Aug 25, 2023
646b671
fix: adressing review comments
madtoinou Aug 25, 2023
3221f86
Apply suggestions from code review
madtoinou Aug 25, 2023
3254db3
refactor: lags argument are converted to dict before running the type…
madtoinou Aug 28, 2023
269005e
refactor: lags argument are converted to dict before running the type…
madtoinou Aug 28, 2023
bcd4455
doc: improved documentation of the component-specific lags in tabular…
madtoinou Aug 28, 2023
b859d9a
test: adding a test for the multivariate scenario
madtoinou Aug 28, 2023
c0121a5
test: checking the appriopriate lags are extracted by the shap explainer
madtoinou Aug 29, 2023
d682f13
fix: shapexplainer extract the appropriate lags, updated the type hints
madtoinou Aug 29, 2023
9db7f73
Merge branch 'master' into feat/lags_per_component
madtoinou Aug 31, 2023
96f1a7f
fix: passing covariates when trained on multiple series
madtoinou Aug 31, 2023
07f0f83
Merge branch 'master' into feat/lags_per_component
madtoinou Aug 31, 2023
d987141
fix: moved the series components consistency to create_lagged_data to…
madtoinou Aug 31, 2023
70467cf
fix: improved the error message for components inconsistency, improve…
madtoinou Aug 31, 2023
da735a2
Merge branch 'master' into feat/lags_per_component
madtoinou Sep 1, 2023
f2a9e08
fix: addressing reviewer comments
madtoinou Sep 1, 2023
f0967f6
Apply suggestions from code review
madtoinou Sep 1, 2023
be53695
test: checking that the name of the features is correctly generated w…
madtoinou Sep 4, 2023
b23da55
Merge branch 'master' into feat/lags_per_component
madtoinou Sep 4, 2023
1b2bd4c
fix: linting
madtoinou Sep 4, 2023
1ea2c7f
fix: updating the error msg
madtoinou Sep 4, 2023
0624f86
Merge branch 'master' into feat/lags_per_component
madtoinou Sep 6, 2023
970d8a3
fix: bug when the number of lags is different across components
madtoinou Sep 14, 2023
37c6b26
Merge branch 'master' into feat/lags_per_component
madtoinou Sep 14, 2023
edf8554
fix: future lags in test
madtoinou Sep 14, 2023
1235e59
Merge branch 'master' into feat/lags_per_component
madtoinou Sep 14, 2023
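For reference, a minimal usage sketch of the component-specific lags introduced by this PR, based on the tests added in the commit shown below (component names and lag values are illustrative):

from darts.models import LinearRegressionModel
from darts.utils import timeseries_generation as tg

# Two-component target series, mirroring the components used in the tests.
series = tg.gaussian_timeseries(length=20, column_name="gaussian").stack(
    tg.sine_timeseries(length=20, column_name="sine")
)

# Lags can now be passed as a dict mapping component names to lags.
model = LinearRegressionModel(lags={"gaussian": [-4, -2], "sine": [-5, -3]})
model.fit(series)
pred = model.predict(3)

# Per the new tests, model.lags["target"] holds the extreme lags across
# components ([-5, -2] here), while model.component_lags["target"] keeps
# the per-component dict that was passed in.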
test: added tests and fix some bug accordingly
madtoinou committed Aug 21, 2023
commit f24ea84756923cebfcc0d54c948e3cc6e70d3565
40 changes: 20 additions & 20 deletions darts/models/forecasting/regression_model.py
@@ -228,15 +228,15 @@ def _set_lags(

def _check_int_lags(lags: int, lags_name: str) -> List[int]:
raise_if_not(
lags > 0, f"{lags_name} must be strictly positive. Given: {lags}."
lags > 0, f"`{lags_name}` must be strictly positive. Given: {lags}."
)
return list(range(-lags, 0))

def _check_list_lags(lags: list, lags_name: str) -> List[int]:
for lag in lags:
raise_if(
not isinstance(lag, int) or (lag >= 0),
f"Every element of {lags_name} must be a strictly negative integer. Given: {lags}.",
f"Every element of `{lags_name}` must be a strictly negative integer. Given: {lags}.",
)
return sorted(lags)

@@ -245,11 +245,11 @@ def _check_tuple_future_lags(
) -> List[int]:
raise_if_not(
lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
f"{lags_name} tuple must contain stricly positibe integers. Given: {lags_future_covariates}.",
f"`{lags_name}` tuple must contain stricly positibe integers. Given: {lags_future_covariates}.",
)
raise_if(
lags_future_covariates[0] == 0 and lags_future_covariates[1] == 0,
f"{lags_name} tuple cannot be (0, 0) as it corresponds to an empty list of lags.",
f"`{lags_name}` tuple cannot be (0, 0) as it corresponds to an empty list of lags.",
logger,
)
return list(range(-lags_future_covariates[0], lags_future_covariates[1]))
@@ -260,7 +260,7 @@ def _check_list_future_lags(
for lag in lags_future_covariates:
raise_if(
not isinstance(lag, int) or isinstance(lag, bool),
f"Every element of {lags_name} must be an integer. Given: {lags_future_covariates}.",
f"Every element of `{lags_name}` must be an integer. Given: {lags_future_covariates}.",
)
return sorted(lags_future_covariates)

@@ -270,7 +270,7 @@ def _check_dict_lags(

raise_if_not(
len(lags) > 0,
f"When passed as a dictionnary, {lags_name} must contain at least one key.",
f"When passed as a dictionnary, `{lags_name}` must contain at least one key.",
logger,
)

@@ -284,23 +284,23 @@ def _check_dict_lags(
if lags_name == "lags_future_covariates":
if isinstance(comp_lags, tuple):
components_lags[comp_name] = _check_tuple_future_lags(
comp_lags, f"{lags_name} for component {comp_name}"
comp_lags, f"`{lags_name}` for component {comp_name}"
)
elif isinstance(comp_lags, list):
components_lags[comp_name] = _check_list_future_lags(
comp_lags, f"{lags_name} for component {comp_name}"
comp_lags, f"`{lags_name}` for component {comp_name}"
)
else:
invalid_type = True
supported_types = "tuple or a list"
else:
if isinstance(comp_lags, int):
components_lags[comp_name] = _check_int_lags(
comp_lags, f"{lags_name} for component {comp_name}"
comp_lags, f"`{lags_name}` for component {comp_name}"
)
elif isinstance(comp_lags, list):
components_lags[comp_name] = _check_list_lags(
comp_lags, f"{lags_name} for component {comp_name}"
comp_lags, f"`{lags_name}` for component {comp_name}"
)
else:
invalid_type = True
@@ -309,7 +309,7 @@ def _check_dict_lags(
if invalid_type:
raise_log(
ValueError(
f"When passed as a dictionnary, {lags_name} for component {comp_name} must be either a "
f"When passed as a dictionnary, `{lags_name}` for component {comp_name} must be either a "
f"{supported_types}, received : {type(comp_lags)}."
),
logger,
@@ -328,11 +328,11 @@ def _check_dict_lags(

# perform the type and sanity checks
if isinstance(lags, int):
self.lags["target"] = _check_int_lags(lags, "`lags`")
self.lags["target"] = _check_int_lags(lags, "lags")
elif isinstance(lags, list):
self.lags["target"] = _check_list_lags(lags, "`lags`")
self.lags["target"] = _check_list_lags(lags, "lags")
elif isinstance(lags, dict):
conv_lags = _check_dict_lags(lags, "`lags`")
conv_lags = _check_dict_lags(lags, "lags")
if conv_lags is not None:
# dummy, used to compute the extreme lags
self.lags["target"] = conv_lags[0]
@@ -341,14 +341,14 @@ def _check_dict_lags(

if isinstance(lags_past_covariates, int):
self.lags["past"] = _check_int_lags(
lags_past_covariates, "`lags_past_covariates`"
lags_past_covariates, "lags_past_covariates"
)
elif isinstance(lags_past_covariates, list):
self.lags["past"] = _check_list_lags(
lags_past_covariates, "`lags_past_covariates`"
lags_past_covariates, "lags_past_covariates"
)
elif isinstance(lags_past_covariates, dict):
conv_lags = _check_dict_lags(lags_past_covariates, "`lags_past_covariates`")
conv_lags = _check_dict_lags(lags_past_covariates, "lags_past_covariates")
if conv_lags is not None:
# dummy, used to compute the extreme lags
self.lags["past"] = conv_lags[0]
@@ -357,15 +357,15 @@ def _check_dict_lags(

if isinstance(lags_future_covariates, tuple):
self.lags["future"] = _check_tuple_future_lags(
lags_future_covariates, "`lags_future_covariates`"
lags_future_covariates, "lags_future_covariates"
)
elif isinstance(lags_future_covariates, list):
self.lags["future"] = _check_list_future_lags(
lags_future_covariates, "`lags_future_covariates`"
lags_future_covariates, "lags_future_covariates"
)
elif isinstance(lags_future_covariates, dict):
conv_lags = _check_dict_lags(
lags_future_covariates, "`lags_future_covariates`"
lags_future_covariates, "lags_future_covariates"
)
if conv_lags is not None:
# dummy, used to compute the extreme lags
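As a summary of the checks changed above, here is a simplified standalone sketch of how int/list/dict lags values are normalized into per-component lists of negative lags. The function name normalize_target_lags and the "default" key are placeholders for illustration, not part of the Darts code:

from typing import Dict, List, Union

def normalize_target_lags(lags: Union[int, list, dict]) -> Dict[str, List[int]]:
    """Simplified sketch of the _set_lags checks: an int n becomes
    [-n, ..., -1], a list must hold strictly negative ints and is sorted,
    and a dict applies the same rules to each component."""
    def from_int(n: int) -> List[int]:
        if n <= 0:
            raise ValueError(f"`lags` must be strictly positive. Given: {n}.")
        return list(range(-n, 0))

    def from_list(values: list) -> List[int]:
        if any(not isinstance(v, int) or v >= 0 for v in values):
            raise ValueError(
                f"Every element of `lags` must be a strictly negative integer. Given: {values}."
            )
        return sorted(values)

    if isinstance(lags, int):
        return {"default": from_int(lags)}  # "default" key is a placeholder
    if isinstance(lags, list):
        return {"default": from_list(lags)}
    if isinstance(lags, dict):
        if not lags:
            raise ValueError("When passed as a dictionary, `lags` must contain at least one key.")
        return {
            comp: from_int(v) if isinstance(v, int) else from_list(v)
            for comp, v in lags.items()
        }
    raise ValueError(f"Unsupported type for `lags`: {type(lags)}.")

# normalize_target_lags({"comp0": 3, "comp1": [-5, -3]})
# -> {"comp0": [-3, -2, -1], "comp1": [-5, -3]}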
98 changes: 97 additions & 1 deletion darts/tests/models/forecasting/test_regression_models.py
@@ -420,7 +420,9 @@ def test_model_construction(self, config):
# testing lags_past_covariates
model_instance = model(lags=None, lags_past_covariates=3, multi_models=mode)
assert model_instance.lags.get("past") == [-3, -2, -1]
# testing lags_future covariates
# lags_future covariates does not support SINGLE INT

# TESTING TUPLE of int, only supported by lags_future_covariates
model_instance = model(
lags=None, lags_future_covariates=(3, 5), multi_models=mode
)
@@ -435,6 +437,25 @@ def test_model_construction(self, config):
model_instance = model(lags_past_covariates=values, multi_models=mode)
assert model_instance.lags.get("past") == values
# testing lags_future_covariates
values = [-5, -1, 5]
model_instance = model(lags_future_covariates=values, multi_models=mode)
assert model_instance.lags.get("future") == values

# TESTING DICT, lags are specified component-wise
# model.lags contains the extreme across the components
values = {"comp0": [-4, -2], "comp1": [-5, -3]}
model_instance = model(lags=values, multi_models=mode)
assert model_instance.lags.get("target") == [-5, -2]
assert model_instance.component_lags.get("target") == values
# testing lags_past_covariates
model_instance = model(lags_past_covariates=values, multi_models=mode)
assert model_instance.lags.get("past") == [-5, -2]
assert model_instance.component_lags.get("past") == values
# testing lags_future_covariates
values = {"comp0": [-4, 2], "comp1": [-5, 3]}
model_instance = model(lags_future_covariates=values, multi_models=mode)
assert model_instance.lags.get("future") == [-5, 3]
assert model_instance.component_lags.get("future") == values

with pytest.raises(ValueError):
model(multi_models=mode)
@@ -464,6 +485,10 @@ def test_model_construction(self, config):
model(lags=5, lags_future_covariates=(1, True), multi_models=mode)
with pytest.raises(ValueError):
model(lags=5, lags_future_covariates=(1, 1.0), multi_models=mode)
with pytest.raises(ValueError):
model(lags=5, lags_future_covariates={}, multi_models=mode)
with pytest.raises(ValueError):
model(lags=None, lags_future_covariates={}, multi_models=mode)

@pytest.mark.parametrize("mode", [True, False])
def test_training_data_creation(self, mode):
@@ -1519,6 +1544,77 @@ def test_integer_indexed_series(self, mode):
# the time axis returned by the second model should be as expected
assert all(preds[1].time_index == pd.RangeIndex(start=50, stop=70, step=2))

@pytest.mark.parametrize(
"config",
[
({"lags": [-3, -2, -1]}, {"lags": {"gaussian": 3}}),
({"lags": 3}, {"lags": {"gaussian": 3, "sine": 3}}),
({"lags_past_covariates": 2}, {"lags_past_covariates": {"lin_past": 2}}),
(
{"lags": 5, "lags_future_covariates": [-2, 3]},
{
"lags": {
"gaussian": [-5, -4, -3, -2, -1],
"sine": [-5, -4, -3, -2, -1],
},
"lags_future_covariates": {
"lin_future": [-2, 3],
"sine_future": [-2, 3],
},
},
),
],
)
def test_component_specific_lags(self, config):
"""Verify that the same lags, defined using int/list or dictionnaries yield the same results"""
list_lags, dict_lags = config
multivar_target = "lags" in dict_lags and len(dict_lags["lags"]) > 1
multivar_future_cov = (
"lags_future_covariates" in dict_lags
and len(dict_lags["lags_future_covariates"]) > 1
)

# create series based on the model parameters
series = tg.gaussian_timeseries(length=20, column_name="gaussian")
if multivar_target:
series = series.stack(tg.sine_timeseries(length=20, column_name="sine"))

future_cov = tg.linear_timeseries(length=30, column_name="lin_future")
if multivar_future_cov:
future_cov = future_cov.stack(
tg.sine_timeseries(length=30, column_name="sine_future")
)

past_cov = tg.linear_timeseries(length=30, column_name="lin_past")

# the lags are identical across the components for each series
model = LinearRegressionModel(**list_lags)
model.fit(
series=series,
past_covariates=past_cov if model.supports_past_covariates else None,
future_covariates=future_cov if model.supports_future_covariates else None,
)

# the lags are specified for each component, individually
model2 = LinearRegressionModel(**dict_lags)
model2.fit(
series=series,
past_covariates=past_cov if model2.supports_past_covariates else None,
future_covariates=future_cov if model2.supports_future_covariates else None,
)

# n == output_chunk_length
pred = model.predict(1)
pred2 = model2.predict(1)
np.testing.assert_array_almost_equal(pred.values(), pred2.values())
assert pred.time_index.equals(pred2.time_index)

# n > output_chunk_length
pred = model.predict(3)
pred2 = model2.predict(3)
np.testing.assert_array_almost_equal(pred.values(), pred2.values())
assert pred.time_index.equals(pred2.time_index)

@pytest.mark.parametrize(
"config",
itertools.product(