Skip to content

Commit

Permalink
Fix/bug time series.append and prepend (#2522)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlessiopSymplectic authored Sep 13, 2024
1 parent 08640f2 commit 5740da5
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 52 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ but cannot always guarantee backwards compatibility. Changes that may **break co

- Added `IQRDetector`, which detects anomalies using the interquartile range algorithm. [#2441] by [Igor Urbanik](https://github.com/u8-igor).
- Added hyperparameters controlling the hidden layer sizes for the feature encoders in `TiDEModel`. [#2408](https://github.com/unit8co/darts/issues/2408) by [eschibli](https://github.com/eschibli).
- Added hyperparameter `activation` to `BlockRNNModel` to specify the activation function in case of a multi-layer output network. [#2408](https://github.com/unit8co/darts/issues/2408) by [eschibli](https://github.com/eschibli).
- Added support for broadcasting to TimeSeries on component and sample level. [#2476](https://github.com/unit8co/darts/pull/2476) by [Joel L.](https://github.com/Joelius300).
- Helper function `darts.utils.utils.generate_index()` now accepts datetime strings as `start` and `end` parameters to generate the pandas DatetimeIndex. [#2522](https://github.com/unit8co/darts/pull/2522) by [Dennis Bader](https://github.com/dennisbader).
- Various improvements in the documentation:
- Made README's forecasting model support table more colorblind-friendly. [#2433](https://github.com/unit8co/darts/pull/2433)
- Updated the Ray Tune Hyperparameter Optimization example in the [user guide](https://unit8co.github.io/darts/userguide/hyperparameter_optimization.html) to work with the latest `ray` versions (`>=2.31.0`). [#2459](https://github.com/unit8co/darts/pull/2459) by [He Weilin](https://github.com/cnhwl).
Expand All @@ -22,6 +24,8 @@ but cannot always guarantee backwards compatibility. Changes that may **break co

**Fixed**

- Fixed a bug where passing an empty array to `TimeSeries.prepend/append_values()` raised an error. [#2522](https://github.com/unit8co/darts/pull/2522) by [Alessio Pellegrini](https://github.com/AlessiopSymplectic).
- Fixed a bug with `TimeSeries.prepend/append_values()`, where the name of the (time) index was lost. [#2522](https://github.com/unit8co/darts/pull/2522) by [Alessio Pellegrini](https://github.com/AlessiopSymplectic).
- Fixed a bug when using `from_group_dataframe()` with a `time_col` of type integer, where the resulting time index was wrongly converted to a DatetimeIndex. [#2512](https://github.com/unit8co/darts/pull/2512) by [Alessio Pellegrini](https://github.com/AlessiopSymplectic).
- Fixed a bug when using `historical_forecasts()` with a pre-trained `RegressionModel` that has no target lags `lags=None` but uses static covariates. [#2426](https://github.com/unit8co/darts/pull/2426) by [Dennis Bader](https://github.com/dennisbader).
- Fixed a bug with `xgboost>=2.1.0`, where multi output regression was not properly handled. [#2426](https://github.com/unit8co/darts/pull/2426) by [Dennis Bader](https://github.com/dennisbader).
Expand Down
128 changes: 106 additions & 22 deletions darts/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from darts import TimeSeries, concatenate
from darts.utils.timeseries_generation import constant_timeseries, linear_timeseries
from darts.utils.utils import freqs, generate_index
from darts.utils.utils import expand_arr, freqs, generate_index


class TestTimeSeries:
Expand Down Expand Up @@ -762,6 +762,9 @@ def helper_test_prepend_values(test_case, test_series: TimeSeries):
assert test_series.time_index.equals(prepended_sq.time_index)
assert prepended_sq.components.equals(test_series.components)

# component and sample dimension should match
assert prepended._xa.shape[1:] == test_series._xa.shape[1:]

def test_slice(self):
TestTimeSeries.helper_test_slice(self, self.series1)

Expand Down Expand Up @@ -797,18 +800,112 @@ def test_append(self):
assert appended.time_index.equals(expected_idx)
assert appended.components.equals(series_1.components)

def test_append_values(self):
TestTimeSeries.helper_test_append_values(self, self.series1)
# Check `append_values` deals with `RangeIndex` series correctly:
series = linear_timeseries(start=1, length=5, freq=2)
appended = series.append_values(np.ones((2, 1, 1)))
expected_vals = np.concatenate(
[series.all_values(), np.ones((2, 1, 1))], axis=0
@pytest.mark.parametrize(
"config",
itertools.product(
[
( # univariate array
np.array([0, 1, 2]).reshape((3, 1, 1)),
np.array([0, 1]).reshape((2, 1, 1)),
),
( # multivariate array
np.array([0, 1, 2, 3, 4, 5]).reshape((3, 2, 1)),
np.array([0, 1, 2, 3]).reshape((2, 2, 1)),
),
( # empty array
np.array([0, 1, 2]).reshape((3, 1, 1)),
np.array([]).reshape((0, 1, 1)),
),
(
# wrong number of components
np.array([0, 1, 2]).reshape((3, 1, 1)),
np.array([0, 1, 2, 3]).reshape((2, 2, 1)),
),
(
# wrong number of samples
np.array([0, 1, 2]).reshape((3, 1, 1)),
np.array([0, 1, 2, 3]).reshape((2, 1, 2)),
),
( # univariate list with times
np.array([0, 1, 2]).reshape((3, 1, 1)),
[0, 1],
),
( # univariate list with times and components
np.array([0, 1, 2]).reshape((3, 1, 1)),
[[0], [1]],
),
( # univariate list with times, components and samples
np.array([0, 1, 2]).reshape((3, 1, 1)),
[[[0]], [[1]]],
),
( # multivar with list has wrong shape
np.array([0, 1, 2, 3]).reshape((2, 2, 1)),
[[1, 2], [3, 4]],
),
( # list with wrong number of components
np.array([0, 1, 2]).reshape((3, 1, 1)),
[[1, 2], [3, 4]],
),
( # list with wrong number of samples
np.array([0, 1, 2]).reshape((3, 1, 1)),
[[[0, 1]], [[1, 2]]],
),
( # multivar input but list has wrong shape
np.array([0, 1, 2, 3]).reshape((2, 2, 1)),
[1, 2],
),
],
[True, False],
["append_values", "prepend_values"],
),
)
def test_append_and_prepend_values(self, config):
(series_vals, vals), is_datetime, method = config
start = "20240101" if is_datetime else 1
series_idx = generate_index(
start=start, length=len(series_vals), name="some_name"
)
expected_idx = pd.RangeIndex(start=1, stop=15, step=2)
series = TimeSeries.from_times_and_values(
times=series_idx,
values=series_vals,
)

# expand if it's a list
vals_arr = np.array(vals) if isinstance(vals, list) else vals
vals_arr = expand_arr(vals_arr, ndim=3)

ts_method = getattr(TimeSeries, method)

if vals_arr.shape[1:] != series_vals.shape[1:]:
with pytest.raises(ValueError) as exc:
_ = ts_method(series, vals)
assert str(exc.value).startswith(
"The (expanded) values must have the same number of components and samples"
)
return

appended = ts_method(series, vals)

if method == "append_values":
expected_vals = np.concatenate([series_vals, vals_arr], axis=0)
expected_idx = generate_index(
start=series.start_time(),
length=len(series_vals) + len(vals),
freq=series.freq,
)
else:
expected_vals = np.concatenate([vals_arr, series_vals], axis=0)
expected_idx = generate_index(
end=series.end_time(),
length=len(series_vals) + len(vals),
freq=series.freq,
)

assert np.allclose(appended.all_values(), expected_vals)
assert appended.time_index.equals(expected_idx)
assert appended.components.equals(series.components)
assert appended._xa.shape[1:] == series._xa.shape[1:]
assert appended.time_index.name == series.time_index.name

def test_prepend(self):
TestTimeSeries.helper_test_prepend(self, self.series1)
Expand All @@ -824,19 +921,6 @@ def test_prepend(self):
assert prepended.time_index.equals(expected_idx)
assert prepended.components.equals(series_1.components)

def test_prepend_values(self):
TestTimeSeries.helper_test_prepend_values(self, self.series1)
# Check `prepend_values` deals with `RangeIndex` series correctly:
series = linear_timeseries(start=1, length=5, freq=2)
prepended = series.prepend_values(np.ones((2, 1, 1)))
expected_vals = np.concatenate(
[np.ones((2, 1, 1)), series.all_values()], axis=0
)
expected_idx = pd.RangeIndex(start=-3, stop=11, step=2)
assert np.allclose(prepended.all_values(), expected_vals)
assert prepended.time_index.equals(expected_idx)
assert prepended.components.equals(series.components)

@pytest.mark.parametrize(
"config",
[
Expand Down
50 changes: 49 additions & 1 deletion darts/tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from darts.utils import _with_sanity_checks
from darts.utils.missing_values import extract_subseries
from darts.utils.ts_utils import retain_period_common_to_all
from darts.utils.utils import freqs, generate_index, n_steps_between
from darts.utils.utils import expand_arr, freqs, generate_index, n_steps_between


class TestUtils:
Expand Down Expand Up @@ -418,6 +418,25 @@ def test_generate_index_with_end_length(self, config):
assert idx[0] == expected_start
assert idx[-1] == expected_start + (n_steps - 1) * freq

@pytest.mark.parametrize(
    "config",
    [
        ("2000-01-01", None),
        (None, "2000-01-03"),
        ("2000-01-01", "2000-01-03"),
    ],
)
def test_generate_index_with_string(self, config):
    """Check that date strings given as `start` / `end` produce the same index as
    the equivalent pandas Timestamps."""
    start, end = config
    # `length` is only passed when one of the two boundaries is missing
    length = None if (start is not None and end is not None) else 3

    idx_from_str = generate_index(start=start, end=end, length=length)

    # reference: same call, but with the strings converted to Timestamps up front
    start_ts = None if start is None else pd.Timestamp(start)
    end_ts = None if end is None else pd.Timestamp(end)
    idx_from_ts = generate_index(start=start_ts, end=end_ts, length=length)

    assert idx_from_str.equals(idx_from_ts)

@pytest.mark.parametrize(
"config",
[
Expand Down Expand Up @@ -539,3 +558,32 @@ def test_n_steps_between(self, config):
assert n_steps == expected_n_steps
n_steps_reversed = n_steps_between(end=start, start=end, freq=freq)
assert n_steps_reversed == -expected_n_steps

@pytest.mark.parametrize(
    "config",
    [
        (np.array([0, 1, 2]), (3, 1, 1)),
        (np.array([[0], [1], [2]]), (3, 1, 1)),
        (np.array([[[0]], [[1]], [[2]]]), (3, 1, 1)),
        (np.array([[0, 1], [2, 3], [3, 4]]), (3, 2, 1)),
        (np.array([[[0], [1]], [[1], [2]], [[3], [4]]]), (3, 2, 1)),
        (
            np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]]]),
            (3, 2, 2),
        ),
    ],
)
def test_expand_arr(self, config):
    """Check that `expand_arr(..., ndim=3)` returns the expected shape and values."""
    arr, shape_expected = config

    # build the reference by appending trailing singleton axes to 1D / 2D inputs
    n_dims = arr.ndim
    if n_dims == 2:
        arr_expected = arr[..., None]
    elif n_dims == 1:
        arr_expected = arr[..., None, None]
    else:
        arr_expected = arr

    expanded = expand_arr(arr, ndim=3)
    assert expanded.shape == shape_expected
    np.testing.assert_array_almost_equal(expanded, arr_expected)
73 changes: 47 additions & 26 deletions darts/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@

from darts.logging import get_logger, raise_if, raise_if_not, raise_log
from darts.utils import _build_tqdm_iterator, _parallel_apply
from darts.utils.utils import expand_arr, generate_index, n_steps_between
from darts.utils.utils import (
expand_arr,
generate_index,
n_steps_between,
)

try:
from typing import Literal
Expand Down Expand Up @@ -2855,10 +2859,10 @@ def append(self, other: Self) -> Self:
"Both series must have the same number of components.",
logger,
)
if self._has_datetime_index:
if len(self) > 0 and len(other) > 0:
raise_if_not(
other.start_time() == self.end_time() + self.freq,
"Appended TimeSeries must start one time step after current one.",
"Appended TimeSeries must start one (time) step after current one.",
logger,
)

Expand Down Expand Up @@ -2892,17 +2896,28 @@ def append_values(self, values: np.ndarray) -> Self:
TimeSeries
A new TimeSeries with the new values appended
"""
if self._has_datetime_index:
idx = pd.DatetimeIndex(
[self.end_time() + i * self._freq for i in range(1, len(values) + 1)],
freq=self._freq,
)
else:
idx = pd.RangeIndex(
start=self.end_time() + self._freq,
stop=self.end_time() + (len(values) + 1) * self._freq,
step=self._freq,
if len(values) == 0:
return self.copy()

values = np.array(values) if not isinstance(values, np.ndarray) else values
values = expand_arr(values, ndim=len(DIMS))
if not values.shape[1:] == self._xa.values.shape[1:]:
raise_log(
ValueError(
f"The (expanded) values must have the same number of components and samples "
f"(second and third dims) as the series to append to. "
f"Received shape: {values.shape}, expected: {self._xa.values.shape}"
),
logger=logger,
)

idx = generate_index(
start=self.end_time() + self.freq,
length=len(values),
freq=self.freq,
name=self._time_index.name,
)

return self.append(
self.__class__.from_times_and_values(
values=values,
Expand Down Expand Up @@ -2951,22 +2966,28 @@ def prepend_values(self, values: np.ndarray) -> Self:
TimeSeries
A new TimeSeries with the new values prepended.
"""
if len(values) == 0:
return self.copy()

if self._has_datetime_index:
idx = pd.DatetimeIndex(
[
self.start_time() - i * self._freq
for i in reversed(range(1, len(values) + 1))
],
freq=self._freq,
)
else:
idx = pd.RangeIndex(
self.start_time() - self.freq * len(values),
self.start_time(),
step=self.freq,
values = np.array(values) if not isinstance(values, np.ndarray) else values
values = expand_arr(values, ndim=len(DIMS))
if not values.shape[1:] == self._xa.values.shape[1:]:
raise_log(
ValueError(
f"The (expanded) values must have the same number of components and samples "
f"(second and third dims) as the series to prepend to. "
f"Received shape: {values.shape}, expected: {self._xa.values.shape}"
),
logger=logger,
)

idx = generate_index(
end=self.start_time() - self.freq,
length=len(values),
freq=self.freq,
name=self._time_index.name,
)

return self.prepend(
self.__class__.from_times_and_values(
values=values,
Expand Down
9 changes: 6 additions & 3 deletions darts/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,8 @@ def n_steps_between(


def generate_index(
start: Optional[Union[pd.Timestamp, int]] = None,
end: Optional[Union[pd.Timestamp, int]] = None,
start: Optional[Union[pd.Timestamp, str, int]] = None,
end: Optional[Union[pd.Timestamp, str, int]] = None,
length: Optional[int] = None,
freq: Union[str, int, pd.DateOffset] = None,
name: str = None,
Expand All @@ -441,7 +441,7 @@ def generate_index(
Parameters
----------
start
The start of the returned index. If a pandas Timestamp is passed, the index will be a pandas
The start of the returned index. If a pandas Timestamp or a date string is passed, the index will be a pandas
DatetimeIndex. If an integer is passed, the index will be a pandas RangeIndex index. Works only with
either `length` or `end`.
end
Expand Down Expand Up @@ -477,6 +477,9 @@ def generate_index(
logger,
)

start = pd.Timestamp(start) if isinstance(start, str) else start
end = pd.Timestamp(end) if isinstance(end, str) else end

if isinstance(start, pd.Timestamp) or isinstance(end, pd.Timestamp):
freq = "D" if freq is None else freq
freq = pd.tseries.frequencies.to_offset(freq) if isinstance(freq, str) else freq
Expand Down

0 comments on commit 5740da5

Please sign in to comment.