From 5740da5deffe8ee7472f8c127332ad10c7826add Mon Sep 17 00:00:00 2001
From: AlessiopSymplectic
 <160468679+AlessiopSymplectic@users.noreply.github.com>
Date: Fri, 13 Sep 2024 13:16:16 +0200
Subject: [PATCH] Fix/bug time series.append and prepend (#2522)

---
 CHANGELOG.md                    |   4 +
 darts/tests/test_timeseries.py  | 128 ++++++++++++++++++++++++++------
 darts/tests/utils/test_utils.py |  50 ++++++++++++-
 darts/timeseries.py             |  73 +++++++++++-------
 darts/utils/utils.py            |   9 ++-
 5 files changed, 212 insertions(+), 52 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3584fffb32..02718b8b2e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,9 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 
 - Added `IQRDetector`, that allows to detect anomalies using the interquartile range algorithm. [#2441] by [Igor Urbanik](https://github.com/u8-igor).
 - Added hyperparameters controlling the hidden layer sizes for the feature encoders in `TiDEModel`. [#2408](https://github.com/unit8co/darts/issues/2408) by [eschibli](https://github.com/eschibli).
+- Added hyperparameter `activation` to `BlockRNNModel` to specify the activation function in case of a multi-layer output network. [#2408](https://github.com/unit8co/darts/issues/2408) by [eschibli](https://github.com/eschibli).
 - Added support for broadcasting to TimeSeries on component and sample level. [#2476](https://https://github.com/unit8co/darts/pull/2476) by [Joel L.](https://github.com/Joelius300).
+- Helper function `darts.utils.utils.generate_index()` now accepts datetime strings as `start` and `end` parameters to generate the pandas DatetimeIndex. [#2522](https://github.com/unit8co/darts/pull/2522) by [Dennis Bader](https://github.com/dennisbader).
 - Various improvements in the documentation:
   - Made README's forecasting model support table more colorblind-friendly. [#2433](https://github.com/unit8co/darts/pull/2433)
   - Updated the Ray Tune Hyperparameter Optimization example in the [user guide](https://unit8co.github.io/darts/userguide/hyperparameter_optimization.html) to work with the latest `ray` versions (`>=2.31.0`). [#2459](https://github.com/unit8co/darts/pull/2459) by [He Weilin](https://github.com/cnhwl).
@@ -22,6 +24,8 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 
 **Fixed**
 
+- Fixed a bug where passing an empty array to `TimeSeries.prepend/append_values()` raised an error. [#2522](https://github.com/unit8co/darts/pull/2522) by [Alessio Pellegrini](https://github.com/AlessiopSymplectic)
+- Fixed a bug with `TimeSeries.prepend/append_values()`, where the name of the (time) index was lost. [#2522](https://github.com/unit8co/darts/pull/2522) by [Alessio Pellegrini](https://github.com/AlessiopSymplectic)
 - Fixed a bug when using `from_group_dataframe()` with a `time_col` of type integer, where the resulting time index was wrongly converted to a DatetimeIndex. [#2512](https://github.com/unit8co/darts/pull/2512) by [Alessio Pellegrini](https://github.com/AlessiopSymplectic)
 - Fixed a bug when using `historical_forecasts()` with a pre-trained `RegressionModel` that has no target lags `lags=None` but uses static covariates. [#2426](https://github.com/unit8co/darts/pull/2426) by [Dennis Bader](https://github.com/dennisbader).
 - Fixed a bug with `xgboost>=2.1.0`, where multi output regression was not properly handled. [#2426](https://github.com/unit8co/darts/pull/2426) by [Dennis Bader](https://github.com/dennisbader).
diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py
index bd5e1b1562..0fc6f577fb 100644
--- a/darts/tests/test_timeseries.py
+++ b/darts/tests/test_timeseries.py
@@ -11,7 +11,7 @@
 
 from darts import TimeSeries, concatenate
 from darts.utils.timeseries_generation import constant_timeseries, linear_timeseries
-from darts.utils.utils import freqs, generate_index
+from darts.utils.utils import expand_arr, freqs, generate_index
 
 
 class TestTimeSeries:
@@ -762,6 +762,9 @@ def helper_test_prepend_values(test_case, test_series: TimeSeries):
         assert test_series.time_index.equals(prepended_sq.time_index)
         assert prepended_sq.components.equals(test_series.components)
 
+        # component and sample dimension should match
+        assert prepended._xa.shape[1:] == test_series._xa.shape[1:]
+
     def test_slice(self):
         TestTimeSeries.helper_test_slice(self, self.series1)
 
@@ -797,18 +800,112 @@ def test_append(self):
         assert appended.time_index.equals(expected_idx)
         assert appended.components.equals(series_1.components)
 
-    def test_append_values(self):
-        TestTimeSeries.helper_test_append_values(self, self.series1)
-        # Check `append_values` deals with `RangeIndex` series correctly:
-        series = linear_timeseries(start=1, length=5, freq=2)
-        appended = series.append_values(np.ones((2, 1, 1)))
-        expected_vals = np.concatenate(
-            [series.all_values(), np.ones((2, 1, 1))], axis=0
+    @pytest.mark.parametrize(
+        "config",
+        itertools.product(
+            [
+                (  # univariate array
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    np.array([0, 1]).reshape((2, 1, 1)),
+                ),
+                (  # multivariate array
+                    np.array([0, 1, 2, 3, 4, 5]).reshape((3, 2, 1)),
+                    np.array([0, 1, 2, 3]).reshape((2, 2, 1)),
+                ),
+                (  # empty array
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    np.array([]).reshape((0, 1, 1)),
+                ),
+                (
+                    # wrong number of components
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    np.array([0, 1, 2, 3]).reshape((2, 2, 1)),
+                ),
+                (
+                    # wrong number of samples
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    np.array([0, 1, 2, 3]).reshape((2, 1, 2)),
+                ),
+                (  # univariate list with times
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    [0, 1],
+                ),
+                (  # univariate list with times and components
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    [[0], [1]],
+                ),
+                (  # univariate list with times, components and samples
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    [[[0]], [[1]]],
+                ),
+                (  # multivariate list with times and components
+                    np.array([0, 1, 2, 3]).reshape((2, 2, 1)),
+                    [[1, 2], [3, 4]],
+                ),
+                (  # list with wrong number of components
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    [[1, 2], [3, 4]],
+                ),
+                (  # list with wrong number of samples
+                    np.array([0, 1, 2]).reshape((3, 1, 1)),
+                    [[[0, 1]], [[1, 2]]],
+                ),
+                (  # multivar input but list has wrong shape
+                    np.array([0, 1, 2, 3]).reshape((2, 2, 1)),
+                    [1, 2],
+                ),
+            ],
+            [True, False],
+            ["append_values", "prepend_values"],
+        ),
+    )
+    def test_append_and_prepend_values(self, config):
+        (series_vals, vals), is_datetime, method = config
+        start = "20240101" if is_datetime else 1
+        series_idx = generate_index(
+            start=start, length=len(series_vals), name="some_name"
         )
-        expected_idx = pd.RangeIndex(start=1, stop=15, step=2)
+        series = TimeSeries.from_times_and_values(
+            times=series_idx,
+            values=series_vals,
+        )
+
+        # expand if it's a list
+        vals_arr = np.array(vals) if isinstance(vals, list) else vals
+        vals_arr = expand_arr(vals_arr, ndim=3)
+
+        ts_method = getattr(TimeSeries, method)
+
+        if vals_arr.shape[1:] != series_vals.shape[1:]:
+            with pytest.raises(ValueError) as exc:
+                _ = ts_method(series, vals)
+            assert str(exc.value).startswith(
+                "The (expanded) values must have the same number of components and samples"
+            )
+            return
+
+        appended = ts_method(series, vals)
+
+        if method == "append_values":
+            expected_vals = np.concatenate([series_vals, vals_arr], axis=0)
+            expected_idx = generate_index(
+                start=series.start_time(),
+                length=len(series_vals) + len(vals),
+                freq=series.freq,
+            )
+        else:
+            expected_vals = np.concatenate([vals_arr, series_vals], axis=0)
+            expected_idx = generate_index(
+                end=series.end_time(),
+                length=len(series_vals) + len(vals),
+                freq=series.freq,
+            )
+
         assert np.allclose(appended.all_values(), expected_vals)
         assert appended.time_index.equals(expected_idx)
         assert appended.components.equals(series.components)
+        assert appended._xa.shape[1:] == series._xa.shape[1:]
+        assert appended.time_index.name == series.time_index.name
 
     def test_prepend(self):
         TestTimeSeries.helper_test_prepend(self, self.series1)
@@ -824,19 +921,6 @@ def test_prepend(self):
         assert prepended.time_index.equals(expected_idx)
         assert prepended.components.equals(series_1.components)
 
-    def test_prepend_values(self):
-        TestTimeSeries.helper_test_prepend_values(self, self.series1)
-        # Check `prepend_values` deals with `RangeIndex` series correctly:
-        series = linear_timeseries(start=1, length=5, freq=2)
-        prepended = series.prepend_values(np.ones((2, 1, 1)))
-        expected_vals = np.concatenate(
-            [np.ones((2, 1, 1)), series.all_values()], axis=0
-        )
-        expected_idx = pd.RangeIndex(start=-3, stop=11, step=2)
-        assert np.allclose(prepended.all_values(), expected_vals)
-        assert prepended.time_index.equals(expected_idx)
-        assert prepended.components.equals(series.components)
-
     @pytest.mark.parametrize(
         "config",
         [
diff --git a/darts/tests/utils/test_utils.py b/darts/tests/utils/test_utils.py
index 809bf84bf5..3b82c5f1b2 100644
--- a/darts/tests/utils/test_utils.py
+++ b/darts/tests/utils/test_utils.py
@@ -7,7 +7,7 @@
 from darts.utils import _with_sanity_checks
 from darts.utils.missing_values import extract_subseries
 from darts.utils.ts_utils import retain_period_common_to_all
-from darts.utils.utils import freqs, generate_index, n_steps_between
+from darts.utils.utils import expand_arr, freqs, generate_index, n_steps_between
 
 
 class TestUtils:
@@ -418,6 +418,25 @@ def test_generate_index_with_end_length(self, config):
         assert idx[0] == expected_start
         assert idx[-1] == expected_start + (n_steps - 1) * freq
 
+    @pytest.mark.parametrize(
+        "config",
+        [
+            ("2000-01-01", None),
+            (None, "2000-01-03"),
+            ("2000-01-01", "2000-01-03"),
+        ],
+    )
+    def test_generate_index_with_string(self, config):
+        """Test that index generation with strings as start or end gives same results as with pandas Timestamps."""
+        start, end = config
+        length = 3 if (start is None or end is None) else None
+        idx = generate_index(start=start, end=end, length=length)
+
+        start_ts = pd.Timestamp(start) if start is not None else start
+        end_ts = pd.Timestamp(end) if end is not None else end
+        idx_expected = generate_index(start=start_ts, end=end_ts, length=length)
+        assert idx.equals(idx_expected)
+
     @pytest.mark.parametrize(
         "config",
         [
@@ -539,3 +558,32 @@ def test_n_steps_between(self, config):
         assert n_steps == expected_n_steps
         n_steps_reversed = n_steps_between(end=start, start=end, freq=freq)
         assert n_steps_reversed == -expected_n_steps
+
+    @pytest.mark.parametrize(
+        "config",
+        [
+            (np.array([0, 1, 2]), (3, 1, 1)),
+            (np.array([[0], [1], [2]]), (3, 1, 1)),
+            (np.array([[[0]], [[1]], [[2]]]), (3, 1, 1)),
+            (np.array([[0, 1], [2, 3], [3, 4]]), (3, 2, 1)),
+            (np.array([[[0], [1]], [[1], [2]], [[3], [4]]]), (3, 2, 1)),
+            (
+                np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]]]),
+                (3, 2, 2),
+            ),
+        ],
+    )
+    def test_expand_arr(self, config):
+        """Test array expansion to 3D."""
+        arr, shape_expected = config
+
+        if len(arr.shape) == 1:
+            arr_expected = arr[:, None, None]
+        elif len(arr.shape) == 2:
+            arr_expected = arr[:, :, None]
+        else:
+            arr_expected = arr
+
+        arr = expand_arr(arr, ndim=3)
+        assert arr.shape == shape_expected
+        np.testing.assert_array_almost_equal(arr, arr_expected)
diff --git a/darts/timeseries.py b/darts/timeseries.py
index b14e37a4b9..f0c9fc006d 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -53,7 +53,11 @@
 
 from darts.logging import get_logger, raise_if, raise_if_not, raise_log
 from darts.utils import _build_tqdm_iterator, _parallel_apply
-from darts.utils.utils import expand_arr, generate_index, n_steps_between
+from darts.utils.utils import (
+    expand_arr,
+    generate_index,
+    n_steps_between,
+)
 
 try:
     from typing import Literal
@@ -2855,10 +2859,10 @@ def append(self, other: Self) -> Self:
             "Both series must have the same number of components.",
             logger,
         )
-        if self._has_datetime_index:
+        if len(self) > 0 and len(other) > 0:
             raise_if_not(
                 other.start_time() == self.end_time() + self.freq,
-                "Appended TimeSeries must start one time step after current one.",
+                "Appended TimeSeries must start one (time) step after current one.",
                 logger,
             )
 
@@ -2892,17 +2896,28 @@ def append_values(self, values: np.ndarray) -> Self:
         TimeSeries
             A new TimeSeries with the new values appended
         """
-        if self._has_datetime_index:
-            idx = pd.DatetimeIndex(
-                [self.end_time() + i * self._freq for i in range(1, len(values) + 1)],
-                freq=self._freq,
-            )
-        else:
-            idx = pd.RangeIndex(
-                start=self.end_time() + self._freq,
-                stop=self.end_time() + (len(values) + 1) * self._freq,
-                step=self._freq,
+        if len(values) == 0:
+            return self.copy()
+
+        values = np.array(values) if not isinstance(values, np.ndarray) else values
+        values = expand_arr(values, ndim=len(DIMS))
+        if not values.shape[1:] == self._xa.values.shape[1:]:
+            raise_log(
+                ValueError(
+                    f"The (expanded) values must have the same number of components and samples "
+                    f"(second and third dims) as the series to append to. "
+                    f"Received shape: {values.shape}, expected: {self._xa.values.shape}"
+                ),
+                logger=logger,
             )
+
+        idx = generate_index(
+            start=self.end_time() + self.freq,
+            length=len(values),
+            freq=self.freq,
+            name=self._time_index.name,
+        )
+
         return self.append(
             self.__class__.from_times_and_values(
                 values=values,
@@ -2951,22 +2966,28 @@ def prepend_values(self, values: np.ndarray) -> Self:
         TimeSeries
             A new TimeSeries with the new values prepended.
         """
+        if len(values) == 0:
+            return self.copy()
 
-        if self._has_datetime_index:
-            idx = pd.DatetimeIndex(
-                [
-                    self.start_time() - i * self._freq
-                    for i in reversed(range(1, len(values) + 1))
-                ],
-                freq=self._freq,
-            )
-        else:
-            idx = pd.RangeIndex(
-                self.start_time() - self.freq * len(values),
-                self.start_time(),
-                step=self.freq,
+        values = np.array(values) if not isinstance(values, np.ndarray) else values
+        values = expand_arr(values, ndim=len(DIMS))
+        if not values.shape[1:] == self._xa.values.shape[1:]:
+            raise_log(
+                ValueError(
+                    f"The (expanded) values must have the same number of components and samples "
+                    f"(second and third dims) as the series to prepend to. "
+                    f"Received shape: {values.shape}, expected: {self._xa.values.shape}"
+                ),
+                logger=logger,
             )
 
+        idx = generate_index(
+            end=self.start_time() - self.freq,
+            length=len(values),
+            freq=self.freq,
+            name=self._time_index.name,
+        )
+
         return self.prepend(
             self.__class__.from_times_and_values(
                 values=values,
diff --git a/darts/utils/utils.py b/darts/utils/utils.py
index 643c0655f1..125e1b8afc 100644
--- a/darts/utils/utils.py
+++ b/darts/utils/utils.py
@@ -429,8 +429,8 @@ def n_steps_between(
 
 
 def generate_index(
-    start: Optional[Union[pd.Timestamp, int]] = None,
-    end: Optional[Union[pd.Timestamp, int]] = None,
+    start: Optional[Union[pd.Timestamp, str, int]] = None,
+    end: Optional[Union[pd.Timestamp, str, int]] = None,
     length: Optional[int] = None,
     freq: Union[str, int, pd.DateOffset] = None,
     name: str = None,
@@ -441,7 +441,7 @@ def generate_index(
     Parameters
     ----------
     start
-        The start of the returned index. If a pandas Timestamp is passed, the index will be a pandas
+        The start of the returned index. If a pandas Timestamp or a date string is passed, the index will be a pandas
         DatetimeIndex. If an integer is passed, the index will be a pandas RangeIndex index. Works only with
         either `length` or `end`.
     end
@@ -477,6 +477,9 @@ def generate_index(
         logger,
     )
 
+    start = pd.Timestamp(start) if isinstance(start, str) else start
+    end = pd.Timestamp(end) if isinstance(end, str) else end
+
     if isinstance(start, pd.Timestamp) or isinstance(end, pd.Timestamp):
         freq = "D" if freq is None else freq
         freq = pd.tseries.frequencies.to_offset(freq) if isinstance(freq, str) else freq