diff --git a/CHANGELOG.md b/CHANGELOG.md index 64e6c5bcf1..dd3bc36089 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,8 @@ but cannot always guarantee backwards compatibility. Changes that may **break co - `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). - Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch` that allows to use an integer `start` either as the index position or index value/label for `series` indexed with a `pd.RangeIndex`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). - Added `RINorm` (Reversible Instance Norm) as an input normalization option for all `TorchForecastingModel` except `RNNModel`. Activate it with model creation parameter `use_reversible_instance_norm`. [#1969](https://github.com/unit8co/darts/pull/1969) by [Dennis Bader](https://github.com/dennisbader). -- Reduced the size of the Darts docker image `unit8/darts:latest`, and included all optional models as well as dev requirements. [#1878](https://github.com/unit8co/darts/pull/1878) by [Alex Colpitts](https://github.com/alexcolpitts96). +- Reduced the size of the Darts docker image `unit8/darts:latest`, and included all optional models as well as dev requirements. [#1878](https://github.com/unit8co/darts/pull/1878) by [Alex Colpitts](https://github.com/alexcolpitts96). +- Added short examples in the docstring of all the models, including covariates usage and some model-specific parameters. [#1956](https://github.com/unit8co/darts/pull/1956) by [Antoine Madrona](https://github.com/madtoinou). **Fixed** - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou). diff --git a/darts/models/forecasting/arima.py b/darts/models/forecasting/arima.py index 87d3f21279..dbca628ca3 100644 --- a/darts/models/forecasting/arima.py +++ b/darts/models/forecasting/arima.py @@ -71,14 +71,37 @@ def __init__( .. highlight:: python .. code-block:: python + def encode_year(idx): + return (idx.year - 1950) / 50 + add_encoders={ 'cyclic': {'future': ['month']}, 'datetime_attribute': {'future': ['hour', 'dayofweek']}, 'position': {'future': ['relative']}, - 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]}, + 'custom': {'future': [encode_year]}, 'transformer': Scaler() } .. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import ARIMA + >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries + >>> series = AirPassengersDataset().load() + >>> # optionally, use some future covariates; e.g. 
the value of the month encoded as a sine and cosine series + >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6) + >>> # define ARIMA parameters + >>> model = ARIMA(p=12, d=1, q=2) + >>> model.fit(series, future_covariates=future_cov) + >>> pred = model.predict(6, future_covariates=future_cov) + >>> pred.values() + array([[451.36489334], + [416.88972829], + [443.10520391], + [481.07892911], + [502.11286509], + [555.50153984]]) """ super().__init__(add_encoders=add_encoders) self.order = p, d, q diff --git a/darts/models/forecasting/auto_arima.py b/darts/models/forecasting/auto_arima.py index 2266917579..4a9e7b6db2 100644 --- a/darts/models/forecasting/auto_arima.py +++ b/darts/models/forecasting/auto_arima.py @@ -54,14 +54,37 @@ def __init__( .. highlight:: python .. code-block:: python + def encode_year(idx): + return (idx.year - 1950) / 50 + add_encoders={ 'cyclic': {'future': ['month']}, 'datetime_attribute': {'future': ['hour', 'dayofweek']}, 'position': {'future': ['relative']}, - 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]}, + 'custom': {'future': [encode_year]}, 'transformer': Scaler() } .. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import AutoARIMA + >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries + >>> series = AirPassengersDataset().load() + >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series + >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6) + >>> # define some boundaries for the parameters + >>> model = AutoARIMA(start_p=8, max_p=12, start_q=1) + >>> model.fit(series, future_covariates=future_cov) + >>> pred = model.predict(6, future_covariates=future_cov) + >>> pred.values() + array([[449.79716178], + [416.31180633], + [445.28005229], + [485.27121314], + [507.61787454], + [561.26993332]]) """ super().__init__(add_encoders=add_encoders) self.model = PmdAutoARIMA(*autoarima_args, **autoarima_kwargs) diff --git a/darts/models/forecasting/baselines.py b/darts/models/forecasting/baselines.py index 9dd69902aa..56dbc00e27 100644 --- a/darts/models/forecasting/baselines.py +++ b/darts/models/forecasting/baselines.py @@ -26,6 +26,22 @@ def __init__(self): This model has no parameter, and always predicts the mean value of the training series. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import NaiveMean + >>> series = AirPassengersDataset().load() + >>> model = NaiveMean() + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[280.29861111], + [280.29861111], + [280.29861111], + [280.29861111], + [280.29861111], + [280.29861111]]) """ super().__init__() self.mean_val = None @@ -63,6 +79,23 @@ def __init__(self, K: int = 1): ---------- K the number of last time steps of the training set to repeat + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import NaiveSeasonal + >>> series = AirPassengersDataset().load() + >>> # prior analysis suggested seasonality of 12 + >>> model = NaiveSeasonal(K=12) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[417.], + [391.], + [419.], + [461.], + [472.], + [535.]]) """ super().__init__() self.last_k_vals = None @@ -106,6 +139,22 @@ def __init__(self): and extends it in the future. For a training series of length :math:`T`, we have: ..
math:: \\hat{y}_{T+h} = y_T + h\\left( \\frac{y_T - y_1}{T - 1} \\right) + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import NaiveDrift + >>> series = AirPassengersDataset().load() + >>> model = NaiveDrift() + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[434.23776224], + [436.47552448], + [438.71328671], + [440.95104895], + [443.18881119], + [445.42657343]]) """ super().__init__() @@ -147,6 +196,23 @@ def __init__(self, input_chunk_length: int = 1): ---------- input_chunk_length The size of the sliding window used to calculate the moving average + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import NaiveMovingAverage + >>> series = AirPassengersDataset().load() + >>> # using the average of the last 6 months + >>> model = NaiveMovingAverage(input_chunk_length=6) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[503.16666667], + [483.36111111], + [462.9212963 ], + [455.40817901], + [454.47620885], + [465.22224366]]) """ super().__init__() self.input_chunk_length = input_chunk_length @@ -217,6 +282,23 @@ def __init__( List of forecasting models whose predictions to ensemble show_warnings Whether to show warnings related to models covariates support. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import NaiveEnsembleModel, NaiveSeasonal, LinearRegressionModel + >>> series = AirPassengersDataset().load() + >>> # defining the ensemble + >>> model = NaiveEnsembleModel([NaiveSeasonal(K=12), LinearRegressionModel(lags=4)]) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[439.23152974], + [431.41161602], + [439.72888401], + [453.70180806], + [454.96757177], + [485.16604194]]) """ super().__init__( models=models, diff --git a/darts/models/forecasting/block_rnn_model.py b/darts/models/forecasting/block_rnn_model.py index db5fa7281d..c5eff40a83 100644 --- a/darts/models/forecasting/block_rnn_model.py +++ b/darts/models/forecasting/block_rnn_model.py @@ -311,6 +311,36 @@ def encode_year(idx): ---------- .. [1] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import BlockRNNModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # predict 6 pressure values using the 12 past values of pressure and rainfall + >>> model = BlockRNNModel( + >>> input_chunk_length=12, + >>> output_chunk_length=6, + >>> n_rnn_layers=2, + >>> n_epochs=50, + >>> ) + >>> model.fit(target, past_covariates=past_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[4.97979827], + [3.9707572 ], + [5.27869295], + [5.19697244], + [5.28424783], + [5.22497681]]) + + .. note:: + `RNN example notebook `_ presents techniques + that can be used to improve the forecasts quality compared to this simple usage example.
""" super().__init__(**self._extract_torch_model_params(**self.model_params)) diff --git a/darts/models/forecasting/catboost_model.py b/darts/models/forecasting/catboost_model.py index b528453793..b25b983a77 100644 --- a/darts/models/forecasting/catboost_model.py +++ b/darts/models/forecasting/catboost_model.py @@ -97,6 +97,35 @@ def encode_year(idx): that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``. **kwargs Additional keyword arguments passed to `catboost.CatBoostRegressor`. + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import CatBoostModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature + >>> # values corresponding to the forecasted period + >>> model = CatBoostModel( + >>> lags=12, + >>> lags_past_covariates=12, + >>> lags_future_covariates=[0,1,2,3,4,5], + >>> output_chunk_length=6 + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[1006.4153701 ], + [1006.41907237], + [1006.30872957], + [1006.28614154], + [1006.22355514], + [1006.21607546]]) """ kwargs["random_state"] = random_state # seed for tree learner self.kwargs = kwargs diff --git a/darts/models/forecasting/croston.py b/darts/models/forecasting/croston.py index 75029f5ae2..c5b6482f62 100644 --- a/darts/models/forecasting/croston.py +++ b/darts/models/forecasting/croston.py @@ -58,11 +58,14 @@ def __init__( .. highlight:: python .. code-block:: python + def encode_year(idx): + return (idx.year - 1950) / 50 + add_encoders={ 'cyclic': {'future': ['month']}, 'datetime_attribute': {'future': ['hour', 'dayofweek']}, 'position': {'future': ['relative']}, - 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]}, + 'custom': {'future': [encode_year]}, 'transformer': Scaler() } .. @@ -74,6 +77,23 @@ def __init__( .. [2] Ruud H. Teunter, Aris A. Syntetos, and M. Zied Babai. Intermittent demand: Linking forecasting to inventory obsolescence. European Journal of Operational Research, 214(3):606 – 615, 2011. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import Croston + >>> series = AirPassengersDataset().load() + >>> # use the optimized version to automatically select best alpha parameter + >>> model = Croston(version="optimized") + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[461.7666], + [461.7666], + [461.7666], + [461.7666], + [461.7666], + [461.7666]]) """ super().__init__(add_encoders=add_encoders) raise_if_not( diff --git a/darts/models/forecasting/dlinear.py b/darts/models/forecasting/dlinear.py index d10f0d0d46..7b78c1a16b 100644 --- a/darts/models/forecasting/dlinear.py +++ b/darts/models/forecasting/dlinear.py @@ -414,6 +414,39 @@ def encode_year(idx): Are Transformers Effective for Time Series Forecasting?. arXiv preprint arXiv:2205.13504. .. [2] T. Kim et al. 
"Reversible Instance Normalization for Accurate Time-Series Forecasting against Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import DLinearModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature + >>> # values corresponding to the forecasted period + >>> model = DLinearModel( + >>> input_chunk_length=6, + >>> output_chunk_length=6, + >>> n_epochs=20, + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[667.20957388], + [666.76986848], + [666.67733306], + [666.06625381], + [665.8529289 ], + [665.75320573]]) + + .. note:: + This simple usage example produces poor forecasts. In order to obtain better performance, user should + transform the input data, increase the number of epochs, use a validation set, optimize the hyper- + parameters, ... """ super().__init__(**self._extract_torch_model_params(**self.model_params)) diff --git a/darts/models/forecasting/exponential_smoothing.py b/darts/models/forecasting/exponential_smoothing.py index e833c8546b..dda34b6992 100644 --- a/darts/models/forecasting/exponential_smoothing.py +++ b/darts/models/forecasting/exponential_smoothing.py @@ -66,6 +66,24 @@ def __init__( :func:`statsmodels.tsa.holtwinters.ExponentialSmoothing.fit()`. See `the documentation `_. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import ExponentialSmoothing + >>> from darts.utils.utils import ModelMode, SeasonalityMode + >>> series = AirPassengersDataset().load() + >>> # using Holt's exponential smoothing + >>> model = ExponentialSmoothing(trend=ModelMode.ADDITIVE, seasonal=SeasonalityMode.MULTIPLICATIVE) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[445.24283838], + [418.22618932], + [465.31305075], + [494.95129261], + [505.4770514 ], + [573.31519186]]) """ super().__init__() self.trend = trend diff --git a/darts/models/forecasting/fft.py b/darts/models/forecasting/fft.py index 54f2b9d0af..a2b59d45e6 100644 --- a/darts/models/forecasting/fft.py +++ b/darts/models/forecasting/fft.py @@ -253,6 +253,30 @@ def __init__( global trend, and do not perform any frequency filtering: >>> FFT(required_matches={'month'}, trend='exp') + + Simple usage example, using one of the dataset available in darts + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import FFT + >>> series = AirPassengersDataset().load() + >>> # increase the number of frequency and use a polynomial trend of degree 2 + >>> model = FFT( + >>> nr_freqs_to_keep=20, + >>> trend= "poly", + >>> trend_poly_degree=2 + >>> ) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[471.79323146], + [494.6381425 ], + [504.5659999 ], + [515.82463265], + [520.59404623], + [547.26720705]]) + + .. note:: + `FFT example notebook `_ presents techniques + that can be used to improve the forecasts quality compared to this simple usage example. 
""" super().__init__() self.nr_freqs_to_keep = nr_freqs_to_keep diff --git a/darts/models/forecasting/kalman_forecaster.py b/darts/models/forecasting/kalman_forecaster.py index 2da16f4421..7d1cc7ef93 100644 --- a/darts/models/forecasting/kalman_forecaster.py +++ b/darts/models/forecasting/kalman_forecaster.py @@ -67,14 +67,42 @@ def __init__( .. highlight:: python .. code-block:: python + def encode_year(idx): + return (idx.year - 1950) / 50 + add_encoders={ 'cyclic': {'future': ['month']}, 'datetime_attribute': {'future': ['hour', 'dayofweek']}, 'position': {'future': ['relative']}, - 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]}, + 'custom': {'future': [encode_year]}, 'transformer': Scaler() } .. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import KalmanForecaster + >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries + >>> series = AirPassengersDataset().load() + >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series + >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6) + >>> # increasing the size of the state vector + >>> model = KalmanForecaster(dim_x=12) + >>> model.fit(series, future_covariates=future_cov) + >>> pred = model.predict(6, future_covariates=future_cov) + >>> pred.values() + array([[474.40680728], + [440.51801726], + [461.94512461], + [494.42090089], + [528.6436328 ], + [590.30647185]]) + + .. note:: + `Kalman example notebook `_ + presents techniques that can be used to improve the forecasts quality compared to this simple usage + example. """ super().__init__(add_encoders=add_encoders) self.dim_x = dim_x diff --git a/darts/models/forecasting/lgbm.py b/darts/models/forecasting/lgbm.py index 213ae6b638..7aa2e4cd76 100644 --- a/darts/models/forecasting/lgbm.py +++ b/darts/models/forecasting/lgbm.py @@ -117,6 +117,36 @@ def encode_year(idx): treated as categorical are integer-encoded. **kwargs Additional keyword arguments passed to `lightgbm.LGBRegressor`. 
+ + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import LightGBMModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature + >>> # values corresponding to the forecasted period + >>> model = LightGBMModel( + >>> lags=12, + >>> lags_past_covariates=12, + >>> lags_future_covariates=[0,1,2,3,4,5], + >>> output_chunk_length=6, + >>> verbose=-1 + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[1006.85376674], + [1006.83998586], + [1006.63884831], + [1006.57201255], + [1006.52290556], + [1006.39550065]]) """ kwargs["random_state"] = random_state # seed for tree learner self.kwargs = kwargs diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py index 0e242590a9..19fc4617b6 100644 --- a/darts/models/forecasting/linear_regression_model.py +++ b/darts/models/forecasting/linear_regression_model.py @@ -99,6 +99,36 @@ def encode_year(idx): Additional keyword arguments passed to `sklearn.linear_model.LinearRegression` (by default), to `sklearn.linear_model.PoissonRegressor` (if `likelihood="poisson"`), or to `sklearn.linear_model.QuantileRegressor` (if `likelihood="quantile"`). + + Examples + -------- + Deterministic forecasting, using past/future covariates (optional) + >>> from darts.datasets import WeatherDataset + >>> from darts.models import LinearRegressionModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature + >>> # values corresponding to the forecasted period + >>> model = LinearRegressionModel( + >>> lags=12, + >>> lags_past_covariates=12, + >>> lags_future_covariates=[0,1,2,3,4,5], + >>> output_chunk_length=6, + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[1005.72085839], + [1005.6548696 ], + [1005.65403772], + [1005.6846175 ], + [1005.75753605], + [1005.81830675]]) """ self.kwargs = kwargs self._median_idx = None diff --git a/darts/models/forecasting/nbeats.py b/darts/models/forecasting/nbeats.py index b49f3f08f9..e5c06c3af7 100644 --- a/darts/models/forecasting/nbeats.py +++ b/darts/models/forecasting/nbeats.py @@ -734,6 +734,37 @@ def encode_year(idx): .. [1] https://openreview.net/forum?id=r1ecqn4YwB .. [2] T. Kim et al. 
"Reversible Instance Normalization for Accurate Time-Series Forecasting against Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import NBEATSModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # changing the activation function of the encoder/decoder to LeakyReLU + >>> model = NBEATSModel( + >>> input_chunk_length=6, + >>> output_chunk_length=6, + >>> n_epochs=5, + >>> activation='LeakyReLU' + >>> ) + >>> model.fit(target, past_covariates=past_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[ 929.78509085], + [1013.66339481], + [ 999.8843893 ], + [ 892.66032082], + [ 921.09781534], + [ 950.37965429]]) + + .. note:: + `NBEATS example notebook `_ + presents techniques that can be used to improve the forecasts quality compared to this simple usage + example. """ super().__init__(**self._extract_torch_model_params(**self.model_params)) diff --git a/darts/models/forecasting/nhits.py b/darts/models/forecasting/nhits.py index 7429d593cd..7b63cd3ca6 100644 --- a/darts/models/forecasting/nhits.py +++ b/darts/models/forecasting/nhits.py @@ -671,6 +671,32 @@ def encode_year(idx): https://arxiv.org/abs/2201.12886 .. [2] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import NHiTSModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # increasing the number of blocks + >>> model = NHiTSModel( + >>> input_chunk_length=6, + >>> output_chunk_length=6, + >>> num_blocks=2, + >>> n_epochs=5, + >>> ) + >>> model.fit(target, past_covariates=past_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[958.2354389 ], + [939.23201079], + [987.51425784], + [919.41209025], + [925.09583093], + [938.95625528]]) """ super().__init__(**self._extract_torch_model_params(**self.model_params)) diff --git a/darts/models/forecasting/nlinear.py b/darts/models/forecasting/nlinear.py index 951f537b27..51fdb8a359 100644 --- a/darts/models/forecasting/nlinear.py +++ b/darts/models/forecasting/nlinear.py @@ -362,6 +362,34 @@ def encode_year(idx): Are Transformers Effective for Time Series Forecasting?. arXiv preprint arXiv:2205.13504. .. [2] T. Kim et al. 
"Reversible Instance Normalization for Accurate Time-Series Forecasting against Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import NLinearModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature + >>> # values corresponding to the forecasted period + >>> model = NLinearModel( + >>> input_chunk_length=6, + >>> output_chunk_length=6, + >>> n_epochs=20, + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[429.56117169], + [428.93264096], + [428.35210616], + [428.13154426], + [427.98781641], + [428.00325481]]) """ super().__init__(**self._extract_torch_model_params(**self.model_params)) diff --git a/darts/models/forecasting/prophet_model.py b/darts/models/forecasting/prophet_model.py index 51ec270ccd..295e8286d3 100644 --- a/darts/models/forecasting/prophet_model.py +++ b/darts/models/forecasting/prophet_model.py @@ -28,11 +28,17 @@ def __init__( country_holidays: Optional[str] = None, suppress_stdout_stderror: bool = True, add_encoders: Optional[dict] = None, - cap: Union[ - float, Callable[[Union[pd.DatetimeIndex, pd.RangeIndex]], Sequence[float]] + cap: Optional[ + Union[ + float, + Callable[[Union[pd.DatetimeIndex, pd.RangeIndex]], Sequence[float]], + ] ] = None, - floor: Union[ - float, Callable[[Union[pd.DatetimeIndex, pd.RangeIndex]], Sequence[float]] + floor: Optional[ + Union[ + float, + Callable[[Union[pd.DatetimeIndex, pd.RangeIndex]], Sequence[float]], + ] ] = None, **prophet_kwargs, ): @@ -92,11 +98,14 @@ def __init__( .. highlight:: python .. code-block:: python + def encode_year(idx): + return (idx.year - 1950) / 50 + add_encoders={ 'cyclic': {'future': ['month']}, 'datetime_attribute': {'future': ['hour', 'dayofweek']}, 'position': {'future': ['relative']}, - 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]}, + 'custom': {'future': [encode_year]}, 'transformer': Scaler() } .. @@ -124,6 +133,32 @@ def __init__( Some optional keyword arguments for Prophet. For information about the parameters see: `The Prophet source code `_. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import Prophet + >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries + >>> series = AirPassengersDataset().load() + >>> # optionally, use some future covariates; e.g. 
the value of the month encoded as a sine and cosine series + >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6) + >>> # adding a seasonality (daily, weekly and yearly are included by default) and holidays + >>> model = Prophet( + >>> add_seasonalities={ + >>> 'name':"quarterly_seasonality", + >>> 'seasonal_periods':4, + >>> 'fourier_order':5 + >>> }, + >>> ) + >>> model.fit(series, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[472.26891239], + [467.56955721], + [494.47230467], + [493.10568429], + [497.54686113], + [539.11716811]]) """ super().__init__(add_encoders=add_encoders) diff --git a/darts/models/forecasting/random_forest.py b/darts/models/forecasting/random_forest.py index 6fe1b86f3e..d4e0b4e58e 100644 --- a/darts/models/forecasting/random_forest.py +++ b/darts/models/forecasting/random_forest.py @@ -95,6 +95,36 @@ def encode_year(idx): that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``. **kwargs Additional keyword arguments passed to `sklearn.ensemble.RandomForest`. + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import RandomForest + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # random forest with 200 trees trained with MAE + >>> model = RandomForest( + >>> lags=12, + >>> lags_past_covariates=12, + >>> lags_future_covariates=[0,1,2,3,4,5], + >>> output_chunk_length=6, + >>> n_estimators=200, + >>> criterion="absolute_error", + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[1006.29805], + [1006.23675], + [1006.17325], + [1006.10295], + [1006.06505], + [1006.05465]]) """ self.n_estimators = n_estimators self.max_depth = max_depth diff --git a/darts/models/forecasting/regression_ensemble_model.py b/darts/models/forecasting/regression_ensemble_model.py index b26fd10b92..ae749015e8 100644 --- a/darts/models/forecasting/regression_ensemble_model.py +++ b/darts/models/forecasting/regression_ensemble_model.py @@ -71,6 +71,28 @@ def __init__( References ---------- .. [1] D. H. Wolpert, “Stacked generalization”, Neural Networks, vol. 5, no. 2, pp. 241–259, Jan. 
1992 + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import RegressionEnsembleModel, NaiveSeasonal, LinearRegressionModel + >>> series = AirPassengersDataset().load() + >>> model = RegressionEnsembleModel( + >>> forecasting_models = [ + >>> NaiveSeasonal(K=12), + >>> LinearRegressionModel(lags=4) + >>> ], + >>> regression_train_n_points=20 + >>> ) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[494.24050364], + [464.3869697 ], + [496.53180506], + [544.82269341], + [557.35256055], + [630.24334385]]) """ super().__init__( models=forecasting_models, diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index f1903b51b0..940ec4c2bc 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -129,6 +129,36 @@ def encode_year(idx): Whether the model should use static covariate information in case the input `series` passed to ``fit()`` contain static covariates. If ``True``, and static covariates are available at fitting time, will enforce that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``. + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import RegressionModel + >>> from sklearn.linear_model import Ridge + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # wrap around the sklearn Ridge model + >>> model = RegressionModel( + >>> model=Ridge(), + >>> lags=12, + >>> lags_past_covariates=4, + >>> lags_future_covariates=(0,6), + >>> output_chunk_length=6 + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[1005.73340676], + [1005.71159051], + [1005.7322616 ], + [1005.76314504], + [1005.82204348], + [1005.89100967]]) """ super().__init__(add_encoders=add_encoders) diff --git a/darts/models/forecasting/rnn_model.py b/darts/models/forecasting/rnn_model.py index 7db46a1345..f26cd4a90b 100644 --- a/darts/models/forecasting/rnn_model.py +++ b/darts/models/forecasting/rnn_model.py @@ -390,6 +390,36 @@ def encode_year(idx): show_warnings whether to show warnings raised from PyTorch Lightning. Useful to detect potential issues of your forecasting use case. Default: ``False``. + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import RNNModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # `training_length` > `input_chunk_length` to mimic inference constraints + >>> model = RNNModel( + >>> model="RNN", + >>> input_chunk_length=6, + >>> training_length=18, + >>> n_epochs=20, + >>> ) + >>> model.fit(target, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[ 3.18922903], + [ 1.17791019], + [ 0.39992814], + [ 0.13277921], + [ 0.02523252], + [-0.01829086]]) + + .. 
note:: + `RNN example notebook `_ presents techniques + that can be used to improve the forecasts quality compared to this simple usage example. """ # create copy of model parameters model_kwargs = {key: val for key, val in self.model_params.items()} diff --git a/darts/models/forecasting/sf_auto_arima.py b/darts/models/forecasting/sf_auto_arima.py index 4cf674a453..91cc12c0c0 100644 --- a/darts/models/forecasting/sf_auto_arima.py +++ b/darts/models/forecasting/sf_auto_arima.py @@ -51,11 +51,14 @@ def __init__( .. highlight:: python .. code-block:: python + def encode_year(idx): + return (idx.year - 1950) / 50 + add_encoders={ 'cyclic': {'future': ['month']}, 'datetime_attribute': {'future': ['hour', 'dayofweek']}, 'position': {'future': ['relative']}, - 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]}, + 'custom': {'future': [encode_year]}, 'transformer': Scaler() } .. @@ -64,12 +67,23 @@ def __init__( Examples -------- - >>> from darts.models import StatsForecastAutoARIMA >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import StatsForecastAutoARIMA + >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries >>> series = AirPassengersDataset().load() + >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series + >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6) + >>> # define StatsForecastAutoARIMA parameters >>> model = StatsForecastAutoARIMA(season_length=12) - >>> model.fit(series[:-36]) - >>> pred = model.predict(36, num_samples=100) + >>> model.fit(series, future_covariates=future_cov) + >>> pred = model.predict(6, future_covariates=future_cov) + >>> pred.values() + array([[450.55179949], + [415.00597806], + [454.61353249], + [486.51218795], + [504.09229632], + [555.06463942]]) """ super().__init__(add_encoders=add_encoders) self.model = SFAutoARIMA(*autoarima_args, **autoarima_kwargs) diff --git a/darts/models/forecasting/sf_auto_ces.py b/darts/models/forecasting/sf_auto_ces.py index 53e0205eec..d12db5d5a5 100644 --- a/darts/models/forecasting/sf_auto_ces.py +++ b/darts/models/forecasting/sf_auto_ces.py @@ -30,12 +30,20 @@ def __init__(self, *autoces_args, **autoces_kwargs): Examples -------- - >>> from darts.models import StatsForecastAutoCES >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import StatsForecastAutoCES >>> series = AirPassengersDataset().load() + >>> # define StatsForecastAutoCES parameters >>> model = StatsForecastAutoCES(season_length=12, model="Z") - >>> model.fit(series[:-36]) - >>> pred = model.predict(36, num_samples=100) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[453.03417969], + [429.34039307], + [488.64471436], + [500.28955078], + [519.79962158], + [586.47503662]]) """ super().__init__() self.model = SFAutoCES(*autoces_args, **autoces_kwargs) diff --git a/darts/models/forecasting/sf_auto_ets.py b/darts/models/forecasting/sf_auto_ets.py index 9876234d23..d5971fa813 100644 --- a/darts/models/forecasting/sf_auto_ets.py +++ b/darts/models/forecasting/sf_auto_ets.py @@ -56,11 +56,14 @@ def __init__( .. highlight:: python .. 
code-block:: python + def encode_year(idx): + return (idx.year - 1950) / 50 + add_encoders={ 'cyclic': {'future': ['month']}, 'datetime_attribute': {'future': ['hour', 'dayofweek']}, 'position': {'future': ['relative']}, - 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]}, + 'custom': {'future': [encode_year]}, 'transformer': Scaler() } .. @@ -71,10 +74,21 @@ def __init__( -------- >>> from darts.datasets import AirPassengersDataset >>> from darts.models import StatsForecastAutoETS + >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries >>> series = AirPassengersDataset().load() + >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series + >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6) + >>> # define StatsForecastAutoETS parameters >>> model = StatsForecastAutoETS(season_length=12, model="AZZ") - >>> model.fit(series[:-36]) - >>> pred = model.predict(36) + >>> model.fit(series, future_covariates=future_cov) + >>> pred = model.predict(6, future_covariates=future_cov) + >>> pred.values() + array([[441.40323676], + [415.09871431], + [448.90785391], + [491.38584654], + [493.11817462], + [549.88974472]]) """ super().__init__(add_encoders=add_encoders) self.model = SFAutoETS(*autoets_args, **autoets_kwargs) diff --git a/darts/models/forecasting/sf_auto_theta.py b/darts/models/forecasting/sf_auto_theta.py index 3882d49470..559517b362 100644 --- a/darts/models/forecasting/sf_auto_theta.py +++ b/darts/models/forecasting/sf_auto_theta.py @@ -38,12 +38,20 @@ def __init__(self, *autotheta_args, **autotheta_kwargs): Examples -------- - >>> from darts.models import StatsForecastAutoTheta >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import StatsForecastAutoTheta >>> series = AirPassengersDataset().load() + >>> # define StatsForecastAutoTheta parameters >>> model = StatsForecastAutoTheta(season_length=12) - >>> model.fit(series[:-36]) - >>> pred = model.predict(36, num_samples=100) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[442.94078295], + [432.22936898], + [495.30609727], + [482.30625563], + [487.49312172], + [555.57902659]]) """ super().__init__() self.model = SFAutoTheta(*autotheta_args, **autotheta_kwargs) diff --git a/darts/models/forecasting/tbats_model.py b/darts/models/forecasting/tbats_model.py index d1eaf6d1f5..4dabf1fa05 100644 --- a/darts/models/forecasting/tbats_model.py +++ b/darts/models/forecasting/tbats_model.py @@ -173,6 +173,23 @@ def __init__( See https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods random_state Sets the underlying random seed at model initialization time. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import TBATS # or BATS + >>> series = AirPassengersDataset().load() + >>> # based on preliminary analysis, the series contains a trend + >>> model = TBATS(use_trend=True) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[448.29856017], + [439.42215052], + [507.73465028], + [493.03751671], + [498.85885374], + [564.64871897]]) """ super().__init__() diff --git a/darts/models/forecasting/tcn_model.py b/darts/models/forecasting/tcn_model.py index a75e04eac8..3b9795b033 100644 --- a/darts/models/forecasting/tcn_model.py +++ b/darts/models/forecasting/tcn_model.py @@ -436,6 +436,35 @@ def encode_year(idx): .. [1] https://arxiv.org/abs/1803.01271 .. [2] T. 
Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import TCNModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # `output_chunk_length` must be strictly smaller than `input_chunk_length` + >>> model = TCNModel( + >>> input_chunk_length=12, + >>> output_chunk_length=6, + >>> n_epochs=20, + >>> ) + >>> model.fit(target, past_covariates=past_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[-80.48476824], + [-80.47896667], + [-41.77135603], + [-41.76158729], + [-41.76854107], + [-41.78166819]]) + + .. note:: + `DeepTCN example notebook `_ presents + techniques that can be used to improve the forecasts quality compared to this simple usage example. """ raise_if_not( diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py index 646670dbb8..af89d05e8f 100644 --- a/darts/models/forecasting/tft_model.py +++ b/darts/models/forecasting/tft_model.py @@ -883,6 +883,43 @@ def encode_year(idx): .. [2] Shazeer, Noam, "GLU Variants Improve Transformer", 2020. arVix https://arxiv.org/abs/2002.05202. .. [3] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import TFTModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # by default, TFTModel is trained using a `QuantileRegression` making it a probabilistic forecasting model + >>> model = TFTModel( + >>> input_chunk_length=6, + >>> output_chunk_length=6, + >>> n_epochs=5, + >>> ) + >>> # future_covariates are mandatory for `TFTModel` + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> # TFTModel is probabilistic by definition; using `num_samples >> 1` to generate probabilistic forecasts + >>> pred = model.predict(6, num_samples=100) + >>> # shape : (forecast horizon, components, num_samples) + >>> pred.all_values().shape + (6, 1, 100) + >>> # showing the first 3 samples for each timestamp + >>> pred.all_values()[:,:,:3] + array([[[-0.06414202, -0.7188093 , 0.52541292]], + [[ 0.02928407, -0.40867163, 1.19650033]], + [[ 0.77252372, -0.50859694, 0.360166 ]], + [[ 0.9586113 , 1.24147138, -0.01625545]], + [[ 1.06863863, 0.2987822 , -0.69213369]], + [[-0.83076568, -0.25780816, -0.28318784]]]) + + .. note:: + `TFT example notebook `_ presents + techniques that can be used to improve the forecasts quality compared to this simple usage example. 
""" model_kwargs = {key: val for key, val in self.model_params.items()} if likelihood is None and loss_fn is None: diff --git a/darts/models/forecasting/theta.py b/darts/models/forecasting/theta.py index 92eac610f9..40f7425431 100644 --- a/darts/models/forecasting/theta.py +++ b/darts/models/forecasting/theta.py @@ -58,6 +58,23 @@ def __init__( References ---------- .. [1] `Unmasking the Theta method >> from darts.datasets import AirPassengersDataset + >>> from darts.models import Theta + >>> series = AirPassengersDataset().load() + >>> # using the canonical Theta method + >>> model = Theta(theta=2) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[442.7256909 ], + [433.74381763], + [494.54534585], + [480.36937856], + [481.06675142], + [545.80068173]]) """ super().__init__() @@ -250,6 +267,22 @@ def __init__( ----- Even though this model is an improvement of :class:`Theta`, it is a naive implementation of the algorithm, which can potentially be slower. + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from darts.models import FourTheta + >>> series = AirPassengersDataset().load() + >>> model = FourTheta(theta=2) + >>> model.fit(series) + >>> pred = model.predict(6) + >>> pred.values() + array([[443.3949283 ], + [434.39769555], + [495.28886231], + [481.08962991], + [481.78610361], + [546.61463773]]) """ super().__init__() diff --git a/darts/models/forecasting/tide_model.py b/darts/models/forecasting/tide_model.py index 7ae8f9ff1f..0d0d478f07 100644 --- a/darts/models/forecasting/tide_model.py +++ b/darts/models/forecasting/tide_model.py @@ -519,6 +519,36 @@ def encode_year(idx): http://arxiv.org/abs/2304.08424 .. [2] T. Kim et al. "Reversible Instance Normalization for Accurate Time-Series Forecasting against Distribution Shift", https://openreview.net/forum?id=cGDAkQo1C0p + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import TiDEModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> model = TiDEModel( + >>> input_chunk_length=6, + >>> output_chunk_length=6, + >>> n_epochs=20 + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[1008.1667634 ], + [ 997.08337201], + [1017.72035839], + [1005.10790392], + [ 998.90537286], + [1005.91534452]]) + + .. note:: + `TiDE example notebook `_ presents + techniques that can be used to improve the forecasts quality compared to this simple usage example. """ super().__init__(**self._extract_torch_model_params(**self.model_params)) diff --git a/darts/models/forecasting/transformer_model.py b/darts/models/forecasting/transformer_model.py index ec445a4a32..5287bcfc71 100644 --- a/darts/models/forecasting/transformer_model.py +++ b/darts/models/forecasting/transformer_model.py @@ -532,11 +532,40 @@ def encode_year(idx): Disclaimer: This current implementation is fully functional and can already produce some good predictions. However, it is still limited in how it uses the Transformer architecture because the `tgt` input of - `torch.nn.Transformer` is not utlized to its full extent. 
Currently, we simply pass the last value of the + `torch.nn.Transformer` is not utilized to its full extent. Currently, we simply pass the last value of the `src` input to `tgt`. To get closer to the way the Transformer is usually used in language models, we should allow the model to consume its own output as part of the `tgt` argument, such that when predicting sequences of values, the input to the `tgt` argument would grow as outputs of the transformer model would be added to it. Of course, the training of the model would have to be adapted accordingly. + + Examples + -------- + >>> from darts.datasets import WeatherDataset + >>> from darts.models import TransformerModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> model = TransformerModel( + >>> input_chunk_length=6, + >>> output_chunk_length=6, + >>> n_epochs=20 + >>> ) + >>> model.fit(target, past_covariates=past_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[5.40498034], + [5.36561899], + [5.80616883], + [6.48695488], + [7.63158655], + [5.65417736]]) + + .. note:: + `Transformer example notebook `_ + presents techniques that can be used to improve the forecasts quality compared to this simple usage + example. """ super().__init__(**self._extract_torch_model_params(**self.model_params)) diff --git a/darts/models/forecasting/varima.py b/darts/models/forecasting/varima.py index e310e4b06e..ece88dba80 100644 --- a/darts/models/forecasting/varima.py +++ b/darts/models/forecasting/varima.py @@ -63,14 +63,39 @@ def __init__( .. highlight:: python .. code-block:: python + def encode_year(idx): + return (idx.year - 1950) / 50 + add_encoders={ 'cyclic': {'future': ['month']}, 'datetime_attribute': {'future': ['hour', 'dayofweek']}, 'position': {'future': ['relative']}, - 'custom': {'future': [lambda idx: (idx.year - 1950) / 50]}, + 'custom': {'future': [encode_year]}, 'transformer': Scaler() } .. + + Examples + -------- + >>> from darts.datasets import ETTh2Dataset + >>> from darts.models import VARIMA + >>> from darts.utils.timeseries_generation import holidays_timeseries + >>> # forecasting the High UseFul Load ("HUFL") and Oil Temperature ("OT") + >>> series = ETTh2Dataset().load()[:500][["HUFL", "OT"]] + >>> # optionally, use some future covariates; e.g. encode each timestep whether it is on a holiday + >>> future_cov = holidays_timeseries(series.time_index, "CN", add_length=6) + >>> # no clear trend in the dataset + >>> model = VARIMA(trend="n") + >>> model.fit(series, future_covariates=future_cov) + >>> pred = model.predict(6, future_covariates=future_cov) + >>> # the two targets are predicted together + >>> pred.values() + array([[48.11846185, 47.94272629], + [49.85314633, 47.97713346], + [51.16145791, 47.99804203], + [52.14674087, 48.00872598], + [52.88729152, 48.01166578], + [53.44242919, 48.00874069]]) """ super().__init__(add_encoders=add_encoders) self.p = p diff --git a/darts/models/forecasting/xgboost.py b/darts/models/forecasting/xgboost.py index e16a881bc3..962eddbeba 100644 --- a/darts/models/forecasting/xgboost.py +++ b/darts/models/forecasting/xgboost.py @@ -115,6 +115,36 @@ def encode_year(idx): that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``. **kwargs Additional keyword arguments passed to `xgb.XGBRegressor`. 
+ + Examples + -------- + Deterministic forecasting, using past/future covariates (optional) + >>> from darts.datasets import WeatherDataset + >>> from darts.models import XGBModel + >>> series = WeatherDataset().load() + >>> # predicting atmospheric pressure + >>> target = series['p (mbar)'][:100] + >>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100) + >>> past_cov = series['rain (mm)'][:100] + >>> # optionally, use future temperatures (pretending this component is a forecast) + >>> future_cov = series['T (degC)'][:106] + >>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature + >>> # values corresponding to the forecasted period + >>> model = XGBModel( + >>> lags=12, + >>> lags_past_covariates=12, + >>> lags_future_covariates=[0,1,2,3,4,5], + >>> output_chunk_length=6, + >>> ) + >>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov) + >>> pred = model.predict(6) + >>> pred.values() + array([[1005.9185 ], + [1005.8315 ], + [1005.7878 ], + [1005.72626], + [1005.7475 ], + [1005.76074]]) """ kwargs["random_state"] = random_state # seed for tree learner self.kwargs = kwargs