From 13d81132ce3f96114fcedaf1a785877e2ebd5022 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Fri, 24 Nov 2023 18:50:05 +1100
Subject: [PATCH 01/11] MLflow autologging issue # 1618

MLflow integration example making PyTorch calls and activating TensorBoard collection on Darts' PyTorch models, to autolog with MLflow and activate the MLflow UI.
---
 docs/userguide/torch_forecasting_models.md | 103 +++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index af5c7b92f4..63e8102ab0 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -461,6 +461,109 @@ model.fit(...)
 
 *Note* : The callback will give one more element in the `loss_logger.val_loss` as the model trainer performs a validation sanity check before the training begins.
 
+#### Example with MLflow Autologging
+
+MLflow using interface (UI) and autologging to track Dart's pytorch models. 
+```python
+import pandas as pd
+from torchmetrics import MeanAbsolutePercentageError
+from darts.dataprocessing.transformers import Scaler
+from darts.datasets import AirPassengersDataset
+from darts.models import NBEATSModel
+
+# read data
+series = AirPassengersDataset().load()
+
+# create training and validation sets:
+train, val = series.split_after(pd.Timestamp(year=1957, month=12, day=1))
+
+# normalize the time series
+transformer = Scaler()
+train = transformer.fit_transform(train)
+val = transformer.transform(val)
+
+# any TorchMetric or val_loss can be used as the monitor
+torch_metrics = torchmetrics.regression.MeanAbsolutePercentageError()
+
+# MLflow setup
+## Run this command with environment activated: mlflow ui --port xxxx  (e.g. 5000, 5001, 5002)
+# Copy and paste url from command line to web browser
+import mlflow
+import torchmetrics
+from mlflow.data.pandas_dataset import PandasDataset
+
+mlflow.pytorch.autolog(log_every_n_epoch=1, log_every_n_step=None, 
+                        log_models=True, log_datasets=True, disable=False, 
+                        exclusive=False, disable_for_unsupported_versions=False, 
+                        silent=False, registered_model_name=None, extra_tags=None
+                        )
+
+import mlflow.pytorch
+from mlflow.client import MlflowClient
+
+model_name = "Darts"
+
+with mlflow.start_run(nested=True) as run:
+
+    dataset: PandasDataset = mlflow.data.from_pandas(series, source="AirPassengersDataset")
+
+    # Log the dataset to the MLflow Run. Specify the "training" context to indicate that the
+    # dataset is used for model training
+    mlflow.log_input(dataset, context="training")
+   
+    mlflow.log_param("model_type", "Darts_Pytorch_model")
+    mlflow.log_param("input_chunk_length", 24)
+    mlflow.log_param("output_chunk_length", 12)                   
+    mlflow.log_param("n_epochs", 500)
+    mlflow.log_param("model_name", 'NBEATS_MLflow')
+    mlflow.log_param("log_tensorboard", True)
+    mlflow.log_param("torch_metrics", "torchmetrics.regression.MeanAbsolutePercentageError()")
+    mlflow.log_param("nr_epochs_val_period", 1)
+    mlflow.log_param("pl_trainer_kwargs", "{callbacks: [loss_logger]}")    
+   
+   
+    from pytorch_lightning.callbacks import Callback
+
+    class LossLogger(Callback):
+        def __init__(self):
+            self.train_loss = []
+            self.val_loss = []
+
+        # will automatically be called at the end of each epoch
+        def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+            self.train_loss.append(float(trainer.callback_metrics["train_loss"]))
+
+        def on_validation_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+            self.val_loss.append(float(trainer.callback_metrics["val_loss"]))
+
+
+    loss_logger = LossLogger()    
+
+    # create the model
+    model = NBEATSModel(
+        input_chunk_length=24,
+        output_chunk_length=12,
+        n_epochs=500,
+        model_name='NBEATS_MLflow',
+        log_tensorboard=True,
+        torch_metrics=torch_metrics,
+        nr_epochs_val_period=1,
+        pl_trainer_kwargs={"callbacks": [loss_logger]})
+
+    # use validation dataset
+    model.fit(
+        series=train,
+        val_series=val,
+        )
+
+    # predit
+    forecast = model.predict(len(val))
+
+# Registering model
+model_uri = f"runs:/{run.info.run_id}/darts-NBEATS"
+mlflow.register_model(model_uri=model_uri, name=model_name)
+```
+
 ## Performance Recommendations
 This section recaps the main factors impacting the performance when
 training and using torch-based models.

From c15eb1e9244d29cca103d81700f2d9ecf6c5ecf4 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Wed, 21 Feb 2024 18:26:27 +1100
Subject: [PATCH 02/11] Update torch_forecasting_models.md

Adding the first two suggestions raised via comments on MLflow autologging issue #1618 / #2092.
---
 docs/userguide/torch_forecasting_models.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index 63e8102ab0..5d145f149c 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -466,6 +466,7 @@ model.fit(...)
 MLflow using interface (UI) and autologging to track Dart's pytorch models. 
 ```python
 import pandas as pd
+import torchmetrics
 from torchmetrics import MeanAbsolutePercentageError
 from darts.dataprocessing.transformers import Scaler
 from darts.datasets import AirPassengersDataset
@@ -489,7 +490,6 @@ torch_metrics = torchmetrics.regression.MeanAbsolutePercentageError()
 ## Run this command with environment activated: mlflow ui --port xxxx  (e.g. 5000, 5001, 5002)
 # Copy and paste url from command line to web browser
 import mlflow
-import torchmetrics
 from mlflow.data.pandas_dataset import PandasDataset
 
 mlflow.pytorch.autolog(log_every_n_epoch=1, log_every_n_step=None, 
@@ -505,7 +505,7 @@ model_name = "Darts"
 
 with mlflow.start_run(nested=True) as run:
 
-    dataset: PandasDataset = mlflow.data.from_pandas(series, source="AirPassengersDataset")
+    dataset: PandasDataset = mlflow.data.from_pandas(series.pd_dataframe(), source="AirPassengersDataset")
 
     # Log the dataset to the MLflow Run. Specify the "training" context to indicate that the
     # dataset is used for model training

From cb5dbaf9f857554c1fc166bbe5935faf330c8143 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Mon, 11 Mar 2024 22:34:33 +1100
Subject: [PATCH 03/11] Saving pip and conda env

Adding code to save pip requirements and conda environment required to run the model via MLFlow.
---
 docs/userguide/torch_forecasting_models.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index 59ee0ed044..28b4f5e88c 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -559,6 +559,12 @@ with mlflow.start_run(nested=True) as run:
     # predit
     forecast = model.predict(len(val))
 
+# Save conda environment used to run the model
+mlflow.pytorch.get_default_conda_env()
+
+# Save pip requirements
+mlflow.pytorch.get_default_pip_requirements()
+
 # Registering model
 model_uri = f"runs:/{run.info.run_id}/darts-NBEATS"
 mlflow.register_model(model_uri=model_uri, name=model_name)

From 22f75f5a1fb7a2fe50cd23a9ea83ff19341539e7 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Mon, 11 Mar 2024 22:41:17 +1100
Subject: [PATCH 04/11] Track and save model

Adding a way to track and save model before registering it using MLFlow.
---
 docs/userguide/torch_forecasting_models.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index 28b4f5e88c..169517b718 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -565,7 +565,14 @@ mlflow.pytorch.get_default_conda_env()
 # Save pip requirements
 mlflow.pytorch.get_default_pip_requirements()
 
+# Set tracking uri
+mlflow.set_tracking_uri("sqlite:///mlruns.db")
+
+# Save Darts model
+mlflow.log_artifact("NBeatsModel.pickle")
+
 # Registering model
+model_name = "NBEATS"
 model_uri = f"runs:/{run.info.run_id}/darts-NBEATS"
 mlflow.register_model(model_uri=model_uri, name=model_name)
 ```

From ee5593276bff8bf23c8aaed88f4d6b30490ed5e2 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Mon, 11 Mar 2024 22:55:01 +1100
Subject: [PATCH 05/11] Expanding comment

Expanding the comment to indicate that logging the artifact and registering the model need to be done in a new cell, after a model run id has been created.
---
 docs/userguide/torch_forecasting_models.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index 169517b718..3f2e5d22df 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -568,7 +568,7 @@ mlflow.pytorch.get_default_pip_requirements()
 # Set tracking uri
 mlflow.set_tracking_uri("sqlite:///mlruns.db")
 
-# Save Darts model
+# Save Darts model (this need to be added via new cell)
 mlflow.log_artifact("NBeatsModel.pickle")
 
 # Registering model

From ce635bc838f2bc187a9b1cff18f029df45bd7265 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Thu, 4 Apr 2024 21:55:41 +1100
Subject: [PATCH 06/11] Updating table of contents

Updating the table of contents at the top of the file/page.
---
 docs/userguide/torch_forecasting_models.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index 3f2e5d22df..e53caae827 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -25,6 +25,7 @@ We assume that you already know about covariates in Darts. If you're new to the
     - [Callbacks](#callbacks)
       - [Early Stopping](#example-with-early-stopping)
       - [Custom Callback](#example-of-custom-callback-to-store-losses)
+      - [MLFlow: train, track and monitor](#example-with-mlflow-autologging)
 
 4. [Performance optimisation section](#performance-recommendations) lists tricks to speed up the computation during training.
 

From f7982440b876ff6d07761a2ed6ac8717fe93a1c0 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Sat, 6 Apr 2024 16:20:05 +1100
Subject: [PATCH 07/11] Making log more concise

Making the log parameters method more concise as suggested.
---
 docs/userguide/torch_forecasting_models.md | 43 ++++++++--------------
 1 file changed, 15 insertions(+), 28 deletions(-)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index e53caae827..7ce32fa134 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -512,33 +512,20 @@ with mlflow.start_run(nested=True) as run:
     # dataset is used for model training
     mlflow.log_input(dataset, context="training")
    
-    mlflow.log_param("model_type", "Darts_Pytorch_model")
-    mlflow.log_param("input_chunk_length", 24)
-    mlflow.log_param("output_chunk_length", 12)                   
-    mlflow.log_param("n_epochs", 500)
-    mlflow.log_param("model_name", 'NBEATS_MLflow')
-    mlflow.log_param("log_tensorboard", True)
-    mlflow.log_param("torch_metrics", "torchmetrics.regression.MeanAbsolutePercentageError()")
-    mlflow.log_param("nr_epochs_val_period", 1)
-    mlflow.log_param("pl_trainer_kwargs", "{callbacks: [loss_logger]}")    
-   
-   
-    from pytorch_lightning.callbacks import Callback
-
-    class LossLogger(Callback):
-        def __init__(self):
-            self.train_loss = []
-            self.val_loss = []
-
-        # will automatically be called at the end of each epoch
-        def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
-            self.train_loss.append(float(trainer.callback_metrics["train_loss"]))
-
-        def on_validation_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
-            self.val_loss.append(float(trainer.callback_metrics["val_loss"]))
-
-
-    loss_logger = LossLogger()    
+    # Define model hyperparameters to log
+    params = {
+              "model_type": "Darts_Pytorch_model",
+              "input_chunk_length": 24,
+              "output_chunk_length": 12,
+              "n_epochs": 500,
+              "model_name": "NBEATS_MLflow",
+              "log_tensorboard": True,
+              "torch_metrics": "torchmetrics.regression.MeanAbsolutePercentageError()",
+              "nr_epochs_val_period": 1,
+        }
+    
+    # Log hyperparameters
+    mlflow.log_params(params)    
 
     # create the model
     model = NBEATSModel(
@@ -549,7 +536,7 @@ with mlflow.start_run(nested=True) as run:
         log_tensorboard=True,
         torch_metrics=torch_metrics,
         nr_epochs_val_period=1,
-        pl_trainer_kwargs={"callbacks": [loss_logger]})
+        )
 
     # use validation dataset
     model.fit(

From b839c67ce2f970b23d3d9b41e9b4a0ab4ae35315 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:23:05 +1000
Subject: [PATCH 08/11] Indentation fix

Fixed indentation that was off.
---
 docs/userguide/torch_forecasting_models.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index 7ce32fa134..11961d3099 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -25,7 +25,7 @@ We assume that you already know about covariates in Darts. If you're new to the
     - [Callbacks](#callbacks)
       - [Early Stopping](#example-with-early-stopping)
       - [Custom Callback](#example-of-custom-callback-to-store-losses)
-      - [MLFlow: train, track and monitor](#example-with-mlflow-autologging)
+    - [MLflow: train, track and monitor](#example-with-mlflow-autologging)
 
 4. [Performance optimisation section](#performance-recommendations) lists tricks to speed up the computation during training.
 

From 5390383ecc118ea5c98f59708d458083e0c442ca Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:30:58 +1000
Subject: [PATCH 09/11] Update docs/userguide/torch_forecasting_models.md

Adding suggestion, thanks!

Co-authored-by: madtoinou <32447896+madtoinou@users.noreply.github.com>
---
 docs/userguide/torch_forecasting_models.md | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index 11961d3099..3a8987fcea 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -529,13 +529,8 @@ with mlflow.start_run(nested=True) as run:
 
     # create the model
     model = NBEATSModel(
-        input_chunk_length=24,
-        output_chunk_length=12,
-        n_epochs=500,
-        model_name='NBEATS_MLflow',
-        log_tensorboard=True,
+        **params,
         torch_metrics=torch_metrics,
-        nr_epochs_val_period=1,
         )
 
     # use validation dataset

From 65dbd483ef0e8e25ce8a593b9f58277a6ff6fdd8 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:37:16 +1000
Subject: [PATCH 10/11] Resolving torchmetrics

Resolving torchmetrics issue.
---
 docs/userguide/torch_forecasting_models.md | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index 037623cc93..b777aa1d03 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -485,9 +485,6 @@ transformer = Scaler()
 train = transformer.fit_transform(train)
 val = transformer.transform(val)
 
-# any TorchMetric or val_loss can be used as the monitor
-torch_metrics = torchmetrics.regression.MeanAbsolutePercentageError()
-
 # MLflow setup
 ## Run this command with environment activated: mlflow ui --port xxxx  (e.g. 5000, 5001, 5002)
 # Copy and paste url from command line to web browser
@@ -515,13 +512,12 @@ with mlflow.start_run(nested=True) as run:
    
     # Define model hyperparameters to log
     params = {
-              "model_type": "Darts_Pytorch_model",
               "input_chunk_length": 24,
               "output_chunk_length": 12,
               "n_epochs": 500,
               "model_name": "NBEATS_MLflow",
               "log_tensorboard": True,
-              "torch_metrics": "torchmetrics.regression.MeanAbsolutePercentageError()",
+              "torch_metrics": MeanAbsolutePercentageError(),
               "nr_epochs_val_period": 1,
         }
     
@@ -531,7 +527,6 @@ with mlflow.start_run(nested=True) as run:
     # create the model
     model = NBEATSModel(
         **params,
-        torch_metrics=torch_metrics,
         )
 
     # use validation dataset

From 3593454143f61d42b132c1413a40b34fd30d2d09 Mon Sep 17 00:00:00 2001
From: cargecla1 <138342606+cargecla1@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:48:56 +1000
Subject: [PATCH 11/11] Update MLFlow saving method

Updating the MLFlow saving method including tracking uri.
---
 docs/userguide/torch_forecasting_models.md | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md
index b777aa1d03..2988295df6 100644
--- a/docs/userguide/torch_forecasting_models.md
+++ b/docs/userguide/torch_forecasting_models.md
@@ -500,7 +500,7 @@ mlflow.pytorch.autolog(log_every_n_epoch=1, log_every_n_step=None,
 import mlflow.pytorch
 from mlflow.client import MlflowClient
 
-model_name = "Darts"
+model_name = "darts-NBEATS"
 
 with mlflow.start_run(nested=True) as run:
 
@@ -545,14 +545,13 @@ mlflow.pytorch.get_default_conda_env()
 mlflow.pytorch.get_default_pip_requirements()
 
 # Set tracking uri
-mlflow.set_tracking_uri("sqlite:///mlruns.db")
+model_uri = f"runs:/{run.info.run_id}/darts-NBEATS"
 
-# Save Darts model (this need to be added via new cell)
-mlflow.log_artifact("NBeatsModel.pickle")
+# Save Darts model as an artifact
+model_path = 'nbeats_air_passengers'
+mlflow.sklearn.save_model(model, model_path)
 
 # Registering model
-model_name = "NBEATS"
-model_uri = f"runs:/{run.info.run_id}/darts-NBEATS"
 mlflow.register_model(model_uri=model_uri, name=model_name)
 ```