From 13d81132ce3f96114fcedaf1a785877e2ebd5022 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Fri, 24 Nov 2023 18:50:05 +1100 Subject: [PATCH 01/11] MLflow autologging issue # 1618 MLflow integration example making pytorch calls and activating tensorboard collection on Darts' PyTorch models to do autologging with MLflow and activate MLflow UI. --- docs/userguide/torch_forecasting_models.md | 103 +++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index af5c7b92f4..63e8102ab0 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -461,6 +461,109 @@ model.fit(...) *Note* : The callback will give one more element in the `loss_logger.val_loss` as the model trainer performs a validation sanity check before the training begins. +#### Example with MLflow Autologging + +MLflow using interface (UI) and autologging to track Dart's pytorch models. +```python +import pandas as pd +from torchmetrics import MeanAbsolutePercentageError +from darts.dataprocessing.transformers import Scaler +from darts.datasets import AirPassengersDataset +from darts.models import NBEATSModel + +# read data +series = AirPassengersDataset().load() + +# create training and validation sets: +train, val = series.split_after(pd.Timestamp(year=1957, month=12, day=1)) + +# normalize the time series +transformer = Scaler() +train = transformer.fit_transform(train) +val = transformer.transform(val) + +# any TorchMetric or val_loss can be used as the monitor +torch_metrics = torchmetrics.regression.MeanAbsolutePercentageError() + +# MLflow setup +## Run this command with environment activated: mlflow ui --port xxxx (e.g. 
5000, 5001, 5002) +# Copy and paste url from command line to web browser +import mlflow +import torchmetrics +from mlflow.data.pandas_dataset import PandasDataset + +mlflow.pytorch.autolog(log_every_n_epoch=1, log_every_n_step=None, + log_models=True, log_datasets=True, disable=False, + exclusive=False, disable_for_unsupported_versions=False, + silent=False, registered_model_name=None, extra_tags=None + ) + +import mlflow.pytorch +from mlflow.client import MlflowClient + +model_name = "Darts" + +with mlflow.start_run(nested=True) as run: + + dataset: PandasDataset = mlflow.data.from_pandas(series, source="AirPassengersDataset") + + # Log the dataset to the MLflow Run. Specify the "training" context to indicate that the + # dataset is used for model training + mlflow.log_input(dataset, context="training") + + mlflow.log_param("model_type", "Darts_Pytorch_model") + mlflow.log_param("input_chunk_length", 24) + mlflow.log_param("output_chunk_length", 12) + mlflow.log_param("n_epochs", 500) + mlflow.log_param("model_name", 'NBEATS_MLflow') + mlflow.log_param("log_tensorboard", True) + mlflow.log_param("torch_metrics", "torchmetrics.regression.MeanAbsolutePercentageError()") + mlflow.log_param("nr_epochs_val_period", 1) + mlflow.log_param("pl_trainer_kwargs", "{callbacks: [loss_logger]}") + + + from pytorch_lightning.callbacks import Callback + + class LossLogger(Callback): + def __init__(self): + self.train_loss = [] + self.val_loss = [] + + # will automatically be called at the end of each epoch + def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self.train_loss.append(float(trainer.callback_metrics["train_loss"])) + + def on_validation_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self.val_loss.append(float(trainer.callback_metrics["val_loss"])) + + + loss_logger = LossLogger() + + # create the model + model = NBEATSModel( + input_chunk_length=24, + output_chunk_length=12, + 
n_epochs=500, + model_name='NBEATS_MLflow', + log_tensorboard=True, + torch_metrics=torch_metrics, + nr_epochs_val_period=1, + pl_trainer_kwargs={"callbacks": [loss_logger]}) + + # use validation dataset + model.fit( + series=train, + val_series=val, + ) + + # predit + forecast = model.predict(len(val)) + +# Registering model +model_uri = f"runs:/{run.info.run_id}/darts-NBEATS" +mlflow.register_model(model_uri=model_uri, name=model_name) +``` + ## Performance Recommendations This section recaps the main factors impacting the performance when training and using torch-based models. From c15eb1e9244d29cca103d81700f2d9ecf6c5ecf4 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:26:27 +1100 Subject: [PATCH 02/11] Update torch_forecasting_models.md Adding first two suggestions raised in MLflow autologging issue # 1618 #2092 via comments. --- docs/userguide/torch_forecasting_models.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index 63e8102ab0..5d145f149c 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -466,6 +466,7 @@ model.fit(...) MLflow using interface (UI) and autologging to track Dart's pytorch models. ```python import pandas as pd +import torchmetrics from torchmetrics import MeanAbsolutePercentageError from darts.dataprocessing.transformers import Scaler from darts.datasets import AirPassengersDataset @@ -489,7 +490,6 @@ torch_metrics = torchmetrics.regression.MeanAbsolutePercentageError() ## Run this command with environment activated: mlflow ui --port xxxx (e.g. 
5000, 5001, 5002) # Copy and paste url from command line to web browser import mlflow -import torchmetrics from mlflow.data.pandas_dataset import PandasDataset mlflow.pytorch.autolog(log_every_n_epoch=1, log_every_n_step=None, @@ -505,7 +505,7 @@ model_name = "Darts" with mlflow.start_run(nested=True) as run: - dataset: PandasDataset = mlflow.data.from_pandas(series, source="AirPassengersDataset") + dataset: PandasDataset = mlflow.data.from_pandas(series.pd_dataframe(), source="AirPassengersDataset") # Log the dataset to the MLflow Run. Specify the "training" context to indicate that the # dataset is used for model training From cb5dbaf9f857554c1fc166bbe5935faf330c8143 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Mon, 11 Mar 2024 22:34:33 +1100 Subject: [PATCH 03/11] Saving pip and conda env Adding code to save pip requirements and conda environment required to run the model via MLFlow. --- docs/userguide/torch_forecasting_models.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index 59ee0ed044..28b4f5e88c 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -559,6 +559,12 @@ with mlflow.start_run(nested=True) as run: # predit forecast = model.predict(len(val)) +# Save conda environment used to run the model +mlflow.pytorch.get_default_conda_env() + +# Save pip requirements +mlflow.pytorch.get_default_pip_requirements() + # Registering model model_uri = f"runs:/{run.info.run_id}/darts-NBEATS" mlflow.register_model(model_uri=model_uri, name=model_name) From 22f75f5a1fb7a2fe50cd23a9ea83ff19341539e7 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Mon, 11 Mar 2024 22:41:17 +1100 Subject: [PATCH 04/11] Track and save model Adding a way to track and save model before registering it using MLFlow. 
--- docs/userguide/torch_forecasting_models.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index 28b4f5e88c..169517b718 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -565,7 +565,14 @@ mlflow.pytorch.get_default_conda_env() # Save pip requirements mlflow.pytorch.get_default_pip_requirements() +# Set tracking uri +mlflow.set_tracking_uri("sqlite:///mlruns.db") + +# Save Darts model +mlflow.log_artifact("NBeatsModel.pickle") + # Registering model +model_name = "NBEATS" model_uri = f"runs:/{run.info.run_id}/darts-NBEATS" mlflow.register_model(model_uri=model_uri, name=model_name) ``` From ee5593276bff8bf23c8aaed88f4d6b30490ed5e2 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Mon, 11 Mar 2024 22:55:01 +1100 Subject: [PATCH 05/11] Expanding comment Expanding comment to create a new cell to log the artifact and register the model after a model run id has been created. --- docs/userguide/torch_forecasting_models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index 169517b718..3f2e5d22df 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -568,7 +568,7 @@ mlflow.pytorch.get_default_pip_requirements() # Set tracking uri mlflow.set_tracking_uri("sqlite:///mlruns.db") -# Save Darts model +# Save Darts model (this need to be added via new cell) mlflow.log_artifact("NBeatsModel.pickle") # Registering model From ce635bc838f2bc187a9b1cff18f029df45bd7265 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Thu, 4 Apr 2024 21:55:41 +1100 Subject: [PATCH 06/11] Updating table of content Updating table of content at the top of the file/page. 
--- docs/userguide/torch_forecasting_models.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index 3f2e5d22df..e53caae827 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -25,6 +25,7 @@ We assume that you already know about covariates in Darts. If you're new to the - [Callbacks](#callbacks) - [Early Stopping](#example-with-early-stopping) - [Custom Callback](#example-of-custom-callback-to-store-losses) + - [MLFlow: train, track and monitor](#example-with-mlflow-autologging) 4. [Performance optimisation section](#performance-recommendations) lists tricks to speed up the computation during training. From f7982440b876ff6d07761a2ed6ac8717fe93a1c0 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Sat, 6 Apr 2024 16:20:05 +1100 Subject: [PATCH 07/11] Making log more concise Making the log parameters method more concise as suggested. 
--- docs/userguide/torch_forecasting_models.md | 43 ++++++++-------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index e53caae827..7ce32fa134 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -512,33 +512,20 @@ with mlflow.start_run(nested=True) as run: # dataset is used for model training mlflow.log_input(dataset, context="training") - mlflow.log_param("model_type", "Darts_Pytorch_model") - mlflow.log_param("input_chunk_length", 24) - mlflow.log_param("output_chunk_length", 12) - mlflow.log_param("n_epochs", 500) - mlflow.log_param("model_name", 'NBEATS_MLflow') - mlflow.log_param("log_tensorboard", True) - mlflow.log_param("torch_metrics", "torchmetrics.regression.MeanAbsolutePercentageError()") - mlflow.log_param("nr_epochs_val_period", 1) - mlflow.log_param("pl_trainer_kwargs", "{callbacks: [loss_logger]}") - - - from pytorch_lightning.callbacks import Callback - - class LossLogger(Callback): - def __init__(self): - self.train_loss = [] - self.val_loss = [] - - # will automatically be called at the end of each epoch - def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: - self.train_loss.append(float(trainer.callback_metrics["train_loss"])) - - def on_validation_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: - self.val_loss.append(float(trainer.callback_metrics["val_loss"])) - - - loss_logger = LossLogger() + # Define model hyperparameters to log + params = { + "model_type": "Darts_Pytorch_model", + "input_chunk_length": 24, + "output_chunk_length": 12, + "n_epochs": 500, + "model_name": "NBEATS_MLflow", + "log_tensorboard": True, + "torch_metrics": "torchmetrics.regression.MeanAbsolutePercentageError()", + "nr_epochs_val_period": 1, + } + + # Log hyperparameters + mlflow.log_params(params) # create the model model = 
NBEATSModel( @@ -549,7 +536,7 @@ with mlflow.start_run(nested=True) as run: log_tensorboard=True, torch_metrics=torch_metrics, nr_epochs_val_period=1, - pl_trainer_kwargs={"callbacks": [loss_logger]}) + ) # use validation dataset model.fit( From b839c67ce2f970b23d3d9b41e9b4a0ab4ae35315 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:23:05 +1000 Subject: [PATCH 08/11] Indentation fix Fixed indentation that was off. --- docs/userguide/torch_forecasting_models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index 7ce32fa134..11961d3099 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -25,7 +25,7 @@ We assume that you already know about covariates in Darts. If you're new to the - [Callbacks](#callbacks) - [Early Stopping](#example-with-early-stopping) - [Custom Callback](#example-of-custom-callback-to-store-losses) - - [MLFlow: train, track and monitor](#example-with-mlflow-autologging) + - [MLFlow: train, track and monitor](#example-with-mlflow-autologging) 4. [Performance optimisation section](#performance-recommendations) lists tricks to speed up the computation during training. From 5390383ecc118ea5c98f59708d458083e0c442ca Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:30:58 +1000 Subject: [PATCH 09/11] Update docs/userguide/torch_forecasting_models.md Adding suggestion, thanks! 
Co-authored-by: madtoinou <32447896+madtoinou@users.noreply.github.com> --- docs/userguide/torch_forecasting_models.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index 11961d3099..3a8987fcea 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -529,13 +529,8 @@ with mlflow.start_run(nested=True) as run: # create the model model = NBEATSModel( - input_chunk_length=24, - output_chunk_length=12, - n_epochs=500, - model_name='NBEATS_MLflow', - log_tensorboard=True, + **params, torch_metrics=torch_metrics, - nr_epochs_val_period=1, ) # use validation dataset From 65dbd483ef0e8e25ce8a593b9f58277a6ff6fdd8 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:37:16 +1000 Subject: [PATCH 10/11] Resolving torchmetrics Resolving torchmetrics issue. --- docs/userguide/torch_forecasting_models.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index 037623cc93..b777aa1d03 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -485,9 +485,6 @@ transformer = Scaler() train = transformer.fit_transform(train) val = transformer.transform(val) -# any TorchMetric or val_loss can be used as the monitor -torch_metrics = torchmetrics.regression.MeanAbsolutePercentageError() - # MLflow setup ## Run this command with environment activated: mlflow ui --port xxxx (e.g. 
5000, 5001, 5002) # Copy and paste url from command line to web browser @@ -515,13 +512,12 @@ with mlflow.start_run(nested=True) as run: # Define model hyperparameters to log params = { - "model_type": "Darts_Pytorch_model", "input_chunk_length": 24, "output_chunk_length": 12, "n_epochs": 500, "model_name": "NBEATS_MLflow", "log_tensorboard": True, - "torch_metrics": "torchmetrics.regression.MeanAbsolutePercentageError()", + "torch_metrics": MeanAbsolutePercentageError(), "nr_epochs_val_period": 1, } @@ -531,7 +527,6 @@ with mlflow.start_run(nested=True) as run: # create the model model = NBEATSModel( **params, - torch_metrics=torch_metrics, ) # use validation dataset From 3593454143f61d42b132c1413a40b34fd30d2d09 Mon Sep 17 00:00:00 2001 From: cargecla1 <138342606+cargecla1@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:48:56 +1000 Subject: [PATCH 11/11] Update MLFlow saving method Updating the MLFlow saving method including tracking uri. --- docs/userguide/torch_forecasting_models.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index b777aa1d03..2988295df6 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -500,7 +500,7 @@ mlflow.pytorch.autolog(log_every_n_epoch=1, log_every_n_step=None, import mlflow.pytorch from mlflow.client import MlflowClient -model_name = "Darts" +model_name = "darts-NBEATS" with mlflow.start_run(nested=True) as run: @@ -545,14 +545,13 @@ mlflow.pytorch.get_default_conda_env() mlflow.pytorch.get_default_pip_requirements() # Set tracking uri -mlflow.set_tracking_uri("sqlite:///mlruns.db") +model_uri = f"runs:/{run.info.run_id}/darts-NBEATS" -# Save Darts model (this need to be added via new cell) -mlflow.log_artifact("NBeatsModel.pickle") +# Save Darts model as an artifact +model_path = 'nbeats_air_passengers' +mlflow.sklearn.save_model(model, model_path) # 
Registering model -model_name = "NBEATS" -model_uri = f"runs:/{run.info.run_id}/darts-NBEATS" mlflow.register_model(model_uri=model_uri, name=model_name) ```