From 29c2c3672470401d6581d3c0192980d69915b349 Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Tue, 26 Mar 2024 11:28:55 +0700 Subject: [PATCH 01/13] add vessl callback --- src/axolotl/cli/__init__.py | 3 +++ src/axolotl/core/trainer_builder.py | 6 ++++++ src/axolotl/utils/callbacks/puree_.py | 16 ++++++++++++++++ src/axolotl/utils/puree_.py | 12 ++++++++++++ 4 files changed, 37 insertions(+) create mode 100644 src/axolotl/utils/callbacks/puree_.py create mode 100644 src/axolotl/utils/puree_.py diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py index 79a9d31938..6fea4054d8 100644 --- a/src/axolotl/cli/__init__.py +++ b/src/axolotl/cli/__init__.py @@ -38,6 +38,7 @@ from axolotl.utils.distributed import is_main_process from axolotl.utils.mlflow_ import setup_mlflow_env_vars from axolotl.utils.models import load_tokenizer +from axolotl.utils.puree_ import setup_puree_env_vars from axolotl.utils.tokenization import check_dataset_labels from axolotl.utils.trainer import prepare_optim_env from axolotl.utils.wandb_ import setup_wandb_env_vars @@ -369,6 +370,8 @@ def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs): setup_mlflow_env_vars(cfg) + setup_puree_env_vars(cfg) + return cfg diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index b23a8a1245..94169af55b 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -892,6 +892,12 @@ def get_callbacks(self): callbacks.append( SaveAxolotlConfigtoMlflowCallback(self.cfg.axolotl_config_path) ) + if self.cfg.use_vessl: + from axolotl.utils.callbacks.puree_ import VesslLogCheckpointCallback + + callbacks.append( + VesslLogCheckpointCallback() + ) if self.cfg.loss_watchdog_threshold is not None: callbacks.append(LossWatchDogCallback(self.cfg)) diff --git a/src/axolotl/utils/callbacks/puree_.py b/src/axolotl/utils/callbacks/puree_.py new file mode 100644 index 0000000000..616693ffdb --- /dev/null +++ 
b/src/axolotl/utils/callbacks/puree_.py @@ -0,0 +1,16 @@ +"""Puree module for trainer callbacks""" +import logging +from typing import Dict + +import vessl +from transformers import TrainerCallback, TrainerControl, TrainerState +from transformers.training_args import TrainingArguments + +LOG = logging.getLogger("axolotl.callbacks") + +class VesslLogCheckpointCallback(TrainerCallback): + + def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, float] = None, **kwargs): + if state.is_world_process_zero: + vessl.log(logs, state.global_step) + diff --git a/src/axolotl/utils/puree_.py b/src/axolotl/utils/puree_.py new file mode 100644 index 0000000000..ed8873b9e4 --- /dev/null +++ b/src/axolotl/utils/puree_.py @@ -0,0 +1,12 @@ +"""Module for puree utilities""" + +import os + +from axolotl.utils.dict import DictDefault + + +def setup_puree_env_vars(cfg: DictDefault): + # Enable VESSL if Run Config is present + credential_path = os.environ.get("VESSL_RUN_INITIAL_CONFIG") + if credential_path: + cfg.use_vessl = True From ebf2b364d861c252d052eac560514c596ca7bbf2 Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Tue, 26 Mar 2024 11:33:47 +0700 Subject: [PATCH 02/13] pass vessl credential --- src/axolotl/core/trainer_builder.py | 2 +- src/axolotl/utils/callbacks/puree_.py | 5 ++++- src/axolotl/utils/puree_.py | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index 94169af55b..1833493f79 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -896,7 +896,7 @@ def get_callbacks(self): from axolotl.utils.callbacks.puree_ import VesslLogCheckpointCallback callbacks.append( - VesslLogCheckpointCallback() + VesslLogCheckpointCallback(self.cfg.vessl_credential_path) ) if self.cfg.loss_watchdog_threshold is not None: diff --git a/src/axolotl/utils/callbacks/puree_.py 
b/src/axolotl/utils/callbacks/puree_.py index 616693ffdb..163a76f6a0 100644 --- a/src/axolotl/utils/callbacks/puree_.py +++ b/src/axolotl/utils/callbacks/puree_.py @@ -10,7 +10,10 @@ class VesslLogCheckpointCallback(TrainerCallback): - def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, float] = None, **kwargs): + def __init__(self, credential_path) -> None: + vessl.configure(credentials_file=credential_path) + + def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, float] = None, **kwargs): if state.is_world_process_zero: vessl.log(logs, state.global_step) diff --git a/src/axolotl/utils/puree_.py b/src/axolotl/utils/puree_.py index ed8873b9e4..d7372cd3bd 100644 --- a/src/axolotl/utils/puree_.py +++ b/src/axolotl/utils/puree_.py @@ -10,3 +10,4 @@ def setup_puree_env_vars(cfg: DictDefault): credential_path = os.environ.get("VESSL_RUN_INITIAL_CONFIG") if credential_path: cfg.use_vessl = True + cfg.vessl_credential_path = credential_path From f7806475a4d6e34dbf3eb792a61bfd93f1457d3b Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Tue, 26 Mar 2024 15:01:43 +0700 Subject: [PATCH 03/13] rename to vessl and add default metrics --- src/axolotl/cli/__init__.py | 4 ++-- src/axolotl/core/trainer_builder.py | 16 ++++++++++------ .../utils/callbacks/{puree_.py => vessl_.py} | 16 ++++++++++++---- src/axolotl/utils/puree_.py | 13 ------------- src/axolotl/utils/vessl_.py | 17 +++++++++++++++++ 5 files changed, 41 insertions(+), 25 deletions(-) rename src/axolotl/utils/callbacks/{puree_.py => vessl_.py} (53%) delete mode 100644 src/axolotl/utils/puree_.py create mode 100644 src/axolotl/utils/vessl_.py diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py index 6fea4054d8..ffdc488117 100644 --- a/src/axolotl/cli/__init__.py +++ b/src/axolotl/cli/__init__.py @@ -38,9 +38,9 @@ from axolotl.utils.distributed import is_main_process from axolotl.utils.mlflow_ 
import setup_mlflow_env_vars from axolotl.utils.models import load_tokenizer -from axolotl.utils.puree_ import setup_puree_env_vars from axolotl.utils.tokenization import check_dataset_labels from axolotl.utils.trainer import prepare_optim_env +from axolotl.utils.vessl_ import setup_vessl_env_vars from axolotl.utils.wandb_ import setup_wandb_env_vars project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) @@ -370,7 +370,7 @@ def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs): setup_mlflow_env_vars(cfg) - setup_puree_env_vars(cfg) + setup_vessl_env_vars(cfg) return cfg diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index 1833493f79..871e38a6b8 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -836,6 +836,16 @@ def get_callbacks(self) -> List[TrainerCallback]: SaveAxolotlConfigtoWandBCallback(self.cfg.axolotl_config_path) ) + if self.cfg.use_vessl: + from axolotl.utils.callbacks.vessl_ import VesslLogStepMetricsCallback + + callbacks.append( + VesslLogStepMetricsCallback( + self.cfg.vessl_credential_path, + self.cfg.vessl_metrics, + ) + ) + return callbacks @abstractmethod @@ -892,12 +902,6 @@ def get_callbacks(self): callbacks.append( SaveAxolotlConfigtoMlflowCallback(self.cfg.axolotl_config_path) ) - if self.cfg.use_vessl: - from axolotl.utils.callbacks.puree_ import VesslLogCheckpointCallback - - callbacks.append( - VesslLogCheckpointCallback(self.cfg.vessl_credential_path) - ) if self.cfg.loss_watchdog_threshold is not None: callbacks.append(LossWatchDogCallback(self.cfg)) diff --git a/src/axolotl/utils/callbacks/puree_.py b/src/axolotl/utils/callbacks/vessl_.py similarity index 53% rename from src/axolotl/utils/callbacks/puree_.py rename to src/axolotl/utils/callbacks/vessl_.py index 163a76f6a0..5e3be0ac50 100644 --- a/src/axolotl/utils/callbacks/puree_.py +++ b/src/axolotl/utils/callbacks/vessl_.py @@ -1,6 +1,6 @@ """Puree module for 
trainer callbacks""" import logging -from typing import Dict +from typing import Dict, List import vessl from transformers import TrainerCallback, TrainerControl, TrainerState @@ -8,12 +8,20 @@ LOG = logging.getLogger("axolotl.callbacks") -class VesslLogCheckpointCallback(TrainerCallback): +class VesslLogStepMetricsCallback(TrainerCallback): - def __init__(self, credential_path) -> None: + _metrics: List[str] = [] + + def __init__(self, credential_path: str, metrics: List[str]) -> None: vessl.configure(credentials_file=credential_path) + self._metrics = metrics def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, float] = None, **kwargs): if state.is_world_process_zero: - vessl.log(logs, state.global_step) + payload = {} + for metric in self._metrics: + if metric in logs: + payload[metric] = logs[metric] + + vessl.log(payload, state.global_step) diff --git a/src/axolotl/utils/puree_.py b/src/axolotl/utils/puree_.py deleted file mode 100644 index d7372cd3bd..0000000000 --- a/src/axolotl/utils/puree_.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Module for puree utilities""" - -import os - -from axolotl.utils.dict import DictDefault - - -def setup_puree_env_vars(cfg: DictDefault): - # Enable VESSL if Run Config is present - credential_path = os.environ.get("VESSL_RUN_INITIAL_CONFIG") - if credential_path: - cfg.use_vessl = True - cfg.vessl_credential_path = credential_path diff --git a/src/axolotl/utils/vessl_.py b/src/axolotl/utils/vessl_.py new file mode 100644 index 0000000000..914efd3d30 --- /dev/null +++ b/src/axolotl/utils/vessl_.py @@ -0,0 +1,17 @@ +"""Module for vessl utilities""" + +import os + +from axolotl.utils.dict import DictDefault + + +def setup_vessl_env_vars(cfg: DictDefault): + # VESSL_RUN_INITIAL_CONFIG is a variable that contain path to + # default credential inside a VESSL Run + credential_path = os.environ.get("VESSL_RUN_INITIAL_CONFIG") + if credential_path: + cfg.use_vessl = True + 
cfg.vessl_credential_path = credential_path + + if not cfg.vessl_metrics: + cfg.vessl_metrics = ["grad_norm", "learning_rate", "loss"] From ecdff973d0902064e70a0f259ce76d7bcd2dce7a Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Tue, 26 Mar 2024 15:20:40 +0700 Subject: [PATCH 04/13] revert metrics filter --- src/axolotl/core/trainer_builder.py | 5 +---- src/axolotl/utils/callbacks/vessl_.py | 12 ++---------- src/axolotl/utils/vessl_.py | 3 --- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index 871e38a6b8..4c8a98c056 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -840,10 +840,7 @@ def get_callbacks(self) -> List[TrainerCallback]: from axolotl.utils.callbacks.vessl_ import VesslLogStepMetricsCallback callbacks.append( - VesslLogStepMetricsCallback( - self.cfg.vessl_credential_path, - self.cfg.vessl_metrics, - ) + VesslLogStepMetricsCallback(self.cfg.vessl_credential_path) ) return callbacks diff --git a/src/axolotl/utils/callbacks/vessl_.py b/src/axolotl/utils/callbacks/vessl_.py index 5e3be0ac50..01c88aa853 100644 --- a/src/axolotl/utils/callbacks/vessl_.py +++ b/src/axolotl/utils/callbacks/vessl_.py @@ -10,18 +10,10 @@ class VesslLogStepMetricsCallback(TrainerCallback): - _metrics: List[str] = [] - def __init__(self, credential_path: str, metrics: List[str]) -> None: - vessl.configure(credentials_file=credential_path) - self._metrics = metrics + vessl.configure(credentials_file=credential_path) def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, float] = None, **kwargs): if state.is_world_process_zero: - payload = {} - for metric in self._metrics: - if metric in logs: - payload[metric] = logs[metric] - - vessl.log(payload, state.global_step) + vessl.log(logs, state.global_step) diff --git a/src/axolotl/utils/vessl_.py b/src/axolotl/utils/vessl_.py index 
914efd3d30..fa44024faf 100644 --- a/src/axolotl/utils/vessl_.py +++ b/src/axolotl/utils/vessl_.py @@ -12,6 +12,3 @@ def setup_vessl_env_vars(cfg: DictDefault): if credential_path: cfg.use_vessl = True cfg.vessl_credential_path = credential_path - - if not cfg.vessl_metrics: - cfg.vessl_metrics = ["grad_norm", "learning_rate", "loss"] From 5b0aa9063abca5f7b9f20ff9a76c95c37df15f3f Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Wed, 27 Mar 2024 13:44:38 +0700 Subject: [PATCH 05/13] update name and module comment --- src/axolotl/core/trainer_builder.py | 4 ++-- src/axolotl/utils/callbacks/vessl_.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index 4c8a98c056..414bb8c39c 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -837,10 +837,10 @@ def get_callbacks(self) -> List[TrainerCallback]: ) if self.cfg.use_vessl: - from axolotl.utils.callbacks.vessl_ import VesslLogStepMetricsCallback + from axolotl.utils.callbacks.vessl_ import VesslLogMetricsCallback callbacks.append( - VesslLogStepMetricsCallback(self.cfg.vessl_credential_path) + VesslLogMetricsCallback(self.cfg.vessl_credential_path) ) return callbacks diff --git a/src/axolotl/utils/callbacks/vessl_.py b/src/axolotl/utils/callbacks/vessl_.py index 01c88aa853..f5d051d8f7 100644 --- a/src/axolotl/utils/callbacks/vessl_.py +++ b/src/axolotl/utils/callbacks/vessl_.py @@ -1,4 +1,4 @@ -"""Puree module for trainer callbacks""" +"""Vessl module for trainer callbacks""" import logging from typing import Dict, List @@ -8,7 +8,7 @@ LOG = logging.getLogger("axolotl.callbacks") -class VesslLogStepMetricsCallback(TrainerCallback): +class VesslLogMetricsCallback(TrainerCallback): def __init__(self, credential_path: str, metrics: List[str]) -> None: vessl.configure(credentials_file=credential_path) From b917faaf401f7ab048bf6f1b99dbbc3bd3fd8e79 Mon Sep 17 00:00:00 2001 From: 
"Rifqi Alfi/IAB(YA)" Date: Thu, 28 Mar 2024 15:15:56 +0700 Subject: [PATCH 06/13] remove metrics from constructor --- src/axolotl/utils/callbacks/vessl_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/utils/callbacks/vessl_.py b/src/axolotl/utils/callbacks/vessl_.py index f5d051d8f7..4d436d6161 100644 --- a/src/axolotl/utils/callbacks/vessl_.py +++ b/src/axolotl/utils/callbacks/vessl_.py @@ -10,7 +10,7 @@ class VesslLogMetricsCallback(TrainerCallback): - def __init__(self, credential_path: str, metrics: List[str]) -> None: + def __init__(self, credential_path: str) -> None: vessl.configure(credentials_file=credential_path) def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, float] = None, **kwargs): From 74511a4784608ccdf92238420163fcabdae7492f Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Fri, 29 Mar 2024 09:26:36 +0700 Subject: [PATCH 07/13] remove use_vessl variable --- src/axolotl/core/trainer_builder.py | 2 +- src/axolotl/utils/vessl_.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index 836a899303..172e795eed 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -854,7 +854,7 @@ def get_callbacks(self) -> List[TrainerCallback]: SaveAxolotlConfigtoWandBCallback(self.cfg.axolotl_config_path) ) - if self.cfg.use_vessl: + if self.cfg.vessl_credential_path: from axolotl.utils.callbacks.vessl_ import VesslLogMetricsCallback callbacks.append( diff --git a/src/axolotl/utils/vessl_.py b/src/axolotl/utils/vessl_.py index fa44024faf..ae50de3d1d 100644 --- a/src/axolotl/utils/vessl_.py +++ b/src/axolotl/utils/vessl_.py @@ -10,5 +10,4 @@ def setup_vessl_env_vars(cfg: DictDefault): # default credential inside a VESSL Run credential_path = os.environ.get("VESSL_RUN_INITIAL_CONFIG") if credential_path: - cfg.use_vessl = True 
cfg.vessl_credential_path = credential_path From 22d5f973461c781d99945c2716db6ddcbc4bba18 Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Fri, 29 Mar 2024 09:34:09 +0700 Subject: [PATCH 08/13] fix pre-commit --- src/axolotl/core/trainer_builder.py | 4 +--- src/axolotl/utils/callbacks/vessl_.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index 172e795eed..25541ebe33 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -857,9 +857,7 @@ def get_callbacks(self) -> List[TrainerCallback]: if self.cfg.vessl_credential_path: from axolotl.utils.callbacks.vessl_ import VesslLogMetricsCallback - callbacks.append( - VesslLogMetricsCallback(self.cfg.vessl_credential_path) - ) + callbacks.append(VesslLogMetricsCallback(self.cfg.vessl_credential_path)) return callbacks diff --git a/src/axolotl/utils/callbacks/vessl_.py b/src/axolotl/utils/callbacks/vessl_.py index 4d436d6161..e68da3c68c 100644 --- a/src/axolotl/utils/callbacks/vessl_.py +++ b/src/axolotl/utils/callbacks/vessl_.py @@ -1,6 +1,6 @@ """Vessl module for trainer callbacks""" import logging -from typing import Dict, List +from typing import Dict import vessl from transformers import TrainerCallback, TrainerControl, TrainerState @@ -8,12 +8,20 @@ LOG = logging.getLogger("axolotl.callbacks") + class VesslLogMetricsCallback(TrainerCallback): + """Callback to send training metrics to VESSL AI""" def __init__(self, credential_path: str) -> None: vessl.configure(credentials_file=credential_path) - def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, float] = None, **kwargs): + def on_log( + self, + args: TrainingArguments, # pylint: disable=unused-argument + state: TrainerState, + control: TrainerControl, # pylint: disable=unused-argument + logs: Dict[str, float], + **kwargs # pylint: disable=unused-argument + ): 
if state.is_world_process_zero: vessl.log(logs, state.global_step) - From f4437b025f1a63e45a56f8c2f786b306bd28aacf Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Fri, 29 Mar 2024 09:58:11 +0700 Subject: [PATCH 09/13] ignore too-many-lines --- .pylintrc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pylintrc b/.pylintrc index ed973d2859..5387bff609 100644 --- a/.pylintrc +++ b/.pylintrc @@ -9,6 +9,6 @@ generated-members=numpy.*, torch.* [pylint.messages_control] -disable=missing-function-docstring, line-too-long, import-error, +disable=missing-function-docstring, line-too-long, import-error, too-many-lines, too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods, too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation, From b89a91abd1df64c93df46b6af0244f5c1d6027ae Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Fri, 29 Mar 2024 13:27:58 +0700 Subject: [PATCH 10/13] add vessl config class on AxolotlInputConfig --- src/axolotl/utils/config/models/input/v0_4_1/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py index cce0cbc76a..3c38079075 100644 --- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py +++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py @@ -395,6 +395,12 @@ def check_wandb_run(cls, data): return data +class VesslConfig(BaseModel): + """Vessl AI configuration subset""" + + vessl_credential_path: Optional[str] = None + + # pylint: disable=too-many-public-methods,too-many-ancestors class AxolotlInputConfig( ModelInputConfig, @@ -404,6 +410,7 @@ class AxolotlInputConfig( HyperparametersConfig, WandbConfig, MLFlowConfig, + VesslConfig, RemappedParameters, DeprecatedParameters, BaseModel, From 16d9813a47d52c1fcab43f57433a1f40661bfa73 Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: 
Tue, 2 Apr 2024 12:32:21 +0700 Subject: [PATCH 11/13] apply review feedback --- src/axolotl/utils/vessl_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/utils/vessl_.py b/src/axolotl/utils/vessl_.py index ae50de3d1d..346a6d2281 100644 --- a/src/axolotl/utils/vessl_.py +++ b/src/axolotl/utils/vessl_.py @@ -9,5 +9,5 @@ def setup_vessl_env_vars(cfg: DictDefault): # VESSL_RUN_INITIAL_CONFIG is a variable that contain path to # default credential inside a VESSL Run credential_path = os.environ.get("VESSL_RUN_INITIAL_CONFIG") - if credential_path: + if credential_path and not cfg.vessl_credential_path: cfg.vessl_credential_path = credential_path From 1f17adbb364bd0ebae591da975871a6788ab43da Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Tue, 2 Apr 2024 12:42:24 +0700 Subject: [PATCH 12/13] apply review feedback --- src/axolotl/utils/callbacks/vessl_.py | 2 ++ src/axolotl/utils/vessl_.py | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/axolotl/utils/callbacks/vessl_.py b/src/axolotl/utils/callbacks/vessl_.py index e68da3c68c..054f94cc4c 100644 --- a/src/axolotl/utils/callbacks/vessl_.py +++ b/src/axolotl/utils/callbacks/vessl_.py @@ -23,5 +23,7 @@ def on_log( logs: Dict[str, float], **kwargs # pylint: disable=unused-argument ): + # is_world_process_zero: Whether or not this process is the global main process (when training in a + # distributed fashion on several machines, this is only going to be `True` for one process). 
if state.is_world_process_zero: vessl.log(logs, state.global_step) diff --git a/src/axolotl/utils/vessl_.py b/src/axolotl/utils/vessl_.py index 346a6d2281..ff4af9de9c 100644 --- a/src/axolotl/utils/vessl_.py +++ b/src/axolotl/utils/vessl_.py @@ -6,8 +6,9 @@ def setup_vessl_env_vars(cfg: DictDefault): - # VESSL_RUN_INITIAL_CONFIG is a variable that contain path to - # default credential inside a VESSL Run + # VESSL_RUN_INITIAL_CONFIG is a variable that contains the path to the default credential inside a VESSL Run. + # Currently there are no docs regarding this variable, but it exists inside the container. + # Ref: https://screen.yanolja.in/lrTGow4Pr8eXhAai.png credential_path = os.environ.get("VESSL_RUN_INITIAL_CONFIG") if credential_path and not cfg.vessl_credential_path: cfg.vessl_credential_path = credential_path From 213bc6a03ccbf050dc333b2459051b92aa0e245b Mon Sep 17 00:00:00 2001 From: "Rifqi Alfi/IAB(YA)" Date: Wed, 17 Apr 2024 09:42:58 +0700 Subject: [PATCH 13/13] apply review feedback --- src/axolotl/utils/vessl_.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/axolotl/utils/vessl_.py b/src/axolotl/utils/vessl_.py index ff4af9de9c..11169f3ba5 100644 --- a/src/axolotl/utils/vessl_.py +++ b/src/axolotl/utils/vessl_.py @@ -6,9 +6,12 @@ def setup_vessl_env_vars(cfg: DictDefault): + if cfg.vessl_credential_path: + return + # VESSL_RUN_INITIAL_CONFIG is a variable that contains the path to the default credential inside a VESSL Run. # Currently there are no docs regarding this variable, but it exists inside the container. # Ref: https://screen.yanolja.in/lrTGow4Pr8eXhAai.png credential_path = os.environ.get("VESSL_RUN_INITIAL_CONFIG") - if credential_path and not cfg.vessl_credential_path: + if credential_path: cfg.vessl_credential_path = credential_path