Skip to content

Commit

Permalink
Add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
irenedea committed Mar 30, 2024
1 parent 8594a80 commit 62f7930
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion llmfoundry/callbacks/hf_checkpointer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,18 @@ def _register_model_with_run_id_multiprocess(mlflow_logger: MLFlowLogger,
logging_level: int, model_uri: str,
name: str,
await_creation_for: int):
"""Function for calling MLFlowLogger.register_model_with_run_id from a
spawned child process.
"""
# Setup logging for child process. This ensures that any logs from composer are surfaced.
logging.basicConfig(
format=
f'%(asctime)s: rank{dist.get_global_rank()}[%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s'
)
logging.getLogger('composer').setLevel(logging_level)

# Register model.
mlflow_logger.register_model_with_run_id(
model_uri=model_uri, name=name, await_creation_for=await_creation_for)

Expand Down Expand Up @@ -411,7 +418,7 @@ def _save_checkpoint(self, state: State, logger: Logger):
os.path.join(local_save_path, license_filename),
)

# Register the model to mlflow in a child process.
# Spawn a new process to register the model.
process = SpawnProcess(
target=_register_model_with_run_id_multiprocess,
kwargs={
Expand Down

0 comments on commit 62f7930

Please sign in to comment.