add log statements
nancyhung committed Oct 22, 2024
1 parent bc29278 commit 99589c7
Showing 1 changed file with 5 additions and 4 deletions.

llmfoundry/callbacks/hf_checkpointer.py (9 changes: 5 additions & 4 deletions)
@@ -133,7 +133,6 @@ def _log_model_multiprocess(
         - await_creation_for: int: time to wait for model creation
         - registered_model_name: Optional
     """
-    print("----------------- REACHED MLFLOW LOG MODEL -----------------")
     # Setup logging for child process. This ensures that any logs from composer are surfaced.
     if composer_logging_level > 0:
         # If logging_level is 0, then the composer logger was unset.
@@ -143,24 +142,26 @@
         )
         logging.getLogger('composer').setLevel(composer_logging_level)
 
+    log.info("----------------- REACHED MLFLOW LOG MODEL -----------------")
     # monkey patch to prevent duplicate tokenizer upload
     import mlflow
     original_save_model = mlflow.transformers.save_model
     def save_model_patch(*args: Any, **kwargs: Any):
         original_save_model(*args, **kwargs)
-        log.debug(f"List of root path: {os.listdir(kwargs['path'])}")
+        log.info(f"List of root path: {os.listdir(kwargs['path'])}")
         components_path = os.path.join(kwargs['path'], 'components')
         if os.path.exists(components_path):
-            log.debug(f"List of components path: {components_path}: {os.listdir(components_path)}")
+            log.info(f"List of components path: {components_path}: {os.listdir(components_path)}")
         tokenizer_path = os.path.join(kwargs['path'], 'components', 'tokenizer')
         tokenizer_files = []
         if os.path.exists(tokenizer_path):
             tokenizer_files = os.listdir(os.path.join(kwargs['path'], 'components', 'tokenizer'))
-        log.debug(f"Tokenizer files: {tokenizer_files}")
+        log.info(f"Tokenizer files: {tokenizer_files}")
         try:
             for tokenizer_file_name in tokenizer_files:
                 dupe_file = os.path.isfile(os.path.join(kwargs['path'], 'model', tokenizer_file_name))
                 if dupe_file:
+                    log.info(f"Removing duplicate tokenizer file: {tokenizer_file_name}")
                     os.remove(os.path.join(kwargs['path'], 'model', tokenizer_file_name))
         except Exception as e:
             log.error(f"Exception when removing duplicate tokenizer files in the model directory", e)
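Second, the commit's new log statements instrument a monkey patch of mlflow.transformers.save_model that strips tokenizer files duplicated between the components/tokenizer/ and model/ output directories. A minimal standalone sketch of the same pattern; the dedupe_tokenizer_files helper name is an illustrative assumption, not part of this commit:

import logging
import os
from typing import Any

import mlflow

log = logging.getLogger(__name__)

def dedupe_tokenizer_files(save_path: str) -> None:
    # Hypothetical helper mirroring the patched save_model above:
    # drop any file under model/ that also exists under
    # components/tokenizer/, so the tokenizer is uploaded only once.
    tokenizer_path = os.path.join(save_path, 'components', 'tokenizer')
    if not os.path.exists(tokenizer_path):
        return
    for file_name in os.listdir(tokenizer_path):
        dupe = os.path.join(save_path, 'model', file_name)
        if os.path.isfile(dupe):
            log.info('Removing duplicate tokenizer file: %s', file_name)
            os.remove(dupe)

original_save_model = mlflow.transformers.save_model

def save_model_patch(*args: Any, **kwargs: Any) -> None:
    # Run the real save, then clean up the duplicated tokenizer files.
    original_save_model(*args, **kwargs)
    dedupe_tokenizer_files(kwargs['path'])

mlflow.transformers.save_model = save_model_patch

Because the patch replaces a module attribute, it only takes effect in the child process where _log_model_multiprocess runs; the parent training process keeps the original save_model.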
