diff --git a/llmfoundry/callbacks/hf_checkpointer.py b/llmfoundry/callbacks/hf_checkpointer.py index a186f67f14..5aa81acaa5 100644 --- a/llmfoundry/callbacks/hf_checkpointer.py +++ b/llmfoundry/callbacks/hf_checkpointer.py @@ -517,6 +517,7 @@ def tensor_hook( new_model_instance.generation_config.update( **original_model.generation_config.to_dict(), ) + new_model_instance.name_or_path = original_model.name_or_path # Then load the state dict in with "assign" so that the state dict # is loaded properly even though the model is initially on meta device.