From 59231d348d9c36bc1a014785faac7c8c2b8dec7b Mon Sep 17 00:00:00 2001
From: Daniel King <43149077+dakinggg@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:49:35 -0700
Subject: [PATCH] Avoid HF race condition (#1338)

---
 llmfoundry/models/hf/hf_causal_lm.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py
index 5f3a53ed18..6ab8249bac 100644
--- a/llmfoundry/models/hf/hf_causal_lm.py
+++ b/llmfoundry/models/hf/hf_causal_lm.py
@@ -291,20 +291,26 @@ def _autoset_attn_implementation_monkeypatch(
         if dist.get_local_rank() != 0 and init_device == 'mixed':
             pretrained = False
 
-        # If the HuggingFace model is coming from a local folder, Hugging Face copies the modules into the
+        # Hugging Face copies the modules into the
         # transformers modules cache. On particular systems, this operation seems to cause contention between
         # the different processes. To avoid this contention, we first create the model (on meta device) on local rank
         # zero. This will set up the transformers model cache and avoid the future contention.
-        if dist.get_local_rank(
-        ) == 0 and os.path.isdir(pretrained_model_name_or_path):
-            with init_empty_weights(include_buffers=False):
-                with warnings.catch_warnings():
-                    warnings.simplefilter('ignore', UserWarning)
-                    AutoModelForCausalLM.from_pretrained(
-                        pretrained_model_name_or_path,
+        if dist.get_local_rank() == 0:
+            if os.path.isdir(pretrained_model_name_or_path):
+                with init_empty_weights(include_buffers=False):
+                    with warnings.catch_warnings():
+                        warnings.simplefilter('ignore', UserWarning)
+                        AutoModelForCausalLM.from_pretrained(
+                            pretrained_model_name_or_path,
+                            trust_remote_code=trust_remote_code,
+                            use_auth_token=use_auth_token,
+                            config=config,
+                        )
+            else:
+                with init_empty_weights(include_buffers=False):
+                    AutoModelForCausalLM.from_config(
+                        config,
                         trust_remote_code=trust_remote_code,
-                        use_auth_token=use_auth_token,
-                        config=config,
                     )
 
         dist.barrier()