flash_attention + sample packing for stablelm 3b (#671)
* stablelm epoch fa patch

* is causal for fa

* working stablelm fa w packing

* chore: pre-commit linting
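
The "is causal for fa" change refers to enabling causal masking in the flash-attention kernel for these decoder-only models. Below is a minimal sketch of such a call, assuming flash-attn 2.x's flash_attn_func signature and purely illustrative tensor sizes; it is not the forward pass from this commit.

# Hedged sketch only: demonstrates a causal flash-attention call, not the patch's code.
import torch
from flash_attn.flash_attn_interface import flash_attn_func

batch, seqlen, nheads, headdim = 2, 128, 32, 80  # illustrative sizes only
q = torch.randn(batch, seqlen, nheads, headdim, dtype=torch.float16, device="cuda")
k = torch.randn_like(q)
v = torch.randn_like(q)

# causal=True applies the autoregressive mask inside the fused kernel.
out = flash_attn_func(q, k, v, dropout_p=0.0, causal=True)
print(out.shape)  # (batch, seqlen, nheads, headdim)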
winglian authored Oct 5, 2023
1 parent 5b4e92b commit b838a67
Showing 3 changed files with 429 additions and 1 deletion.
src/axolotl/monkeypatch/btlm_attn_hijack_flash.py (3 additions, 1 deletion)
@@ -7,6 +7,7 @@
 from typing import Optional, Tuple

 import torch
+from accelerate import init_empty_weights
 from flash_attn.flash_attn_interface import flash_attn_func
 from transformers import AutoConfig, AutoModelForCausalLM

@@ -17,7 +18,8 @@ def replace_btlm_attn_with_flash_attn(model_name="cerebras/btlm-3b-8k-base"):
     # this is a wonky hack to get the remotely loaded module
     model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
     # we need to load the model here in order for modeling_btlm to be available
-    AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+    with init_empty_weights():
+        AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
     module_name = model_config.__class__.__module__.replace(
         ".configuration_btlm", ".modeling_btlm"
     )
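
In the hunk above, the throwaway AutoModelForCausalLM.from_pretrained call (needed only so the remote modeling_btlm module gets loaded) is wrapped in accelerate's init_empty_weights context manager, so parameters are created on the meta device rather than materialized in memory. The following is a minimal sketch of the same pattern; the final importlib lookup is an assumption about what the collapsed code after this hunk does, not something visible in the diff.

# Sketch of the init_empty_weights pattern from the hunk above.
import importlib

from accelerate import init_empty_weights
from transformers import AutoConfig, AutoModelForCausalLM

model_name = "cerebras/btlm-3b-8k-base"  # default from the patched function
model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

# Instantiating under init_empty_weights() still executes the remote modeling
# code (so the module becomes importable) but creates parameters on the
# "meta" device, avoiding a full in-memory copy of the 3B weights.
with init_empty_weights():
    AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

module_name = model_config.__class__.__module__.replace(
    ".configuration_btlm", ".modeling_btlm"
)
# Assumed follow-up (collapsed in the diff): grab the loaded module to patch it.
modeling_btlm = importlib.import_module(module_name)
print(modeling_btlm.__name__)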
