Fix eval yamls #609

Merged 3 commits on Sep 20, 2023.
scripts/eval/yamls/hf_8bit_eval.yaml (1 addition, 1 deletion)

@@ -10,7 +10,7 @@ models:
   model:
     name: hf_causal_lm
     pretrained_model_name_or_path: ${model_name_or_path}
-    init_device: cpu
+    init_device: mixed
     pretrained: true
     load_in_8bit: true
   tokenizer:
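The `cpu` → `mixed` switch recurs in every config this PR touches. A hedged reading, based on how llm-foundry documents its `init_device` options for pretrained models: with `mixed`, only rank 0 materializes the pretrained weights (on CPU) while the other ranks initialize on the meta device, so a multi-GPU eval keeps one copy of the weights in host memory instead of one per rank. A minimal sketch of a standalone model block using it (the gpt-neo model choice is illustrative, not part of this file):

    models:
    -
      model_name: example_hf_model
      model:
        name: hf_causal_lm
        pretrained_model_name_or_path: EleutherAI/gpt-neo-125m
        init_device: mixed  # assumed semantics: rank 0 on cpu, other ranks on meta
        pretrained: true    # rank 0 needs real weights to broadcast to the other ranks
      tokenizer:
        name: EleutherAI/gpt-neo-125m
        kwargs:
          model_max_length: ${max_seq_len}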
scripts/eval/yamls/hf_eval.yaml (1 addition, 1 deletion)

@@ -12,7 +12,7 @@ models:
   model:
     name: hf_causal_lm
     pretrained_model_name_or_path: ${model_name_or_path}
-    init_device: cpu
+    init_device: mixed
     pretrained: true
   tokenizer:
     name: ${model_name_or_path}
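Because `models` is a YAML list, one eval run can score several checkpoints by adding a block per model, as the lora config's comment below suggests. A sketch under that assumption (both model choices are illustrative):

    models:
    -
      model_name: gpt-neo-125m
      model:
        name: hf_causal_lm
        pretrained_model_name_or_path: EleutherAI/gpt-neo-125m
        init_device: mixed
        pretrained: true
      tokenizer:
        name: EleutherAI/gpt-neo-125m
    -
      model_name: gpt-neo-1.3B
      model:
        name: hf_causal_lm
        pretrained_model_name_or_path: EleutherAI/gpt-neo-1.3B
        init_device: mixed
        pretrained: true
      tokenizer:
        name: EleutherAI/gpt-neo-1.3B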
scripts/eval/yamls/hf_lora_eval.yml (4 additions, 2 deletions)

@@ -5,7 +5,9 @@ precision: amp_fp16
 # If you are using one model, put it here:
 model_name_or_path: EleutherAI/gpt-neo-125m
 # If you are using a separate lora weight, put it here:
-lora_id_or_path: nathan0/lora-gpt-neo-125m-alpaca
+# lora weights must be compatible with the specified model
+lora_id_or_path: edbeeching/gpt-neo-125M-imdb-lora # Example lora weights for gpt-neo-125m
+
 # otherwise, write a block for each model you want to test in the `models` section

@@ -14,7 +16,7 @@ models:
   model:
     name: hf_causal_lm
     pretrained_model_name_or_path: ${model_name_or_path}
-    init_device: cpu
+    init_device: mixed
     pretrained: true
     pretrained_lora_id_or_path: ${lora_id_or_path}
   tokenizer:
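The replacement adapter was trained from the same base model the config names, which is what the added "must be compatible" comment is getting at: a PEFT-style adapter records the base model it was trained from (its `base_model_name_or_path`), and that has to match `pretrained_model_name_or_path` here. A sketch of pointing the config at your own adapter (the adapter ID is hypothetical):

    model_name_or_path: EleutherAI/gpt-neo-125m
    # the adapter must have been trained from EleutherAI/gpt-neo-125m
    lora_id_or_path: my-org/my-gpt-neo-125m-lora  # hypothetical adapter ID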
scripts/eval/yamls/mpt_eval.yaml (2 additions, 2 deletions)

@@ -3,7 +3,6 @@ tokenizer_name: EleutherAI/gpt-neox-20b
 seed: 1
 precision: amp_fp16

-
 models:
 -
   model_name: mpt_test

@@ -14,7 +13,7 @@ models:
       model_max_length: ${max_seq_len}
   model:
     name: mpt_causal_lm
-    init_device: meta
+    init_device: mixed
     # Set the below model parameters to match the checkpoint specified with load_path
     d_model: 768
     n_heads: 12
     n_layers: 12
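For `mpt_causal_lm`, the weights come from a Composer checkpoint rather than the HF hub, so the architecture keys have to mirror the checkpoint exactly, as the in-file comment says. A sketch of a full model entry under that assumption (the `load_path`, `expansion_ratio`, and `vocab_size` values are illustrative, not taken from this diff):

    models:
    -
      model_name: mpt_test
      model:
        name: mpt_causal_lm
        init_device: mixed
        d_model: 768                  # must match the checkpoint
        n_heads: 12                   # must match the checkpoint
        n_layers: 12                  # must match the checkpoint
        expansion_ratio: 4            # assumed; match your checkpoint
        max_seq_len: ${max_seq_len}
        vocab_size: 50368             # assumed; match your tokenizer/checkpoint
      load_path: /path/to/checkpoints/ep0-ba100-rank0.pt  # hypothetical path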