Commit 2473dec

fallback between two convert scripts
hodlen committed Dec 20, 2023
1 parent cbb4c4c
1 changed file: convert.py (14 additions, 3 deletions)
@@ -15,6 +15,7 @@
 import re
 import signal
 import struct
+import subprocess
 import sys
 import time
 import zipfile
@@ -179,6 +180,7 @@ class Params:
     rope_finetuned: bool | None = None
 
     ftype: GGMLFileType | None = None
+    arch: str | None = None
 
     # path to the directory containing the model files
     path_model: Path | None = None
@@ -262,6 +264,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
             f_rope_scale = f_rope_scale,
             n_orig_ctx = n_orig_ctx,
             rope_finetuned = rope_finetuned,
+            arch = config.get("model_type"),
         )
 
     # LLaMA v2 70B params.json
@@ -1192,17 +1195,25 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
     parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
     parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
-    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
+    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)", default="f16")
     parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
     parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
     parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin, *.safetensors)")
+    parser.add_argument("mlp_model", type=Path, help="MLP model for sparse attention")
+    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
     parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
     parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY)
     parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
-    parser.add_argument("mlp_model", type=Path, help="MLP model for sparse attention")
-    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
 
     args = parser.parse_args(args_in)
+    base_model_params = Params.load(lazy_load_file(args.model))
+    if base_model_params.arch != "llama":
+        # invoke another script to convert other models
+        print(f"Model architecture {base_model_params.arch} is not supported by this script. Trying with `convert-hf-to-powerinfer-gguf.py`...")
+        script_path = Path(__file__).resolve().parent / "convert-hf-to-powerinfer-gguf.py"
+        subprocess.run(["python3", str(script_path.absolute())] + sys.argv[1:])
+        return
+
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
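For context, the fallback this commit introduces can be sketched as a standalone script. This is a minimal sketch of the dispatch pattern only, not the committed code: detect_arch is a simplified stand-in for Params.load, and, unlike the commit, it uses sys.executable and propagates the child's exit status instead of always returning success.

#!/usr/bin/env python3
# Minimal sketch of an architecture-based fallback between two converter
# scripts. Assumes a sibling script named convert-hf-to-powerinfer-gguf.py,
# as in the diff above; everything else here is simplified illustration.
from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

FALLBACK_SCRIPT = "convert-hf-to-powerinfer-gguf.py"  # name taken from the diff

def detect_arch(model_dir: Path) -> str | None:
    # Simplified stand-in for Params.load(): read "model_type" from config.json.
    config_path = model_dir / "config.json"
    if config_path.is_file():
        return json.loads(config_path.read_text()).get("model_type")
    return None

def main() -> int:
    model_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    if detect_arch(model_dir) != "llama":
        # Re-run the sibling converter with the same command-line arguments.
        script = Path(__file__).resolve().parent / FALLBACK_SCRIPT
        completed = subprocess.run([sys.executable, str(script)] + sys.argv[1:])
        return completed.returncode  # propagate the child's exit status
    print("llama architecture: continuing with the native conversion path")
    return 0

if __name__ == "__main__":
    sys.exit(main())

Invoked as, say, python3 convert.py ./ReluLLaMA-7B ./mlp-predictor (paths hypothetical), a non-llama model_type re-executes the same argument list against the sibling converter, which is why the committed code passes sys.argv[1:] through unchanged.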
