Commit 2473dec

fallback between two convert scripts
hodlen committed Dec 20, 2023
1 parent cbb4c4c
1 changed file: convert.py (14 additions, 3 deletions)
@@ -15,6 +15,7 @@
 import re
 import signal
 import struct
+import subprocess
 import sys
 import time
 import zipfile
@@ -179,6 +180,7 @@ class Params:
     rope_finetuned: bool | None = None
 
     ftype: GGMLFileType | None = None
+    arch: str | None = None
 
     # path to the directory containing the model files
     path_model: Path | None = None
@@ -262,6 +264,7 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
             f_rope_scale = f_rope_scale,
             n_orig_ctx = n_orig_ctx,
             rope_finetuned = rope_finetuned,
+            arch = config.get("model_type"),
         )
 
     # LLaMA v2 70B params.json
@@ -1192,17 +1195,25 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
     parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
     parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
-    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
+    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)", default="f16")
     parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
     parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
     parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin, *.safetensors)")
+    parser.add_argument("mlp_model", type=Path, help="MLP model for sparse attention")
+    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
     parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
     parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY)
     parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
-    parser.add_argument("mlp_model", type=Path, help="MLP model for sparse attention")
-    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
 
     args = parser.parse_args(args_in)
+    base_model_params = Params.load(lazy_load_file(args.model))
+    if base_model_params.arch != "llama":
+        # invoke another script to convert other models
+        print(f"Model architecture {base_model_params.arch} is not supported by this script. Trying with `convert-hf-to-powerinfer-gguf.py`...")
+        script_path = Path(__file__).resolve().parent / "convert-hf-to-powerinfer-gguf.py"
+        subprocess.run(["python3", str(script_path.absolute())] + sys.argv[1:])
+        return
+
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
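For context, the fallback this commit introduces can be sketched as a standalone script. This is a minimal sketch of the dispatch pattern only, not the committed code: detect_arch is a simplified stand-in for Params.load, and, unlike the commit, it uses sys.executable and propagates the child's exit status instead of always returning success.

#!/usr/bin/env python3
# Minimal sketch of an architecture-based fallback between two converter
# scripts. Assumes a sibling script named convert-hf-to-powerinfer-gguf.py,
# as in the diff above; everything else here is simplified illustration.
from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

FALLBACK_SCRIPT = "convert-hf-to-powerinfer-gguf.py"  # name taken from the diff

def detect_arch(model_dir: Path) -> str | None:
    # Simplified stand-in for Params.load(): read "model_type" from config.json.
    config_path = model_dir / "config.json"
    if config_path.is_file():
        return json.loads(config_path.read_text()).get("model_type")
    return None

def main() -> int:
    model_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    if detect_arch(model_dir) != "llama":
        # Re-run the sibling converter with the same command-line arguments.
        script = Path(__file__).resolve().parent / FALLBACK_SCRIPT
        completed = subprocess.run([sys.executable, str(script)] + sys.argv[1:])
        return completed.returncode  # propagate the child's exit status
    print("llama architecture: continuing with the native conversion path")
    return 0

if __name__ == "__main__":
    sys.exit(main())

Invoked as, say, python3 convert.py ./ReluLLaMA-7B ./mlp-predictor (paths hypothetical), a non-llama model_type re-executes the same argument list against the sibling converter, which is why the committed code passes sys.argv[1:] through unchanged.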
