switch to exllamav2 from exl2conv
xhedit authored Jun 25, 2024
1 parent 169b6f3 commit e54cdf9
Showing 1 changed file with 7 additions and 11 deletions.
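In summary: the repeated per-function `from huggingface_hub import snapshot_download` statements and the `import json` inside run_gguf are hoisted to module level, the `exl2conv.conversion` imports are replaced by quantkit's own `quantkit.convert_exl2` module (the switch to exllamav2 named in the commit title), and two new keys, `status_output` and `hidden_state_offload_layers`, are added to the EXL2 conversion job. A usage sketch follows the diff below.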
18 changes: 7 additions & 11 deletions quantkit/quantkit.py
@@ -1,12 +1,15 @@
 # quantkit can download a hf model, convert a model to safetensors, and quantize
-# supports: AWQ, GPTQ, and EXL2
+# supports: GGUF, AWQ, GPTQ, EXL2, and HQQ
 
 import gc
 import os
+import json
 import site
 import time
 import datetime
 
+from huggingface_hub import snapshot_download
+
 from pathlib import Path
 from quantkit.safetensor import convert_multi
 from quantkit.convert import do_gguf_conversion
@@ -19,7 +22,6 @@ def run_download(model, output, hf_cache, force_download, resume_download, safet
     else:
         path = Path(output)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, revision=branch, local_dir=path, local_dir_use_symlinks=hf_cache, force_download=force_download, resume_download=resume_download, ignore_patterns=['pytorch_model*', 'consolidated*.pt'] if safetensors_only else None)
 
 def run_safetensor(model, delete_original):
@@ -34,7 +36,6 @@ def run_safetensor(model, delete_original):
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=False, resume_download=True)
     convert_multi(model_dir, del_pytorch_model=delete_original)
 
@@ -56,11 +57,9 @@ def run_gguf(model, quant_type, output, keep, f32, built_in_imatrix, imatrix, ca
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     do_step_two = False
-    import json
     with open(path / "config.json") as f:
         config = json.load(f)
     if 'torch_dtype' in config:
@@ -187,7 +186,6 @@ def run_awq(model, output, hf_cache, bits, group_size, zero_point, gemm):
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     import torch
@@ -236,7 +234,6 @@ def run_gptq(model, output, hf_cache, bits, group_size, damp, sym, true_seq, act
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     import torch
@@ -293,15 +290,13 @@ def run_exl2(model, output, hf_cache, bits, head_bits, rope_alpha, rope_scale, o
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     if not Path(path / "model.safetensors").is_file() and not Path(path / "model.safetensors.index.json").is_file():
         convert_multi(model_dir, del_pytorch_model=True)
 
     import torch
-    from exl2conv.conversion.qparams import qparams_headoptions
-    from exl2conv.conversion.convert import convert_hf_to_exl2
+    from quantkit.convert_exl2 import convert_hf_to_exl2
 
     if output is None:
         compile_full = model_dir + "-exl2"
@@ -336,6 +331,8 @@ def run_exl2(model, output, hf_cache, bits, head_bits, rope_alpha, rope_scale, o
         "compile_full": compile_full,
         "no_resume": no_resume,
         "output_measurement": output_measurement,
+        "status_output": True,
+        "hidden_state_offload_layers": 0,
     }
 
     if rope_alpha is not None:
@@ -378,7 +375,6 @@ def run_hqq(model, output, hf_cache, bits, group_size, zero_point, scale, offloa
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     import torch
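A minimal sketch of the EXL2 flow as rewired by this commit. The job keys shown are those visible in the diff; `in_dir`, `out_dir`, the example model id, and the single-dict call shape are assumptions for illustration, since run_exl2's full body is collapsed in this view and quantkit.convert_exl2's signature is not shown.

# Sketch only: assumes convert_hf_to_exl2 takes a single job dict; the exact
# signature is not visible in this diff.
from pathlib import Path

from huggingface_hub import snapshot_download  # now a module-level import

from quantkit.convert_exl2 import convert_hf_to_exl2  # replaces the exl2conv imports

model = "mistralai/Mistral-7B-v0.1"  # hypothetical model id
model_dir = model.split("/")[1]
snapshot_download(model, local_dir=Path(model_dir), local_dir_use_symlinks=True, resume_download=True)

compile_full = model_dir + "-exl2"
job = {
    "in_dir": model_dir,                 # assumed key name (not shown in diff)
    "out_dir": model_dir + "-exl2-tmp",  # assumed key name (not shown in diff)
    "compile_full": compile_full,        # from the diff
    "no_resume": False,                  # from the diff
    "output_measurement": None,          # from the diff
    "status_output": True,               # added by this commit
    "hidden_state_offload_layers": 0,    # added by this commit
}
convert_hf_to_exl2(job)                  # call shape assumed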
