switch to exllamav2 from exl2conv
xhedit authored Jun 25, 2024
1 parent 169b6f3 commit e54cdf9
Showing 1 changed file with 7 additions and 11 deletions.
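In summary: the repeated per-function `from huggingface_hub import snapshot_download` statements and the `import json` inside run_gguf are hoisted to module level, the `exl2conv.conversion` imports are replaced by quantkit's own `quantkit.convert_exl2` module (the switch to exllamav2 named in the commit title), and two new keys, `status_output` and `hidden_state_offload_layers`, are added to the EXL2 conversion job. A usage sketch follows the diff below.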
18 changes: 7 additions & 11 deletions quantkit/quantkit.py
@@ -1,12 +1,15 @@
 # quantkit can download a hf model, convert a model to safetensors, and quantize
-# supports: AWQ, GPTQ, and EXL2
+# supports: GGUF, AWQ, GPTQ, EXL2, and HQQ
 
 import gc
 import os
+import json
 import site
 import time
 import datetime
 
+from huggingface_hub import snapshot_download
+
 from pathlib import Path
 from quantkit.safetensor import convert_multi
 from quantkit.convert import do_gguf_conversion
@@ -19,7 +22,6 @@ def run_download(model, output, hf_cache, force_download, resume_download, safet
     else:
         path = Path(output)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, revision=branch, local_dir=path, local_dir_use_symlinks=hf_cache, force_download=force_download, resume_download=resume_download, ignore_patterns=['pytorch_model*', 'consolidated*.pt'] if safetensors_only else None)
 
 def run_safetensor(model, delete_original):
@@ -34,7 +36,6 @@ def run_safetensor(model, delete_original):
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=False, resume_download=True)
     convert_multi(model_dir, del_pytorch_model=delete_original)
 
@@ -56,11 +57,9 @@ def run_gguf(model, quant_type, output, keep, f32, built_in_imatrix, imatrix, ca
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     do_step_two = False
-    import json
     with open(path / "config.json") as f:
         config = json.load(f)
     if 'torch_dtype' in config:
@@ -187,7 +186,6 @@ def run_awq(model, output, hf_cache, bits, group_size, zero_point, gemm):
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     import torch
@@ -236,7 +234,6 @@ def run_gptq(model, output, hf_cache, bits, group_size, damp, sym, true_seq, act
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     import torch
@@ -293,15 +290,13 @@ def run_exl2(model, output, hf_cache, bits, head_bits, rope_alpha, rope_scale, o
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     if not Path(path / "model.safetensors").is_file() and not Path(path / "model.safetensors.index.json").is_file():
         convert_multi(model_dir, del_pytorch_model=True)
 
     import torch
-    from exl2conv.conversion.qparams import qparams_headoptions
-    from exl2conv.conversion.convert import convert_hf_to_exl2
+    from quantkit.convert_exl2 import convert_hf_to_exl2
 
     if output is None:
         compile_full = model_dir + "-exl2"
@@ -336,6 +331,8 @@ def run_exl2(model, output, hf_cache, bits, head_bits, rope_alpha, rope_scale, o
         "compile_full": compile_full,
         "no_resume": no_resume,
         "output_measurement": output_measurement,
+        "status_output": True,
+        "hidden_state_offload_layers": 0,
     }
 
     if rope_alpha is not None:
@@ -378,7 +375,6 @@ def run_hqq(model, output, hf_cache, bits, group_size, zero_point, scale, offloa
     model_dir = model.split("/")[1]
     path = Path(model_dir)
 
-    from huggingface_hub import snapshot_download
     snapshot_download(model, local_dir=path, local_dir_use_symlinks=True, resume_download=True)
 
     import torch
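A minimal sketch of the EXL2 flow as rewired by this commit. The job keys shown are those visible in the diff; `in_dir`, `out_dir`, the example model id, and the single-dict call shape are assumptions for illustration, since run_exl2's full body is collapsed in this view and quantkit.convert_exl2's signature is not shown.

# Sketch only: assumes convert_hf_to_exl2 takes a single job dict; the exact
# signature is not visible in this diff.
from pathlib import Path

from huggingface_hub import snapshot_download  # now a module-level import

from quantkit.convert_exl2 import convert_hf_to_exl2  # replaces the exl2conv imports

model = "mistralai/Mistral-7B-v0.1"  # hypothetical model id
model_dir = model.split("/")[1]
snapshot_download(model, local_dir=Path(model_dir), local_dir_use_symlinks=True, resume_download=True)

compile_full = model_dir + "-exl2"
job = {
    "in_dir": model_dir,                 # assumed key name (not shown in diff)
    "out_dir": model_dir + "-exl2-tmp",  # assumed key name (not shown in diff)
    "compile_full": compile_full,        # from the diff
    "no_resume": False,                  # from the diff
    "output_measurement": None,          # from the diff
    "status_output": True,               # added by this commit
    "hidden_state_offload_layers": 0,    # added by this commit
}
convert_hf_to_exl2(job)                  # call shape assumed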
