From 8ac705b5a2ea08787f670656a1f0359edee9e1e7 Mon Sep 17 00:00:00 2001
From: xhedit <71542719+xhedit@users.noreply.github.com>
Date: Tue, 19 Mar 2024 20:40:38 -0400
Subject: [PATCH] handle site packages better

Look for the bundled imatrix/quantize binaries under the directories
returned by site.getsitepackages() when they are not present under the
user site-packages directory.

Only validate the calibration file in run_gguf when --cal-file was
actually given, and make --built-in-imatrix default to off.
---
Review note: the fallback loops now build the binary path with Path(d);
site.getsitepackages() returns plain strings, so `d / "bin"` raised
TypeError. The post-image index hash of quantkit/quantkit.py below predates
this correction.

 quantkit/cli.py      |  2 +-
 quantkit/quantkit.py | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/quantkit/cli.py b/quantkit/cli.py
index 3af9cf2..27e5b7e 100644
--- a/quantkit/cli.py
+++ b/quantkit/cli.py
@@ -36,7 +36,7 @@ def safetensor(model, delete_original):
 @click.option('--output', '-out', help='output name')
 @click.option('--keep/--delete', help='keep intermediate conversion GGUF')
 @click.option('--f32/--f16', default=False, help='intermediate conversion step uses f32 (requires much more disk space)')
-@click.option('--built-in-imatrix/--disable-built-in-imatrix', default=True, help='use built in imatrix')
+@click.option('--built-in-imatrix/--disable-built-in-imatrix', default=False, help='use built in imatrix')
 @click.option('--imatrix', help='Specify pre-generated imatrix')
 @click.option('--cal-file', help='Specify calibration dataset')
 @click.option('--n-gpu-layers', "-ngl", default=0, help='how many layers to offload to GPU for imatrix')
diff --git a/quantkit/quantkit.py b/quantkit/quantkit.py
index 2638cfb..9a5e281 100644
--- a/quantkit/quantkit.py
+++ b/quantkit/quantkit.py
@@ -38,9 +38,10 @@ def run_safetensor(model, delete_original):
     convert_multi(model_dir, del_pytorch_model=delete_original)
 
 def run_gguf(model, quant_type, output, keep, f32, built_in_imatrix, imatrix, cal_file, n_gpu_layers):
-    if not Path(cal_file).is_file():
-        print(f"quantkit: could not load {cal_file}")
-        return
+    if cal_file is not None:
+        if not Path(cal_file).is_file():
+            print(f"quantkit: could not load {cal_file}")
+            return
 
     path = Path(model)
     if path.is_dir():
@@ -98,6 +99,13 @@ def run_imatrix(cal_file, n_gpu_layers):
 
     site_dir = site.getusersitepackages()
     imatrix = Path(site_dir) / "bin" / "imatrix"
+
+    if not imatrix.is_file():
+        for d in site.getsitepackages():
+            if(Path(d) / "bin" / "imatrix").is_file():
+                site_dir = d
+                imatrix = Path(d) / "bin" / "imatrix"  # d is a str; wrap it so "/" joins paths
+
     print(f"Attempting to execute {imatrix}")
 
     if cal_file is None:
@@ -125,6 +133,13 @@ def quantize(gguf_file, output, quant_type, imatrix):
 
     site_dir = site.getusersitepackages()
     quantize = Path(site_dir) / "bin" / "quantize"
+
+    if not quantize.is_file():
+        for d in site.getsitepackages():
+            if(Path(d) / "bin" / "quantize").is_file():
+                site_dir = d
+                quantize = Path(d) / "bin" / "quantize"  # d is a str; wrap it so "/" joins paths
+
     print(f"Attempting to execute {quantize}")
 
     if imatrix is None: