feat: support tq1_0 tq2_0

Signed-off-by: thxCode <[email protected]>
gpustack · Sep 11, 2024 · a4de86d · a4de86d
1 parent a364472
commit a4de86d
Show file tree

Hide file tree

Showing 6 changed files with 23 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -471,8 +471,8 @@ CONVERT    --type=F16 --class=lora --base=Qwen2-1.5B Qwen2-1.5B-MAC-lora Qwen2-1
 - `CONVERT [--class=<model|lora>] <src> <dest>`, specify the class for the model, default is `model`.
     + `CONVERT --class=lora --base=<path> <src> <dest>`, convert a PEFT LoRA adapter to GGUF file, must provide the
       `base` model.
-- `CONVERT [--type=<type>] <src> <dest>`, specify the output type for `<dest>`, select from `F32`, `F16`, `BF16` and
-  `Q8_0`, default is `F16`.
+- `CONVERT [--type=<type>] <src> <dest>`, specify the output type for `<dest>`, select from `F32`, `F16`, `BF16`,
+  `Q8_0`, `TQ1_0`, and `TQ2_0`, default is `F16`.
 
 #### FROM
 

diff --git a/buildkit/frontend/ggufpackerfile/ggufpackerfile2llb/convert.go b/buildkit/frontend/ggufpackerfile/ggufpackerfile2llb/convert.go
@@ -1461,12 +1461,14 @@ func dispatchConvert(d *dispatchState, c *instructions.ConvertCommand, opt *disp
 		"lora",
 		"adapter",
 	}
-	// Extract from https://github.com/ggerganov/llama.cpp/blob/01245f5b1629075543bc4478418c7d72a0b4b3c7/convert_hf_to_gguf.py#L3553-L3556.
+	// Extract from https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/convert_hf_to_gguf.py#L4032-L4035.
 	types := []string{
 		"F32",
 		"F16",
 		"BF16",
 		"Q8_0",
+		"TQ1_0",
+		"TQ2_0",
 	}
 
 	commitMessage := bytes.NewBufferString("CONVERT")
@@ -1571,7 +1573,7 @@ func dispatchLabel(d *dispatchState, c *instructions.LabelCommand, lint *linter.
 }
 
 func dispatchQuantize(d *dispatchState, c *instructions.QuantizeCommand, opt *dispatchOpt, sources []llb.State) (err error) {
-	// Extract from https://github.com/ggerganov/llama.cpp/blob/c887d8b01726b11ea03dbcaa9d44fa74422d0076/examples/quantize/quantize.cpp#L19-L51.
+	// Extract from https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/examples/quantize/quantize.cpp#L19-L53.
 	types := []string{
 		"Q4_0",
 		"Q4_1",
@@ -1583,6 +1585,8 @@ func dispatchQuantize(d *dispatchState, c *instructions.QuantizeCommand, opt *di
 		"IQ2_M",
 		"IQ1_S",
 		"IQ1_M",
+		"TQ1_0",
+		"TQ2_0",
 		"Q2_K",
 		"Q2_K_S",
 		"IQ3_XXS",
@@ -1607,7 +1611,7 @@ func dispatchQuantize(d *dispatchState, c *instructions.QuantizeCommand, opt *di
 		"Q4_0_4_8",
 		"Q4_0_8_8",
 	}
-	// Extract from https://github.com/ggerganov/llama.cpp/blob/c887d8b01726b11ea03dbcaa9d44fa74422d0076/ggml/src/ggml.c#L579-L974.
+	// Extract from https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/ggml/src/ggml.c#L663-L1082.
 	ggmlTypes := map[string]string{
 		"I8":       "i8",
 		"I16":      "i16",
@@ -1641,6 +1645,8 @@ func dispatchQuantize(d *dispatchState, c *instructions.QuantizeCommand, opt *di
 		"Q4_0_4_4": "q4_0_4x4",
 		"Q4_0_4_8": "q4_0_4x8",
 		"Q4_0_8_8": "q4_0_8x8",
+		"TQ1_0":    "tq1_0",
+		"TQ2_0":    "tq2_0",
 	}
 
 	commitMessage := bytes.NewBufferString("QUANTIZE")

diff --git a/cmd/gguf-packer/go.mod b/cmd/gguf-packer/go.mod
@@ -14,7 +14,7 @@ require (
 	github.com/dustin/go-humanize v1.0.1
 	github.com/google/go-containerregistry v0.20.2
 	github.com/gpustack/gguf-packer-go v0.0.0-00010101000000-000000000000
-	github.com/gpustack/gguf-parser-go v0.11.0
+	github.com/gpustack/gguf-parser-go v0.11.1
 	github.com/jedib0t/go-pretty/v6 v6.5.9
 	github.com/moby/buildkit v0.15.2
 	github.com/opencontainers/go-digest v1.0.0
@@ -132,7 +132,7 @@ require (
 	golang.org/x/net v0.28.0 // indirect
 	golang.org/x/oauth2 v0.22.0 // indirect
 	golang.org/x/sync v0.8.0 // indirect
-	golang.org/x/sys v0.24.0 // indirect
+	golang.org/x/sys v0.25.0 // indirect
 	golang.org/x/term v0.23.0 // indirect
 	golang.org/x/text v0.17.0 // indirect
 	golang.org/x/tools v0.24.0 // indirect

diff --git a/cmd/gguf-packer/go.sum b/cmd/gguf-packer/go.sum
diff --git a/go.mod b/go.mod
@@ -6,7 +6,7 @@ require (
 	github.com/containerd/platforms v0.2.1
 	github.com/distribution/reference v0.6.0
 	github.com/docker/go-units v0.5.0
-	github.com/gpustack/gguf-parser-go v0.11.0
+	github.com/gpustack/gguf-parser-go v0.11.1
 	github.com/mitchellh/hashstructure/v2 v2.0.2
 	github.com/moby/buildkit v0.15.2
 	github.com/moby/patternmatcher v0.6.0
@@ -16,7 +16,7 @@ require (
 	github.com/tonistiigi/go-csvvalue v0.0.0-20240814133006-030d3b2625d0
 	golang.org/x/exp v0.0.0-20240823005443-9b4947da3948
 	golang.org/x/sync v0.8.0
-	golang.org/x/sys v0.24.0
+	golang.org/x/sys v0.25.0
 )
 
 require (

diff --git a/go.sum b/go.sum