Skip to content

Commit

Permalink
feat: support tq1_0 tq2_0
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode committed Sep 11, 2024
1 parent a364472 commit a4de86d
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 17 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,8 @@ CONVERT --type=F16 --class=lora --base=Qwen2-1.5B Qwen2-1.5B-MAC-lora Qwen2-1
- `CONVERT [--class=<model|lora>] <src> <dest>`, specify the class for the model, default is `model`.
+ `CONVERT --class=lora --base=<path> <src> <dest>`, convert a PEFT LoRA adapter to GGUF file, must provide the
`base` model.
- `CONVERT [--type=<type>] <src> <dest>`, specify the output type for `<dest>`, select from `F32`, `F16`, `BF16` and
`Q8_0`, default is `F16`.
- `CONVERT [--type=<type>] <src> <dest>`, specify the output type for `<dest>`, select from `F32`, `F16`, `BF16`,
`Q8_0`, `TQ1_0`, and `TQ2_0`, default is `F16`.

#### FROM

Expand Down
12 changes: 9 additions & 3 deletions buildkit/frontend/ggufpackerfile/ggufpackerfile2llb/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -1461,12 +1461,14 @@ func dispatchConvert(d *dispatchState, c *instructions.ConvertCommand, opt *disp
"lora",
"adapter",
}
// Extract from https://github.com/ggerganov/llama.cpp/blob/01245f5b1629075543bc4478418c7d72a0b4b3c7/convert_hf_to_gguf.py#L3553-L3556.
// Extract from https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/convert_hf_to_gguf.py#L4032-L4035.
types := []string{
"F32",
"F16",
"BF16",
"Q8_0",
"TQ1_0",
"TQ2_0",
}

commitMessage := bytes.NewBufferString("CONVERT")
Expand Down Expand Up @@ -1571,7 +1573,7 @@ func dispatchLabel(d *dispatchState, c *instructions.LabelCommand, lint *linter.
}

func dispatchQuantize(d *dispatchState, c *instructions.QuantizeCommand, opt *dispatchOpt, sources []llb.State) (err error) {
// Extract from https://github.com/ggerganov/llama.cpp/blob/c887d8b01726b11ea03dbcaa9d44fa74422d0076/examples/quantize/quantize.cpp#L19-L51.
// Extract from https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/examples/quantize/quantize.cpp#L19-L53.
types := []string{
"Q4_0",
"Q4_1",
Expand All @@ -1583,6 +1585,8 @@ func dispatchQuantize(d *dispatchState, c *instructions.QuantizeCommand, opt *di
"IQ2_M",
"IQ1_S",
"IQ1_M",
"TQ1_0",
"TQ2_0",
"Q2_K",
"Q2_K_S",
"IQ3_XXS",
Expand All @@ -1607,7 +1611,7 @@ func dispatchQuantize(d *dispatchState, c *instructions.QuantizeCommand, opt *di
"Q4_0_4_8",
"Q4_0_8_8",
}
// Extract from https://github.com/ggerganov/llama.cpp/blob/c887d8b01726b11ea03dbcaa9d44fa74422d0076/ggml/src/ggml.c#L579-L974.
// Extract from https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/ggml/src/ggml.c#L663-L1082.
ggmlTypes := map[string]string{
"I8": "i8",
"I16": "i16",
Expand Down Expand Up @@ -1641,6 +1645,8 @@ func dispatchQuantize(d *dispatchState, c *instructions.QuantizeCommand, opt *di
"Q4_0_4_4": "q4_0_4x4",
"Q4_0_4_8": "q4_0_4x8",
"Q4_0_8_8": "q4_0_8x8",
"TQ1_0": "tq1_0",
"TQ2_0": "tq2_0",
}

commitMessage := bytes.NewBufferString("QUANTIZE")
Expand Down
4 changes: 2 additions & 2 deletions cmd/gguf-packer/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/dustin/go-humanize v1.0.1
github.com/google/go-containerregistry v0.20.2
github.com/gpustack/gguf-packer-go v0.0.0-00010101000000-000000000000
github.com/gpustack/gguf-parser-go v0.11.0
github.com/gpustack/gguf-parser-go v0.11.1
github.com/jedib0t/go-pretty/v6 v6.5.9
github.com/moby/buildkit v0.15.2
github.com/opencontainers/go-digest v1.0.0
Expand Down Expand Up @@ -132,7 +132,7 @@ require (
golang.org/x/net v0.28.0 // indirect
golang.org/x/oauth2 v0.22.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/term v0.23.0 // indirect
golang.org/x/text v0.17.0 // indirect
golang.org/x/tools v0.24.0 // indirect
Expand Down
8 changes: 4 additions & 4 deletions cmd/gguf-packer/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ require (
github.com/containerd/platforms v0.2.1
github.com/distribution/reference v0.6.0
github.com/docker/go-units v0.5.0
github.com/gpustack/gguf-parser-go v0.11.0
github.com/gpustack/gguf-parser-go v0.11.1
github.com/mitchellh/hashstructure/v2 v2.0.2
github.com/moby/buildkit v0.15.2
github.com/moby/patternmatcher v0.6.0
Expand All @@ -16,7 +16,7 @@ require (
github.com/tonistiigi/go-csvvalue v0.0.0-20240814133006-030d3b2625d0
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948
golang.org/x/sync v0.8.0
golang.org/x/sys v0.24.0
golang.org/x/sys v0.25.0
)

require (
Expand Down
8 changes: 4 additions & 4 deletions go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a4de86d

Please sign in to comment.