-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathfile_estimate__llamacpp_test.go
125 lines (115 loc) · 3.12 KB
/
file_estimate__llamacpp_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package gguf_parser
import (
"context"
"testing"
"github.com/davecgh/go-spew/spew"
)
func TestGGUFFile_EstimateLLaMACppRun(t *testing.T) {
ctx := context.Background()
cases := []struct {
name string
given *GGUFFile
}{
{
name: "mixtral 7B",
given: func() *GGUFFile {
f, err := ParseGGUFFileFromHuggingFace(
ctx,
"NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
"Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf",
SkipLargeMetadata())
if err != nil {
t.Fatal(err)
}
return f
}(),
},
{
name: "mixtral 8x7B",
given: func() *GGUFFile {
f, err := ParseGGUFFileFromHuggingFace(
ctx,
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF",
"Nous-Hermes-2-Mixtral-8x7B-DPO.Q5_K_M.gguf",
SkipLargeMetadata())
if err != nil {
t.Fatal(err)
}
return f
}(),
},
{
name: "wizardlm 8x22B",
given: func() *GGUFFile {
f, err := ParseGGUFFileFromHuggingFace(
ctx,
"MaziyarPanahi/WizardLM-2-8x22B-GGUF",
"WizardLM-2-8x22B.IQ1_M.gguf",
SkipLargeMetadata())
if err != nil {
t.Fatal(err)
}
return f
}(),
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
f := tc.given
t.Log("\n", spew.Sdump(f.EstimateLLaMACppRun()), "\n")
})
}
}
// TestGGUFFile_EstimateLLaMACppRun_ContextSize exercises EstimateLLaMACppRun
// over varying context sizes (1024 vs 4096 tokens) crossed with KV-cache
// element types (the default fp16 vs explicit fp32 for both keys and values),
// using a single 7B model fetched once from Hugging Face. Estimates are only
// logged, not asserted.
func TestGGUFFile_EstimateLLaMACppRun_ContextSize(t *testing.T) {
	ctx := context.Background()

	f, err := ParseGGUFFileFromHuggingFace(
		ctx,
		"NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
		"Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf",
		SkipLargeMetadata())
	if err != nil {
		// t.Fatal already stops this test via runtime.Goexit; the redundant
		// `return` that followed it has been removed (staticcheck SA dead code).
		t.Fatal(err)
	}

	cases := []struct {
		name string
		opts []GGUFRunEstimateOption
	}{
		{"1024(fp16)", []GGUFRunEstimateOption{WithLLaMACppContextSize(1024)}},
		{"1024(fp32)", []GGUFRunEstimateOption{WithLLaMACppContextSize(1024), WithLLaMACppCacheKeyType(GGMLTypeF32), WithLLaMACppCacheValueType(GGMLTypeF32)}},
		{"4096(fp16)", []GGUFRunEstimateOption{WithLLaMACppContextSize(4096)}},
		{"4096(fp32)", []GGUFRunEstimateOption{WithLLaMACppContextSize(4096), WithLLaMACppCacheKeyType(GGMLTypeF32), WithLLaMACppCacheValueType(GGMLTypeF32)}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Log("\n", spew.Sdump(f.EstimateLLaMACppRun(tc.opts...)), "\n")
		})
	}
}
// TestGGUFFile_EstimateLLaMACppRun_OffloadLayers exercises EstimateLLaMACppRun
// with different GPU offload layer counts on a single 7B model fetched once
// from Hugging Face: 0, 1, 10 layers, the default (all layers), and 33 layers,
// which deliberately exceeds the model's layer count to probe clamping
// behavior. Estimates are only logged, not asserted.
func TestGGUFFile_EstimateLLaMACppRun_OffloadLayers(t *testing.T) {
	ctx := context.Background()

	f, err := ParseGGUFFileFromHuggingFace(
		ctx,
		"NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
		"Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf",
		SkipLargeMetadata())
	if err != nil {
		// t.Fatal already stops this test via runtime.Goexit; the redundant
		// `return` that followed it has been removed (staticcheck SA dead code).
		t.Fatal(err)
	}

	cases := []struct {
		name string
		opts []GGUFRunEstimateOption
	}{
		{"offload 0 layer", []GGUFRunEstimateOption{WithLLaMACppOffloadLayers(0)}},
		{"offload 1 layer", []GGUFRunEstimateOption{WithLLaMACppOffloadLayers(1)}},
		{"offload 10 layers", []GGUFRunEstimateOption{WithLLaMACppOffloadLayers(10)}},
		{"offload all layers", []GGUFRunEstimateOption{}},
		{"offload 33 layers", []GGUFRunEstimateOption{WithLLaMACppOffloadLayers(33)}}, // exceeds the number of layers
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Log("\n", spew.Sdump(f.EstimateLLaMACppRun(tc.opts...)), "\n")
		})
	}
}