Skip to content

Commit

Permalink
Merge branch 'master' into test-authv2
Browse files Browse the repository at this point in the history
  • Loading branch information
dave-gray101 authored Sep 19, 2024
2 parents 0656fab + 191bc2e commit 8f3849a
Show file tree
Hide file tree
Showing 14 changed files with 75 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/secscan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.21.2
uses: securego/gosec@v2.21.0
with:
# we let the report content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=23e0d70bacaaca1429d365a44aa9e7434f17823b
CPPLLAMA_VERSION?=64c6af3195c3cd4aa3328a1282d29cd2635c34c9

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=049b3a0e53c8a8e4c4576c06a1a4fccf0063a73f
WHISPER_CPP_VERSION?=5b1ce40fa882e9cb8630b48032067a1ed2f1534f

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
Expand Down
2 changes: 1 addition & 1 deletion aio/cpu/vision.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
f16: true
mmap: true
name: gpt-4-vision-preview
name: gpt-4o

roles:
user: "USER:"
Expand Down
2 changes: 1 addition & 1 deletion aio/gpu-8g/vision.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
f16: true
mmap: true
name: gpt-4-vision-preview
name: gpt-4o

roles:
user: "USER:"
Expand Down
2 changes: 1 addition & 1 deletion aio/intel/vision.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
mmap: false
f16: false
name: gpt-4-vision-preview
name: gpt-4o

roles:
user: "USER:"
Expand Down
2 changes: 2 additions & 0 deletions backend/backend.proto
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ message PredictOptions {
repeated string Images = 42;
bool UseTokenizerTemplate = 43;
repeated Message Messages = 44;
repeated string Videos = 45;
repeated string Audios = 46;
}

// The response message containing the result
Expand Down
4 changes: 3 additions & 1 deletion core/backend/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type TokenUsage struct {
Completion int
}

func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
threads := c.Threads
if *threads == 0 && o.Threads != 0 {
Expand Down Expand Up @@ -101,6 +101,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
opts.Messages = protoMessages
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
opts.Images = images
opts.Videos = videos
opts.Audios = audios

tokenUsage := TokenUsage{}

Expand Down
10 changes: 9 additions & 1 deletion core/http/endpoints/openai/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -640,8 +640,16 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
for _, m := range input.Messages {
images = append(images, m.StringImages...)
}
videos := []string{}
for _, m := range input.Messages {
videos = append(videos, m.StringVideos...)
}
audios := []string{}
for _, m := range input.Messages {
audios = append(audios, m.StringAudios...)
}

predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil)
if err != nil {
log.Error().Err(err).Msg("model inference failed")
return "", err
Expand Down
10 changes: 9 additions & 1 deletion core/http/endpoints/openai/inference.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,17 @@ func ComputeChoices(
for _, m := range req.Messages {
images = append(images, m.StringImages...)
}
videos := []string{}
for _, m := range req.Messages {
videos = append(videos, m.StringVideos...)
}
audios := []string{}
for _, m := range req.Messages {
audios = append(audios, m.StringAudios...)
}

// get the model function to call for the result
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback)
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback)
if err != nil {
return result, backend.TokenUsage{}, err
}
Expand Down
46 changes: 35 additions & 11 deletions core/http/endpoints/openai/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
}

// Decode each request's message content
index := 0
imgIndex, vidIndex, audioIndex := 0, 0, 0
for i, m := range input.Messages {
switch content := m.Content.(type) {
case string:
Expand All @@ -144,20 +144,44 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
dat, _ := json.Marshal(content)
c := []schema.Content{}
json.Unmarshal(dat, &c)
CONTENT:
for _, pp := range c {
if pp.Type == "text" {
switch pp.Type {
case "text":
input.Messages[i].StringContent = pp.Text
} else if pp.Type == "image_url" {
// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
if err == nil {
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
index++
} else {
case "video", "video_url":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding video: %s", err)
continue CONTENT
}
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each video
input.Messages[i].StringContent = fmt.Sprintf("[vid-%d]", vidIndex) + input.Messages[i].StringContent
vidIndex++
case "audio_url", "audio":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
}
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each audio
input.Messages[i].StringContent = fmt.Sprintf("[audio-%d]", audioIndex) + input.Messages[i].StringContent
audioIndex++
case "image_url", "image":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
}
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", imgIndex) + input.Messages[i].StringContent
imgIndex++
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions core/schema/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ type Content struct {
Type string `json:"type" yaml:"type"`
Text string `json:"text" yaml:"text"`
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
AudioURL ContentURL `json:"audio_url" yaml:"audio_url"`
VideoURL ContentURL `json:"video_url" yaml:"video_url"`
}

type ContentURL struct {
Expand All @@ -76,6 +78,8 @@ type Message struct {

StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
StringVideos []string `json:"string_videos,omitempty" yaml:"string_videos,omitempty"`
StringAudios []string `json:"string_audios,omitempty" yaml:"string_audios,omitempty"`

// A result of a function call
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
Expand Down
10 changes: 2 additions & 8 deletions pkg/utils/base64.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,8 @@ var base64DownloadClient http.Client = http.Client{
Timeout: 30 * time.Second,
}

// this function check if the string is an URL, if it's an URL downloads the image in memory
// encodes it in base64 and returns the base64 string

// This may look weird down in pkg/utils while it is currently only used in core/config
//
// but I believe it may be useful for MQTT as well in the near future, so I'm
// extracting it while I'm thinking of it.
func GetImageURLAsBase64(s string) (string, error) {
// GetContentURIAsBase64 checks whether the string is a URL: if it is, it downloads the content in memory, encodes it in base64, and returns the base64 string; otherwise, it returns the string with its base64 data header stripped.
func GetContentURIAsBase64(s string) (string, error) {
if strings.HasPrefix(s, "http") {
// download the image
resp, err := base64DownloadClient.Get(s)
Expand Down
8 changes: 4 additions & 4 deletions pkg/utils/base64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,28 @@ var _ = Describe("utils/base64 tests", func() {
It("GetImageURLAsBase64 can strip jpeg data url prefixes", func() {
// This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes.
input := ""
b64, err := GetImageURLAsBase64(input)
b64, err := GetContentURIAsBase64(input)
Expect(err).To(BeNil())
Expect(b64).To(Equal("FOO"))
})
It("GetImageURLAsBase64 can strip png data url prefixes", func() {
// This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes.
input := ""
b64, err := GetImageURLAsBase64(input)
b64, err := GetContentURIAsBase64(input)
Expect(err).To(BeNil())
Expect(b64).To(Equal("BAR"))
})
It("GetImageURLAsBase64 returns an error for bogus data", func() {
input := "FOO"
b64, err := GetImageURLAsBase64(input)
b64, err := GetContentURIAsBase64(input)
Expect(b64).To(Equal(""))
Expect(err).ToNot(BeNil())
Expect(err).To(MatchError("not valid string"))
})
It("GetImageURLAsBase64 can actually download images and calculates something", func() {
// This test doesn't actually _check_ the results at this time, which is bad, but there wasn't a test at all before...
input := "https://upload.wikimedia.org/wikipedia/en/2/29/Wargames.jpg"
b64, err := GetImageURLAsBase64(input)
b64, err := GetContentURIAsBase64(input)
Expect(err).To(BeNil())
Expect(b64).ToNot(BeNil())
})
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e-aio/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ var _ = Describe("E2E test", func() {
})
Context("vision", func() {
It("correctly", func() {
model := "gpt-4-vision-preview"
model := "gpt-4o"
resp, err := client.CreateChatCompletion(context.TODO(),
openai.ChatCompletionRequest{
Model: model, Messages: []openai.ChatCompletionMessage{
Expand Down

0 comments on commit 8f3849a

Please sign in to comment.