From d5b46cfd9ddd0892f197c1b162bddc2780771915 Mon Sep 17 00:00:00 2001 From: Emil Valeev Date: Wed, 27 Mar 2024 12:55:17 +0500 Subject: [PATCH 1/7] refactor: add some comments for entities --- agency.go | 5 +- providers/openai/image_to_text.go | 2 + providers/openai/provider.go | 6 +++ providers/openai/speech_to_text.go | 35 +++++++------ providers/openai/text_to_image.go | 49 +++++++++--------- providers/openai/text_to_speech.go | 43 ++++++++-------- providers/openai/text_to_text.go | 82 ++++++++++++++++-------------- 7 files changed, 121 insertions(+), 101 deletions(-) diff --git a/agency.go b/agency.go index 20cc351..106c355 100644 --- a/agency.go +++ b/agency.go @@ -11,12 +11,11 @@ type Operation struct { config *OperationConfig } -// OperationHandler is a function that implements logic. +// OperationHandler is a function that implements operation's logic. // It could be thought of as an interface that providers must implement. type OperationHandler func(context.Context, Message, *OperationConfig) (Message, error) -// OperationConfig represents abstract operation configuration. -// It contains fields for all possible modalities but nothing specific to concrete model implementations. +// OperationConfig represents abstract operation configuration for all possible models. type OperationConfig struct { Prompt string Messages []Message diff --git a/providers/openai/image_to_text.go b/providers/openai/image_to_text.go index 95b74c6..beab054 100644 --- a/providers/openai/image_to_text.go +++ b/providers/openai/image_to_text.go @@ -15,6 +15,8 @@ type ImageToTextParams struct { MaxTokens int } + +// ImageToText is an operation builder that creates operation than can convert image to text. 
func (f *Provider) ImageToText(params ImageToTextParams) *agency.Operation { return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { openaiMsg := openai.ChatCompletionMessage{ diff --git a/providers/openai/provider.go b/providers/openai/provider.go index 62a60dc..3b2377a 100644 --- a/providers/openai/provider.go +++ b/providers/openai/provider.go @@ -6,15 +6,19 @@ import ( "github.com/sashabaranov/go-openai" ) +// Provider is a set of operation builders. type Provider struct { client *openai.Client } +// Params is a set of parameters specific for creating this concrete provider. +// They are shared across all operation builders. type Params struct { Key string // Required if not using local LLM. BaseURL string // Optional. If not set then default openai base url is used } +// New creates a new Provider instance. func New(params Params) *Provider { cfg := openai.DefaultConfig(params.Key) if params.BaseURL != "" { @@ -25,6 +29,8 @@ func New(params Params) *Provider { } } +// === Helpers === + // NullableFloat32 is a type that exists to distinguish between undefined values and real zeros. // It fixes sashabaranov/go-openai issue with zero temp not included in api request due to how json unmarshal work. type NullableFloat32 *float32 diff --git a/providers/openai/speech_to_text.go b/providers/openai/speech_to_text.go index 312dc9f..922cbf4 100644 --- a/providers/openai/speech_to_text.go +++ b/providers/openai/speech_to_text.go @@ -13,22 +13,25 @@ type SpeechToTextParams struct { Temperature NullableFloat32 } +// SpeechToText is an operation builder that creates operation than can convert speech to text. 
func (f Provider) SpeechToText(params SpeechToTextParams) *agency.Operation { - return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { - resp, err := f.client.CreateTranscription(ctx, openai.AudioRequest{ - Model: params.Model, - Prompt: cfg.Prompt, - FilePath: "speech.ogg", // TODO move to cfg? - Reader: bytes.NewReader(msg.Content), - Temperature: getTemperature(params.Temperature), - }) - if err != nil { - return agency.Message{}, err - } + return agency.NewOperation( + func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { + resp, err := f.client.CreateTranscription(ctx, openai.AudioRequest{ + Model: params.Model, + Prompt: cfg.Prompt, + FilePath: "speech.ogg", + Reader: bytes.NewReader(msg.Content), + Temperature: getTemperature(params.Temperature), + }) + if err != nil { + return agency.Message{}, err + } - return agency.Message{ - Role: agency.AssistantRole, - Content: []byte(resp.Text), - }, nil - }) + return agency.Message{ + Role: agency.AssistantRole, + Content: []byte(resp.Text), + }, nil + }, + ) } diff --git a/providers/openai/text_to_image.go b/providers/openai/text_to_image.go index bdc5bfc..ef8c54b 100644 --- a/providers/openai/text_to_image.go +++ b/providers/openai/text_to_image.go @@ -16,31 +16,34 @@ type TextToImageParams struct { Style string } +// TextToImage is an operation builder that creates operation than can convert text to image. 
func (p Provider) TextToImage(params TextToImageParams) *agency.Operation { - return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { - reqBase64 := openai.ImageRequest{ - Prompt: fmt.Sprintf("%s\n\n%s", cfg.Prompt, string(msg.Content)), - Size: params.ImageSize, - ResponseFormat: openai.CreateImageResponseFormatB64JSON, - N: 1, // DALL·E-3 only support n=1, for other models support needed - Model: params.Model, - Quality: params.Quality, - Style: params.Style, - } + return agency.NewOperation( + func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { + reqBase64 := openai.ImageRequest{ + Prompt: fmt.Sprintf("%s\n\n%s", cfg.Prompt, string(msg.Content)), + Size: params.ImageSize, + ResponseFormat: openai.CreateImageResponseFormatB64JSON, + N: 1, // DALL·E-3 only support n=1, for other models support needed + Model: params.Model, + Quality: params.Quality, + Style: params.Style, + } - respBase64, err := p.client.CreateImage(ctx, reqBase64) - if err != nil { - return agency.Message{}, err - } + respBase64, err := p.client.CreateImage(ctx, reqBase64) + if err != nil { + return agency.Message{}, err + } - imgBytes, err := base64.StdEncoding.DecodeString(respBase64.Data[0].B64JSON) - if err != nil { - return agency.Message{}, err - } + imgBytes, err := base64.StdEncoding.DecodeString(respBase64.Data[0].B64JSON) + if err != nil { + return agency.Message{}, err + } - return agency.Message{ - Role: agency.AssistantRole, - Content: imgBytes, - }, nil - }) + return agency.Message{ + Role: agency.AssistantRole, + Content: imgBytes, + }, nil + }, + ) } diff --git a/providers/openai/text_to_speech.go b/providers/openai/text_to_speech.go index c0402e0..f0cbd92 100644 --- a/providers/openai/text_to_speech.go +++ b/providers/openai/text_to_speech.go @@ -15,27 +15,30 @@ type TextToSpeechParams struct { Voice string } +// TextToSpeech is an operation builder that 
creates operation than can convert text to speech. func (f Provider) TextToSpeech(params TextToSpeechParams) *agency.Operation { - return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { - resp, err := f.client.CreateSpeech(ctx, openai.CreateSpeechRequest{ - Model: openai.SpeechModel(params.Model), - Input: msg.String(), - Voice: openai.SpeechVoice(params.Voice), - ResponseFormat: openai.SpeechResponseFormat(params.ResponseFormat), - Speed: params.Speed, - }) - if err != nil { - return agency.Message{}, err - } + return agency.NewOperation( + func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { + resp, err := f.client.CreateSpeech(ctx, openai.CreateSpeechRequest{ + Model: openai.SpeechModel(params.Model), + Input: msg.String(), + Voice: openai.SpeechVoice(params.Voice), + ResponseFormat: openai.SpeechResponseFormat(params.ResponseFormat), + Speed: params.Speed, + }) + if err != nil { + return agency.Message{}, err + } - bb, err := io.ReadAll(resp) - if err != nil { - return agency.Message{}, err - } + bb, err := io.ReadAll(resp) + if err != nil { + return agency.Message{}, err + } - return agency.Message{ - Role: agency.AssistantRole, - Content: bb, - }, nil - }) + return agency.Message{ + Role: agency.AssistantRole, + Content: bb, + }, nil + }, + ) } diff --git a/providers/openai/text_to_text.go b/providers/openai/text_to_text.go index 1943ec7..830d22a 100644 --- a/providers/openai/text_to_text.go +++ b/providers/openai/text_to_text.go @@ -9,54 +9,58 @@ import ( "github.com/neurocult/agency" ) +// TextToTextParams represents parameters that are specific for this operation. type TextToTextParams struct { Model string Temperature NullableFloat32 MaxTokens int } +// TextToText is an operation builder that creates operation than can convert text to text. 
func (p Provider) TextToText(params TextToTextParams) *agency.Operation { - return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { - openAIMessages := make([]openai.ChatCompletionMessage, 0, len(cfg.Messages)+2) + return agency.NewOperation( + func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { + openAIMessages := make([]openai.ChatCompletionMessage, 0, len(cfg.Messages)+2) - openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ - Role: openai.ChatMessageRoleSystem, - Content: cfg.Prompt, - }) + openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleSystem, + Content: cfg.Prompt, + }) + + for _, textMsg := range cfg.Messages { + openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ + Role: string(textMsg.Role), + Content: string(textMsg.Content), + }) + } - for _, textMsg := range cfg.Messages { openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ - Role: string(textMsg.Role), - Content: string(textMsg.Content), + Role: openai.ChatMessageRoleUser, + Content: msg.String(), }) - } - - openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ - Role: openai.ChatMessageRoleUser, - Content: msg.String(), - }) - - resp, err := p.client.CreateChatCompletion( - ctx, - openai.ChatCompletionRequest{ - Model: params.Model, - Temperature: getTemperature(params.Temperature), - MaxTokens: params.MaxTokens, - Messages: openAIMessages, - }, - ) - if err != nil { - return agency.Message{}, err - } - - if len(resp.Choices) < 1 { - return agency.Message{}, errors.New("no choice") - } - choice := resp.Choices[0].Message // TODO what about other choices? 
- - return agency.Message{ - Role: agency.Role(choice.Role), - Content: []byte(choice.Content), - }, nil - }) + + resp, err := p.client.CreateChatCompletion( + ctx, + openai.ChatCompletionRequest{ + Model: params.Model, + Temperature: getTemperature(params.Temperature), + MaxTokens: params.MaxTokens, + Messages: openAIMessages, + }, + ) + if err != nil { + return agency.Message{}, err + } + + if len(resp.Choices) < 1 { + return agency.Message{}, errors.New("no choice") + } + choice := resp.Choices[0].Message // TODO what about other choices? + + return agency.Message{ + Role: agency.Role(choice.Role), + Content: []byte(choice.Content), + }, nil + }, + ) } From b410c4f8a2a75b589b25af96d326783be40eea71 Mon Sep 17 00:00:00 2001 From: Emil Valeev Date: Wed, 27 Mar 2024 21:14:51 +0500 Subject: [PATCH 2/7] feat(t2t): add funcs --- process.go | 1 + providers/openai/text_to_text.go | 116 +++++++++++++++++++++++++------ 2 files changed, 97 insertions(+), 20 deletions(-) diff --git a/process.go b/process.go index fecee17..af300dd 100644 --- a/process.go +++ b/process.go @@ -9,6 +9,7 @@ type Process struct { operations []*Operation } +// NewProcess creates a new Process with given operations. func NewProcess(operations ...*Operation) *Process { return &Process{ operations: operations, diff --git a/providers/openai/text_to_text.go b/providers/openai/text_to_text.go index 830d22a..84b7a77 100644 --- a/providers/openai/text_to_text.go +++ b/providers/openai/text_to_text.go @@ -2,7 +2,9 @@ package openai import ( "context" + "encoding/json" "errors" + "fmt" "github.com/sashabaranov/go-openai" @@ -14,10 +16,25 @@ type TextToTextParams struct { Model string Temperature NullableFloat32 MaxTokens int + FuncDefs []FuncDef +} + +// FuncDef represents a function definition that can be called during the conversation. +type FuncDef struct { + Name string + Description string + Parameters any // Parameters is a structure that defines the schema of the parameters that the function accepts. 
+ // Body is the actual function that get's called. + // Parameters must be pointer to a structure that matches `Parameters` schema via json-tags. + // Returned result must be json-marshallable object. + Body func(ctx context.Context, params any) (any, error) } // TextToText is an operation builder that creates operation than can convert text to text. +// It can also call provided functions if needed, as many times as needed until the final answer is generated. func (p Provider) TextToText(params TextToTextParams) *agency.Operation { + openAITools := castFuncDefsToOpenAITools(params.FuncDefs) + return agency.NewOperation( func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) { openAIMessages := make([]openai.ChatCompletionMessage, 0, len(cfg.Messages)+2) @@ -39,28 +56,87 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { Content: msg.String(), }) - resp, err := p.client.CreateChatCompletion( - ctx, - openai.ChatCompletionRequest{ - Model: params.Model, - Temperature: getTemperature(params.Temperature), - MaxTokens: params.MaxTokens, - Messages: openAIMessages, - }, - ) - if err != nil { - return agency.Message{}, err - } + for { + openAIResponse, err := p.client.CreateChatCompletion( + ctx, + openai.ChatCompletionRequest{ + Model: params.Model, + Temperature: getTemperature(params.Temperature), + MaxTokens: params.MaxTokens, + Messages: openAIMessages, + Tools: openAITools, + }, + ) + if err != nil { + return agency.Message{}, err + } - if len(resp.Choices) < 1 { - return agency.Message{}, errors.New("no choice") - } - choice := resp.Choices[0].Message // TODO what about other choices? 
+ if len(openAIResponse.Choices) < 1 { + return agency.Message{}, errors.New("no choice") + } + answer := openAIResponse.Choices[0] + + if answer.FinishReason != openai.FinishReasonFunctionCall { + return agency.Message{ + Role: agency.Role(answer.Message.Role), + Content: []byte(answer.Message.Content), + }, nil + } + + funcToCall := getFuncDefByName(params.FuncDefs, answer.Message.FunctionCall.Name) + if funcToCall == nil { + return agency.Message{}, errors.New("function not found") + } + + var params = funcToCall.Parameters + if err = json.Unmarshal([]byte(answer.Message.FunctionCall.Arguments), &params); err != nil { + return agency.Message{}, fmt.Errorf( + "unmarshal %s arguments: %w", + answer.Message.FunctionCall.Name, err, + ) + } + + funcResult, err := funcToCall.Body(ctx, params) + if err != nil { + return agency.Message{}, fmt.Errorf("call function %s: %w", funcToCall.Name, err) + } - return agency.Message{ - Role: agency.Role(choice.Role), - Content: []byte(choice.Content), - }, nil + bb, err := json.Marshal(funcResult) + if err != nil { + return agency.Message{}, fmt.Errorf("marshal function result: %w", err) + } + + openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleAssistant, + Content: string(bb), + }) + } }, ) } + +// === Helpers === + +func castFuncDefsToOpenAITools(funcDefs []FuncDef) []openai.Tool { + tools := make([]openai.Tool, 0, len(funcDefs)) + for _, f := range funcDefs { + tools = append(tools, openai.Tool{ + Type: openai.ToolTypeFunction, + Function: openai.FunctionDefinition{ + Name: f.Name, + Description: f.Description, + Parameters: f.Parameters, + }, + }) + } + return tools +} + +func getFuncDefByName(funcDefs []FuncDef, name string) *FuncDef { + for _, f := range funcDefs { + if f.Name == name { + return &f + } + } + return nil +} From f5c53ed2fc29be823db47027c2ecfce8c0ba7d7b Mon Sep 17 00:00:00 2001 From: Emil Valeev Date: Wed, 27 Mar 2024 21:59:02 +0500 Subject: [PATCH 3/7] feat(t2t): 
implement function calling --- examples/func_call/main.go | 81 ++++++++++++++++++++++++++++++++ providers/openai/text_to_text.go | 44 ++++++++--------- 2 files changed, 104 insertions(+), 21 deletions(-) create mode 100644 examples/func_call/main.go diff --git a/examples/func_call/main.go b/examples/func_call/main.go new file mode 100644 index 0000000..abf847e --- /dev/null +++ b/examples/func_call/main.go @@ -0,0 +1,81 @@ +// based on user input +// 1) call 2 functions, A then B then answer +// 2) call 1 function A or B +// 3) just answer (no function call) + +package main + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "os" + + _ "github.com/joho/godotenv/autoload" + "github.com/sashabaranov/go-openai/jsonschema" + + "github.com/neurocult/agency" + "github.com/neurocult/agency/providers/openai" +) + +func main() { + t2tOp := openai. + New(openai.Params{Key: os.Getenv("OPENAI_API_KEY")}). + TextToText(openai.TextToTextParams{ + Model: "gpt-3.5-turbo", + FuncDefs: []openai.FuncDef{ + // function without parameters + { + Name: "GetMeaningOfLife", + Description: "Answer questions about meaning of life", + Body: func(ctx context.Context, _ []byte) (any, error) { + return 42, nil + }, + }, + // function with parameters + { + Name: "SumNumbers", + Description: "Sum given numbers when asked", + Parameters: &jsonschema.Definition{ + Type: "object", + Properties: map[string]jsonschema.Definition{ + "a": {Type: "integer"}, + "b": {Type: "integer"}, + }, + }, + Body: func(ctx context.Context, params []byte) (any, error) { + var pp struct{ A, B int } + if err := json.Unmarshal(params, &pp); err != nil { + return nil, err + } + return (pp.A + pp.B) * 10, nil // *10 is just to distinguish from normal response + }, + }, + }, + }). 
+ SetPrompt("You are helpful assistant.") + + messages := []agency.Message{} + reader := bufio.NewReader(os.Stdin) + ctx := context.Background() + + for { + fmt.Print("User: ") + + text, err := reader.ReadString('\n') + if err != nil { + panic(err) + } + + input := agency.UserMessage(text) + answer, err := t2tOp.SetMessages(messages).Execute(ctx, input) + if err != nil { + panic(err) + } + + fmt.Println("Assistant: ", answer) + + messages = append(messages, input, answer) + } +} diff --git a/providers/openai/text_to_text.go b/providers/openai/text_to_text.go index 84b7a77..c72e725 100644 --- a/providers/openai/text_to_text.go +++ b/providers/openai/text_to_text.go @@ -7,6 +7,7 @@ import ( "fmt" "github.com/sashabaranov/go-openai" + "github.com/sashabaranov/go-openai/jsonschema" "github.com/neurocult/agency" ) @@ -23,11 +24,12 @@ type TextToTextParams struct { type FuncDef struct { Name string Description string - Parameters any // Parameters is a structure that defines the schema of the parameters that the function accepts. + // Parameters is an optional structure that defines the schema of the parameters that the function accepts. + Parameters *jsonschema.Definition // Body is the actual function that get's called. - // Parameters must be pointer to a structure that matches `Parameters` schema via json-tags. - // Returned result must be json-marshallable object. - Body func(ctx context.Context, params any) (any, error) + // Parameters passed are bytes that can be unmarshalled to type that implements provided json schema. + // Returned result must be anything that can be marshalled, including primitive values. + Body func(ctx context.Context, params []byte) (any, error) } // TextToText is an operation builder that creates operation than can convert text to text. 
@@ -74,29 +76,22 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { if len(openAIResponse.Choices) < 1 { return agency.Message{}, errors.New("no choice") } - answer := openAIResponse.Choices[0] + firstChoice := openAIResponse.Choices[0] - if answer.FinishReason != openai.FinishReasonFunctionCall { + if len(firstChoice.Message.ToolCalls) == 0 { return agency.Message{ - Role: agency.Role(answer.Message.Role), - Content: []byte(answer.Message.Content), + Role: agency.Role(firstChoice.Message.Role), + Content: []byte(firstChoice.Message.Content), }, nil } - funcToCall := getFuncDefByName(params.FuncDefs, answer.Message.FunctionCall.Name) + firstToolCall := firstChoice.Message.ToolCalls[0] + funcToCall := getFuncDefByName(params.FuncDefs, firstToolCall.Function.Name) if funcToCall == nil { return agency.Message{}, errors.New("function not found") } - var params = funcToCall.Parameters - if err = json.Unmarshal([]byte(answer.Message.FunctionCall.Arguments), &params); err != nil { - return agency.Message{}, fmt.Errorf( - "unmarshal %s arguments: %w", - answer.Message.FunctionCall.Name, err, - ) - } - - funcResult, err := funcToCall.Body(ctx, params) + funcResult, err := funcToCall.Body(ctx, []byte(firstToolCall.Function.Arguments)) if err != nil { return agency.Message{}, fmt.Errorf("call function %s: %w", funcToCall.Name, err) } @@ -120,14 +115,21 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { func castFuncDefsToOpenAITools(funcDefs []FuncDef) []openai.Tool { tools := make([]openai.Tool, 0, len(funcDefs)) for _, f := range funcDefs { - tools = append(tools, openai.Tool{ + tool := openai.Tool{ Type: openai.ToolTypeFunction, Function: openai.FunctionDefinition{ Name: f.Name, Description: f.Description, - Parameters: f.Parameters, }, - }) + } + if f.Parameters != nil { + tool.Function.Parameters = f.Parameters + } else { + tool.Function.Parameters = jsonschema.Definition{ + Type: jsonschema.Object, // because we can't pass 
empty parameters + } + } + tools = append(tools, tool) } return tools } From 4d420b588da07b0c8ac04a05c6d20e430c9040ae Mon Sep 17 00:00:00 2001 From: Emil Valeev Date: Wed, 27 Mar 2024 23:43:20 +0500 Subject: [PATCH 4/7] tmp(t2t): add debug logs for func calls --- examples/func_call/main.go | 5 ----- providers/openai/text_to_text.go | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/func_call/main.go b/examples/func_call/main.go index abf847e..9b88ebd 100644 --- a/examples/func_call/main.go +++ b/examples/func_call/main.go @@ -1,8 +1,3 @@ -// based on user input -// 1) call 2 functions, A then B then answer -// 2) call 1 function A or B -// 3) just answer (no function call) - package main import ( diff --git a/providers/openai/text_to_text.go b/providers/openai/text_to_text.go index c72e725..5e66289 100644 --- a/providers/openai/text_to_text.go +++ b/providers/openai/text_to_text.go @@ -59,6 +59,8 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { }) for { + fmt.Println("===ITERATION===") + openAIResponse, err := p.client.CreateChatCompletion( ctx, openai.ChatCompletionRequest{ @@ -78,6 +80,8 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { } firstChoice := openAIResponse.Choices[0] + fmt.Println(firstChoice.Message.ToolCalls) + if len(firstChoice.Message.ToolCalls) == 0 { return agency.Message{ Role: agency.Role(firstChoice.Message.Role), From a3d0d268a4ffaa3942c8ba8f34addc812e202f67 Mon Sep 17 00:00:00 2001 From: Emil Valeev Date: Fri, 29 Mar 2024 13:45:57 +0500 Subject: [PATCH 5/7] fix(t2t): remove useless iterations in meaning of life example by adding missing func name --- providers/openai/text_to_text.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/providers/openai/text_to_text.go b/providers/openai/text_to_text.go index 5e66289..da23ebe 100644 --- a/providers/openai/text_to_text.go +++ b/providers/openai/text_to_text.go @@ -80,8 +80,6 @@ func (p 
Provider) TextToText(params TextToTextParams) *agency.Operation { } firstChoice := openAIResponse.Choices[0] - fmt.Println(firstChoice.Message.ToolCalls) - if len(firstChoice.Message.ToolCalls) == 0 { return agency.Message{ Role: agency.Role(firstChoice.Message.Role), @@ -106,8 +104,9 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { } openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ - Role: openai.ChatMessageRoleAssistant, + Role: openai.ChatMessageRoleFunction, Content: string(bb), + Name: firstToolCall.Function.Name, }) } }, From a12c1ff4f0619ffa6892dcae215c27555c478b75 Mon Sep 17 00:00:00 2001 From: Emil Valeev Date: Fri, 29 Mar 2024 14:21:37 +0500 Subject: [PATCH 6/7] fix(t2t): remove useless iterations from sum numbers example by adding missing intermidiate message to internal history --- providers/openai/text_to_text.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/providers/openai/text_to_text.go b/providers/openai/text_to_text.go index da23ebe..0c9fb29 100644 --- a/providers/openai/text_to_text.go +++ b/providers/openai/text_to_text.go @@ -59,8 +59,6 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { }) for { - fmt.Println("===ITERATION===") - openAIResponse, err := p.client.CreateChatCompletion( ctx, openai.ChatCompletionRequest{ @@ -87,6 +85,8 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { }, nil } + openAIMessages = append(openAIMessages, firstChoice.Message) + firstToolCall := firstChoice.Message.ToolCalls[0] funcToCall := getFuncDefByName(params.FuncDefs, firstToolCall.Function.Name) if funcToCall == nil { @@ -104,9 +104,10 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { } openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ - Role: openai.ChatMessageRoleFunction, - Content: string(bb), - Name: firstToolCall.Function.Name, + Role: openai.ChatMessageRoleTool, + Content: 
string(bb), + Name: firstToolCall.Function.Name, + ToolCallID: firstToolCall.ID, }) } }, From ecccd2b0354ddb4501e2960cac6331ca896ae9b4 Mon Sep 17 00:00:00 2001 From: Emil Valeev Date: Wed, 3 Apr 2024 15:41:28 +0500 Subject: [PATCH 7/7] fix(t2t): multiple function calls at the same time now working --- examples/func_call/main.go | 59 ++++++++++++++++++++------------ providers/openai/text_to_text.go | 43 +++++++++++------------ 2 files changed, 60 insertions(+), 42 deletions(-) diff --git a/examples/func_call/main.go b/examples/func_call/main.go index 9b88ebd..4e6aa26 100644 --- a/examples/func_call/main.go +++ b/examples/func_call/main.go @@ -1,7 +1,6 @@ package main import ( - "bufio" "context" "encoding/json" "fmt" @@ -30,8 +29,8 @@ func main() { }, // function with parameters { - Name: "SumNumbers", - Description: "Sum given numbers when asked", + Name: "ChangeNumbers", + Description: "Change given numbers when asked", Parameters: &jsonschema.Definition{ Type: "object", Properties: map[string]jsonschema.Definition{ @@ -49,28 +48,46 @@ func main() { }, }, }). - SetPrompt("You are helpful assistant.") + SetPrompt(` +Answer questions about meaning of life and summing numbers. +Always use GetMeaningOfLife and ChangeNumbers functions results as answers. +Examples: +- User: what is the meaning of life? +- Assistant: 42 +- User: 1+1 +- Assistant: 20 +- User: 1+1 and what is the meaning of life? 
+- Assistant: 20 and 42`) - messages := []agency.Message{} - reader := bufio.NewReader(os.Stdin) ctx := context.Background() - for { - fmt.Print("User: ") - - text, err := reader.ReadString('\n') - if err != nil { - panic(err) - } - - input := agency.UserMessage(text) - answer, err := t2tOp.SetMessages(messages).Execute(ctx, input) - if err != nil { - panic(err) - } + // test for first function call + answer, err := t2tOp.Execute( + ctx, + agency.UserMessage("what is the meaning of life?"), + ) + if err != nil { + panic(err) + } + fmt.Println(answer) - fmt.Println("Assistant: ", answer) + // test for second function call + answer, err = t2tOp.Execute( + ctx, + agency.UserMessage("1+1?"), + ) + if err != nil { + panic(err) + } + fmt.Println(answer) - messages = append(messages, input, answer) + // test for both function calls at the same time + answer, err = t2tOp.Execute( + ctx, + agency.UserMessage("1+1 and what is the meaning of life?"), + ) + if err != nil { + panic(err) } + fmt.Println(answer) } diff --git a/providers/openai/text_to_text.go b/providers/openai/text_to_text.go index 0c9fb29..28e88b1 100644 --- a/providers/openai/text_to_text.go +++ b/providers/openai/text_to_text.go @@ -87,28 +87,29 @@ func (p Provider) TextToText(params TextToTextParams) *agency.Operation { openAIMessages = append(openAIMessages, firstChoice.Message) - firstToolCall := firstChoice.Message.ToolCalls[0] - funcToCall := getFuncDefByName(params.FuncDefs, firstToolCall.Function.Name) - if funcToCall == nil { - return agency.Message{}, errors.New("function not found") + for _, toolCall := range firstChoice.Message.ToolCalls { + funcToCall := getFuncDefByName(params.FuncDefs, toolCall.Function.Name) + if funcToCall == nil { + return agency.Message{}, errors.New("function not found") + } + + funcResult, err := funcToCall.Body(ctx, []byte(toolCall.Function.Arguments)) + if err != nil { + return agency.Message{}, fmt.Errorf("call function %s: %w", funcToCall.Name, err) + } + + bb, err := 
json.Marshal(funcResult) + if err != nil { + return agency.Message{}, fmt.Errorf("marshal function result: %w", err) + } + + openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleTool, + Content: string(bb), + Name: toolCall.Function.Name, + ToolCallID: toolCall.ID, + }) } - - funcResult, err := funcToCall.Body(ctx, []byte(firstToolCall.Function.Arguments)) - if err != nil { - return agency.Message{}, fmt.Errorf("call function %s: %w", funcToCall.Name, err) - } - - bb, err := json.Marshal(funcResult) - if err != nil { - return agency.Message{}, fmt.Errorf("marshal function result: %w", err) - } - - openAIMessages = append(openAIMessages, openai.ChatCompletionMessage{ - Role: openai.ChatMessageRoleTool, - Content: string(bb), - Name: firstToolCall.Function.Name, - ToolCallID: firstToolCall.ID, - }) } }, )