Skip to content

Commit

Permalink
Merge pull request #10 from neurocult/func_call
Browse files Browse the repository at this point in the history
Func call
  • Loading branch information
emil14 authored Apr 3, 2024
2 parents e7c0951 + ecccd2b commit 36e5aaa
Show file tree
Hide file tree
Showing 9 changed files with 293 additions and 96 deletions.
5 changes: 2 additions & 3 deletions agency.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@ type Operation struct {
config *OperationConfig
}

// OperationHandler is a function that implements logic.
// OperationHandler is a function that implements operation's logic.
// It could be thought of as an interface that providers must implement.
type OperationHandler func(context.Context, Message, *OperationConfig) (Message, error)

// OperationConfig represents abstract operation configuration.
// It contains fields for all possible modalities but nothing specific to concrete model implementations.
// OperationConfig represents abstract operation configuration for all possible models.
type OperationConfig struct {
Prompt string
Messages []Message
Expand Down
93 changes: 93 additions & 0 deletions examples/func_call/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package main

import (
"context"
"encoding/json"
"fmt"
"os"

_ "github.com/joho/godotenv/autoload"
"github.com/sashabaranov/go-openai/jsonschema"

"github.com/neurocult/agency"
"github.com/neurocult/agency/providers/openai"
)

// main is a runnable example of function calling with the OpenAI provider.
// It builds a text-to-text operation that exposes two Go functions to the model,
// then executes it for three user messages and prints each answer.
// Any execution error aborts the program with a panic.
func main() {
	provider := openai.New(openai.Params{Key: os.Getenv("OPENAI_API_KEY")})

	// Function without parameters: the raw arguments are ignored.
	meaningOfLife := openai.FuncDef{
		Name:        "GetMeaningOfLife",
		Description: "Answer questions about meaning of life",
		Body: func(ctx context.Context, _ []byte) (any, error) {
			return 42, nil
		},
	}

	// Function with parameters: the arguments arrive as JSON and are decoded
	// into a local struct (encoding/json matches "a"/"b" to A/B case-insensitively).
	changeNumbers := openai.FuncDef{
		Name:        "ChangeNumbers",
		Description: "Change given numbers when asked",
		Parameters: &jsonschema.Definition{
			Type: "object",
			Properties: map[string]jsonschema.Definition{
				"a": {Type: "integer"},
				"b": {Type: "integer"},
			},
		},
		Body: func(ctx context.Context, params []byte) (any, error) {
			var args struct{ A, B int }
			err := json.Unmarshal(params, &args)
			if err != nil {
				return nil, err
			}
			// The sum is multiplied by 10 only to make the function's result
			// distinguishable from a normal model response.
			return (args.A + args.B) * 10, nil
		},
	}

	t2tOp := provider.
		TextToText(openai.TextToTextParams{
			Model:    "gpt-3.5-turbo",
			FuncDefs: []openai.FuncDef{meaningOfLife, changeNumbers},
		}).
		SetPrompt(`
Answer questions about meaning of life and summing numbers.
Always use GetMeaningOfLife and ChangeNumbers functions results as answers.
Examples:
- User: what is the meaning of life?
- Assistant: 42
- User: 1+1
- Assistant: 20
- User: 1+1 and what is the meaning of life?
- Assistant: 20 and 42`)

	ctx := context.Background()

	// One question per scenario: the first function, the second function,
	// and both functions in a single request.
	questions := []string{
		"what is the meaning of life?",
		"1+1?",
		"1+1 and what is the meaning of life?",
	}

	for _, question := range questions {
		answer, err := t2tOp.Execute(ctx, agency.UserMessage(question))
		if err != nil {
			panic(err)
		}
		fmt.Println(answer)
	}
}
1 change: 1 addition & 0 deletions process.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ type Process struct {
operations []*Operation
}

// NewProcess creates a new Process with given operations.
func NewProcess(operations ...*Operation) *Process {
return &Process{
operations: operations,
Expand Down
2 changes: 2 additions & 0 deletions providers/openai/image_to_text.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ type ImageToTextParams struct {
MaxTokens int
}


// ImageToText is an operation builder that creates an operation that can convert image to text.
func (f *Provider) ImageToText(params ImageToTextParams) *agency.Operation {
return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) {
openaiMsg := openai.ChatCompletionMessage{
Expand Down
6 changes: 6 additions & 0 deletions providers/openai/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,19 @@ import (
"github.com/sashabaranov/go-openai"
)

// Provider is a set of operation builders.
// Every builder method on Provider issues its requests through the wrapped client.
type Provider struct {
// client is the go-openai client used by the operation builders to call the API.
client *openai.Client
}

// Params is a set of parameters specific for creating this concrete provider.
// They are shared across all operation builders.
type Params struct {
Key string // API key passed to the client's default config. Required if not using a local LLM.
BaseURL string // Optional. If empty, the default OpenAI base URL is used.
}

// New creates a new Provider instance.
func New(params Params) *Provider {
cfg := openai.DefaultConfig(params.Key)
if params.BaseURL != "" {
Expand All @@ -25,6 +29,8 @@ func New(params Params) *Provider {
}
}

// === Helpers ===

// NullableFloat32 is a type that exists to distinguish between undefined values and real zeros.
// It fixes the sashabaranov/go-openai issue where a zero temperature is not included in the API request due to how JSON marshaling works.
type NullableFloat32 *float32
Expand Down
35 changes: 19 additions & 16 deletions providers/openai/speech_to_text.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,25 @@ type SpeechToTextParams struct {
Temperature NullableFloat32
}

// SpeechToText is an operation builder that creates an operation that can convert speech to text.
func (f Provider) SpeechToText(params SpeechToTextParams) *agency.Operation {
return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) {
resp, err := f.client.CreateTranscription(ctx, openai.AudioRequest{
Model: params.Model,
Prompt: cfg.Prompt,
FilePath: "speech.ogg", // TODO move to cfg?
Reader: bytes.NewReader(msg.Content),
Temperature: getTemperature(params.Temperature),
})
if err != nil {
return agency.Message{}, err
}
return agency.NewOperation(
func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) {
resp, err := f.client.CreateTranscription(ctx, openai.AudioRequest{
Model: params.Model,
Prompt: cfg.Prompt,
FilePath: "speech.ogg",
Reader: bytes.NewReader(msg.Content),
Temperature: getTemperature(params.Temperature),
})
if err != nil {
return agency.Message{}, err
}

return agency.Message{
Role: agency.AssistantRole,
Content: []byte(resp.Text),
}, nil
})
return agency.Message{
Role: agency.AssistantRole,
Content: []byte(resp.Text),
}, nil
},
)
}
49 changes: 26 additions & 23 deletions providers/openai/text_to_image.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,34 @@ type TextToImageParams struct {
Style string
}

// TextToImage is an operation builder that creates an operation that can convert text to image.
func (p Provider) TextToImage(params TextToImageParams) *agency.Operation {
return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) {
reqBase64 := openai.ImageRequest{
Prompt: fmt.Sprintf("%s\n\n%s", cfg.Prompt, string(msg.Content)),
Size: params.ImageSize,
ResponseFormat: openai.CreateImageResponseFormatB64JSON,
N: 1, // DALL·E-3 only support n=1, for other models support needed
Model: params.Model,
Quality: params.Quality,
Style: params.Style,
}
return agency.NewOperation(
func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) {
reqBase64 := openai.ImageRequest{
Prompt: fmt.Sprintf("%s\n\n%s", cfg.Prompt, string(msg.Content)),
Size: params.ImageSize,
ResponseFormat: openai.CreateImageResponseFormatB64JSON,
N: 1, // DALL·E-3 only support n=1, for other models support needed
Model: params.Model,
Quality: params.Quality,
Style: params.Style,
}

respBase64, err := p.client.CreateImage(ctx, reqBase64)
if err != nil {
return agency.Message{}, err
}
respBase64, err := p.client.CreateImage(ctx, reqBase64)
if err != nil {
return agency.Message{}, err
}

imgBytes, err := base64.StdEncoding.DecodeString(respBase64.Data[0].B64JSON)
if err != nil {
return agency.Message{}, err
}
imgBytes, err := base64.StdEncoding.DecodeString(respBase64.Data[0].B64JSON)
if err != nil {
return agency.Message{}, err
}

return agency.Message{
Role: agency.AssistantRole,
Content: imgBytes,
}, nil
})
return agency.Message{
Role: agency.AssistantRole,
Content: imgBytes,
}, nil
},
)
}
43 changes: 23 additions & 20 deletions providers/openai/text_to_speech.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,30 @@ type TextToSpeechParams struct {
Voice string
}

// TextToSpeech is an operation builder that creates an operation that can convert text to speech.
func (f Provider) TextToSpeech(params TextToSpeechParams) *agency.Operation {
return agency.NewOperation(func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) {
resp, err := f.client.CreateSpeech(ctx, openai.CreateSpeechRequest{
Model: openai.SpeechModel(params.Model),
Input: msg.String(),
Voice: openai.SpeechVoice(params.Voice),
ResponseFormat: openai.SpeechResponseFormat(params.ResponseFormat),
Speed: params.Speed,
})
if err != nil {
return agency.Message{}, err
}
return agency.NewOperation(
func(ctx context.Context, msg agency.Message, cfg *agency.OperationConfig) (agency.Message, error) {
resp, err := f.client.CreateSpeech(ctx, openai.CreateSpeechRequest{
Model: openai.SpeechModel(params.Model),
Input: msg.String(),
Voice: openai.SpeechVoice(params.Voice),
ResponseFormat: openai.SpeechResponseFormat(params.ResponseFormat),
Speed: params.Speed,
})
if err != nil {
return agency.Message{}, err
}

bb, err := io.ReadAll(resp)
if err != nil {
return agency.Message{}, err
}
bb, err := io.ReadAll(resp)
if err != nil {
return agency.Message{}, err
}

return agency.Message{
Role: agency.AssistantRole,
Content: bb,
}, nil
})
return agency.Message{
Role: agency.AssistantRole,
Content: bb,
}, nil
},
)
}
Loading

0 comments on commit 36e5aaa

Please sign in to comment.