From 195aad9a06a8524d007446737520a462c85fc72a Mon Sep 17 00:00:00 2001 From: Tyler Thomas Date: Tue, 27 Feb 2024 23:34:53 -0700 Subject: [PATCH 1/6] remove response structs --- src/GoogleGenAI.jl | 47 +++++++++++++++++++++++----------------------- test/runtests.jl | 3 --- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/GoogleGenAI.jl b/src/GoogleGenAI.jl index da2af92..5ba7d62 100644 --- a/src/GoogleGenAI.jl +++ b/src/GoogleGenAI.jl @@ -25,18 +25,6 @@ Base.@kwdef struct GoogleProvider <: AbstractGoogleProvider base_url::String = "https://generativelanguage.googleapis.com" api_version::String = "v1beta" end -struct GoogleTextResponse - candidates::Vector{Dict{Symbol,Any}} - safety_ratings::Dict{Pair{Symbol,String},Pair{Symbol,String}} - text::String - response_status::Int - finish_reason::String -end - -struct GoogleEmbeddingResponse - values::Vector{Float64} - response_status::Int -end #TODO: Add support for exception struct BlockedPromptException <: Exception end @@ -75,18 +63,23 @@ function _parse_response(response::HTTP.Messages.Response) candidates = [Dict(i) for i in parsed_response[:candidates]] finish_reason = candidates[end][:finishReason] safety_rating = Dict(parsed_response.promptFeedback.safetyRatings) - return GoogleTextResponse( - candidates, safety_rating, concatenated_texts, response.status, finish_reason + + return ( + candidates=candidates, + safety_ratings=safety_rating, + text=concatenated_texts, + response_status=response.status, + finish_reason=finish_reason, ) end #TODO: Should we use different function names? """ - generate_content(provider::AbstractGoogleProvider, model_name::String, prompt::String, image_path::String; kwargs...) -> GoogleTextResponse - generate_content(api_key::String, model_name::String, prompt::String, image_path::String; kwargs...) -> GoogleTextResponse + generate_content(provider::AbstractGoogleProvider, model_name::String, prompt::String, image_path::String; kwargs...) 
-> NamedTuple + generate_content(api_key::String, model_name::String, prompt::String, image_path::String; kwargs...) -> NamedTuple - generate_content(provider::AbstractGoogleProvider, model_name::String, conversation::Vector{Dict{Symbol,Any}}; kwargs...) -> GoogleTextResponse - generate_content(api_key::String, model_name::String, conversation::Vector{Dict{Symbol,Any}}; kwargs...) -> GoogleTextResponse + generate_content(provider::AbstractGoogleProvider, model_name::String, conversation::Vector{Dict{Symbol,Any}}; kwargs...) -> NamedTuple + generate_content(api_key::String, model_name::String, conversation::Vector{Dict{Symbol,Any}}; kwargs...) -> NamedTuple Generate content based on a combination of text prompt and an image (optional). @@ -105,7 +98,12 @@ Generate content based on a combination of text prompt and an image (optional). - `safety_settings::Vector{Dict}` (optional): Settings to control the safety aspects of the generated content, such as filtering out unsafe or inappropriate content. # Returns -- `GoogleTextResponse`: The generated content response. +- `NamedTuple`: A named tuple containing the following keys: + - `candidates`: A vector of dictionaries, each representing a generation candidate. + - `safety_ratings`: A dictionary containing safety ratings for the prompt feedback. + - `text`: A string representing the concatenated text from all candidates. + - `response_status`: An integer representing the HTTP response status code. + - `finish_reason`: A string indicating the reason why the generation process was finished. """ function generate_content( provider::AbstractGoogleProvider, model_name::String, prompt::String; kwargs... @@ -237,10 +235,9 @@ function count_tokens(api_key::String, model_name::String, prompt::String) return count_tokens(GoogleProvider(; api_key), model_name, prompt) end -#TODO: Do we want an embeddings struct, or just the array of embeddings? 
""" - embed_content(provider::AbstractGoogleProvider, model_name::String, prompt::String) -> GoogleEmbeddingResponse - embed_content(api_key::String, model_name::String, prompt::String) -> GoogleEmbeddingResponse + embed_content(provider::AbstractGoogleProvider, model_name::String, prompt::String) -> NamedTuple + embed_content(api_key::String, model_name::String, prompt::String) -> NamedTuple Generate an embedding for the given prompt text using the specified model. @@ -251,7 +248,9 @@ Generate an embedding for the given prompt text using the specified model. - `prompt::String`: The prompt prompt based on which the text is generated. # Returns -- `GoogleEmbeddingResponse` +- `NamedTuple`: A named tuple containing the following keys: + - `values`: A vector of `Float64` representing the embedding values for the given prompt. + - `response_status`: An integer representing the HTTP response status code. """ function embed_content(provider::AbstractGoogleProvider, model_name::String, prompt::String) endpoint = "models/$model_name:embedContent" @@ -263,7 +262,7 @@ function embed_content(provider::AbstractGoogleProvider, model_name::String, pro embedding_values = get( get(JSON3.read(response.body), "embedding", Dict()), "values", Vector{Float64}() ) - return GoogleEmbeddingResponse(embedding_values, response.status) + return (values=embedding_values, response_status=response.status) end function embed_content(api_key::String, model_name::String, prompt::String) return embed_content(GoogleProvider(; api_key), model_name, prompt) diff --git a/test/runtests.jl b/test/runtests.jl index 022f826..f1e1595 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,7 +7,6 @@ const secret_key = ENV["GOOGLE_API_KEY"] @testset "GoogleGenAI.jl" begin # Generate text from text response = generate_content(secret_key, "gemini-pro", "Hello"; max_output_tokens=50) - @test typeof(response) == GoogleGenAI.GoogleTextResponse # Generate text from text+image response = generate_content( @@ 
-17,7 +16,6 @@ const secret_key = ENV["GOOGLE_API_KEY"] "example.jpg"; max_output_tokens=50, ) - @test typeof(response) == GoogleGenAI.GoogleTextResponse # Multi-turn conversation conversation = [Dict(:role => "user", :parts => [Dict(:text => "Hello")])] @@ -29,7 +27,6 @@ const secret_key = ENV["GOOGLE_API_KEY"] @test n_tokens == 1 embeddings = embed_content(secret_key, "embedding-001", "Hello") - @test typeof(embeddings) == GoogleGenAI.GoogleEmbeddingResponse @test size(embeddings.values) == (768,) models = list_models(secret_key) From 1d8c60b9548b821620ab117859c0b763ea0160da Mon Sep 17 00:00:00 2001 From: tylerjthomas9 Date: Thu, 29 Feb 2024 17:00:24 -0700 Subject: [PATCH 2/6] add CI (no env key) --- .github/workflows/CI.yml | 55 ++++++++++++++++++++++++++ .github/workflows/Documenter.yml | 1 - .github/workflows/FormatCheck.yml | 2 - .github/workflows/TagBot.yml | 15 ++++++++ test/runtests.jl | 64 ++++++++++++++++--------------- 5 files changed, 104 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/CI.yml create mode 100644 .github/workflows/TagBot.yml diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..46e4d19 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,55 @@ +name: CI +on: + workflow_dispatch: + push: + branches: + - main + paths: + - '.github/workflows/CI.yml' + - 'test/**' + - 'src/**' + - 'Project.toml' + pull_request: + types: [opened, synchronize, reopened] + paths: + - '.github/workflows/CI.yml' + - 'test/**' + - 'src/**' + - 'Project.toml' +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + # - '1.6' + - '1' + # - 'nightly' + os: + - ubuntu-latest + arch: + - x64 + include: + - os: windows-latest + version: '1' + arch: x64 + - os: macos-latest + version: '1' + arch: x64 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 
0 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v1 # https://github.com/julia-actions/cache + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v3 + with: + file: lcov.info \ No newline at end of file diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index 7a1d108..380e641 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -4,7 +4,6 @@ on: push: branches: - 'main' - - 'release-' tags: '*' pull_request: release: diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml index a30cb5b..49d158a 100644 --- a/.github/workflows/FormatCheck.yml +++ b/.github/workflows/FormatCheck.yml @@ -4,8 +4,6 @@ on: push: branches: - 'main' - - 'master' - - /^release-.*$/ tags: '*' pull_request: jobs: diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 0000000..623860f --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,15 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index f1e1595..9b973eb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,36 +2,40 @@ using Aqua using GoogleGenAI using Test -const secret_key = ENV["GOOGLE_API_KEY"] - -@testset "GoogleGenAI.jl" begin - # Generate text from text - response = generate_content(secret_key, "gemini-pro", "Hello"; max_output_tokens=50) - - # Generate text from text+image - response = generate_content( - secret_key, - 
"gemini-pro-vision", - "What is this picture?", - "example.jpg"; - max_output_tokens=50, - ) - - # Multi-turn conversation - conversation = [Dict(:role => "user", :parts => [Dict(:text => "Hello")])] - response = generate_content( - secret_key, "gemini-pro", conversation; max_output_tokens=50 - ) - - n_tokens = count_tokens(secret_key, "gemini-pro", "Hello") - @test n_tokens == 1 - - embeddings = embed_content(secret_key, "embedding-001", "Hello") - @test size(embeddings.values) == (768,) - - models = list_models(secret_key) - @test length(models) > 0 - @test haskey(models[1], :name) +if haskey(ENV, "GOOGLE_API_KEY") + const secret_key = ENV["GOOGLE_API_KEY"] + + @testset "GoogleGenAI.jl" begin + # Generate text from text + response = generate_content(secret_key, "gemini-pro", "Hello"; max_output_tokens=50) + + # Generate text from text+image + response = generate_content( + secret_key, + "gemini-pro-vision", + "What is this picture?", + "example.jpg"; + max_output_tokens=50, + ) + + # Multi-turn conversation + conversation = [Dict(:role => "user", :parts => [Dict(:text => "Hello")])] + response = generate_content( + secret_key, "gemini-pro", conversation; max_output_tokens=50 + ) + + n_tokens = count_tokens(secret_key, "gemini-pro", "Hello") + @test n_tokens == 1 + + embeddings = embed_content(secret_key, "embedding-001", "Hello") + @test size(embeddings.values) == (768,) + + models = list_models(secret_key) + @test length(models) > 0 + @test haskey(models[1], :name) + end +else + @info "Skipping GoogleGenAI.jl tests because GOOGLE_API_KEY is not set" end Aqua.test_all(GoogleGenAI) From 050bc0403cd05175ea054c93ebd06bffea3f61d5 Mon Sep 17 00:00:00 2001 From: Tyler Thomas Date: Thu, 21 Mar 2024 22:13:47 -0700 Subject: [PATCH 3/6] throw an error if the api key is empty --- src/GoogleGenAI.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/GoogleGenAI.jl b/src/GoogleGenAI.jl index 5ba7d62..a519d6f 100644 --- a/src/GoogleGenAI.jl +++ 
b/src/GoogleGenAI.jl @@ -31,12 +31,15 @@ struct BlockedPromptException <: Exception end function status_error(resp, log=nothing) logs = !isnothing(log) ? ": $log" : "" - return error("Request failed with status $(resp.status) $(resp.message)$logs") + return error("Request failed with status $(resp.status) $(resp.message) $logs") end function _request( provider::AbstractGoogleProvider, endpoint::String, method::Symbol, body::Dict ) + if isempty(provider.api_key) + throw(ArgumentError("api key cannot be empty")) + end url = "$(provider.base_url)/$(provider.api_version)/$endpoint?key=$(provider.api_key)" headers = Dict("Content-Type" => "application/json") serialized_body = isempty(body) ? UInt8[] : JSON3.write(body) From bc6a2b3cfef77184e614497a844bdb287a63a79d Mon Sep 17 00:00:00 2001 From: Tyler Thomas Date: Thu, 21 Mar 2024 22:25:57 -0700 Subject: [PATCH 4/6] Add batch embeddings --- README.md | 16 +++++++++++++++- src/GoogleGenAI.jl | 22 ++++++++++++++++++++++ test/runtests.jl | 4 ++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d69f51..281adea 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ outputs ```julia using GoogleGenAI -embeddings = create_embeddings(ENV["GOOGLE_API_KEY"], "gemini-pro", "Hello") +embeddings = embed_content(ENV["GOOGLE_API_KEY"], "embedding-001", "Hello") println(size(embeddings.values)) ``` outputs @@ -116,6 +116,20 @@ outputs (768,) ``` +```julia +using GoogleGenAI +embeddings = embed_content(ENV["GOOGLE_API_KEY"], "embedding-001", ["Hello", "world"]) +println(embeddings.response_status) +println(size(embeddings.values[1])) +println(size(embeddings.values[2])) +``` +outputs +```julia +200 +(768,) +(768,) +``` + ### List Models ```julia diff --git a/src/GoogleGenAI.jl b/src/GoogleGenAI.jl index a519d6f..11c3e12 100644 --- a/src/GoogleGenAI.jl +++ b/src/GoogleGenAI.jl @@ -241,6 +241,8 @@ end """ embed_content(provider::AbstractGoogleProvider, model_name::String, prompt::String) -> 
NamedTuple embed_content(api_key::String, model_name::String, prompt::String) -> NamedTuple + embed_content(provider::AbstractGoogleProvider, model_name::String, prompts::Vector{String}) -> Vector{NamedTuple} + embed_content(api_key::String, model_name::String, prompts::Vector{String}) -> Vector{NamedTuple} Generate an embedding for the given prompt text using the specified model. @@ -271,6 +273,26 @@ function embed_content(api_key::String, model_name::String, prompt::String) return embed_content(GoogleProvider(; api_key), model_name, prompt) end +function embed_content(provider::AbstractGoogleProvider, model_name::String, prompts::Vector{String}) + endpoint = "models/$model_name:batchEmbedContents" + body = Dict( + "requests" => [ + Dict( + "model" => "models/$model_name", + "content" => Dict("parts" => [Dict("text" => prompt)]) + ) for prompt in prompts + ] + ) + response = _request(provider, endpoint, :POST, body) + embedding_values = [ + get(embedding, "values", Vector{Float64}()) for embedding in JSON3.read(response.body)["embeddings"] + ] + return (values=embedding_values, response_status=response.status) +end +function embed_content(api_key::String, model_name::String, prompts::Vector{String}) + return embed_content(GoogleProvider(; api_key), model_name, prompts) +end + """ list_models(provider::AbstractGoogleProvider) -> Vector{Dict} list_models(api_key::String) -> Vector{Dict} diff --git a/test/runtests.jl b/test/runtests.jl index 9b973eb..a7ff0ec 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,6 +30,10 @@ if haskey(ENV, "GOOGLE_API_KEY") embeddings = embed_content(secret_key, "embedding-001", "Hello") @test size(embeddings.values) == (768,) + embeddings = embed_content(secret_key, "embedding-001", ["Hello", "world"]) + @test size(embeddings.values[1]) == (768,) + @test size(embeddings.values[2]) == (768,) + models = list_models(secret_key) @test length(models) > 0 @test haskey(models[1], :name) From 33c9308cad1ac0c632e3d15ce9f78ec3554e4710 Mon 
Sep 17 00:00:00 2001 From: Tyler Thomas Date: Thu, 21 Mar 2024 22:26:49 -0700 Subject: [PATCH 5/6] add README.md documentation to the docs --- docs/src/index.md | 128 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index 13b4859..423e40f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -20,6 +20,9 @@ Pkg> add https://github.com/tylerjthomas9/GoogleGenAI.jl/ Create a [secret API key in Google AI Studio](https://makersuite.google.com/) + +### Generate Content + ```julia using GoogleGenAI @@ -29,7 +32,130 @@ prompt = "Hello" response = generate_content(secret_key, model, prompt) println(response.text) ``` -returns +outputs ```julia "Hello there! How may I assist you today? Feel free to ask me any questions you may have or give me a command. I'm here to help! 😊" ``` + +```julia +response = generate_content(secret_key, model, prompt; max_output_tokens=10) +println(response.text) +``` +outputs +```julia +"Hello! How can I assist you today?" +``` + +```julia +using GoogleGenAI + +secret_key = ENV["GOOGLE_API_KEY"] +model = "gemini-pro-vision" +prompt = "What is this image?" +image_path = "test/example.jpg" +response = generate_content(secret_key, model, prompt, image_path) +println(response.text) +``` +outputs +```julia +"The logo for the Julia programming language." 
+``` + +### Multi-turn conversations + +```julia +# Define the provider with your API key (placeholder here) +provider = GoogleProvider(api_key=ENV["GOOGLE_API_KEY"]) +model_name = "gemini-pro" +conversation = [ + Dict(:role => "user", :parts => [Dict(:text => "When was Julia 1.0 released?")]) +] + +response = generate_content(provider, model_name, conversation) +push!(conversation, Dict(:role => "model", :parts => [Dict(:text => response.text)])) +println("Model: ", response.text) + +push!(conversation, Dict(:role => "user", :parts => [Dict(:text => "Who created the language?")])) +response = generate_content(provider, model_name, conversation, max_output_tokens=100) +println("Model: ", response.text) +``` +outputs +```julia +"Model: August 8, 2018" + +"Model: Jeff Bezanson, Alan Edelman, Viral B. Shah, Stefan Karpinski, and Keno Fischer + +Julia Computing, Inc. is the company that provides commercial support for Julia." +``` + +### Count Tokens +```julia +using GoogleGenAI +n_tokens = count_tokens(ENV["GOOGLE_API_KEY"], "gemini-pro", "Hello") +println(n_tokens) +``` +outputs +```julia +1 +``` + +### Create Embeddings + +```julia +using GoogleGenAI +embeddings = embed_content(ENV["GOOGLE_API_KEY"], "embedding-001", "Hello") +println(size(embeddings.values)) +``` +outputs +```julia +(768,) +``` + +```julia +using GoogleGenAI +embeddings = embed_content(ENV["GOOGLE_API_KEY"], "embedding-001", ["Hello", "world"]) +println(embeddings.response_status) +println(size(embeddings.values[1])) +println(size(embeddings.values[2])) +``` +outputs +```julia +200 +(768,) +(768,) +``` + +### List Models + +```julia +using GoogleGenAI +models = list_models(ENV["GOOGLE_API_KEY"]) +for m in models + if "generateContent" in m[:supported_generation_methods] + println(m[:name]) + end +end +``` +outputs +```julia +gemini-pro +gemini-pro-vision +``` + +### Safety Settings + +More information about the safety settings can be found [here](https://ai.google.dev/docs/safety_setting_gemini). 
+ +```julia +using GoogleGenAI +secret_key = ENV["GOOGLE_API_KEY"] +safety_settings = [ + Dict("category" => "HARM_CATEGORY_HATE_SPEECH", "threshold" => "HARM_BLOCK_THRESHOLD_UNSPECIFIED"), + Dict("category" => "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold" => "BLOCK_ONLY_HIGH"), + Dict("category" => "HARM_CATEGORY_HARASSMENT", "threshold" => "BLOCK_MEDIUM_AND_ABOVE"), + Dict("category" => "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold" => "BLOCK_LOW_AND_ABOVE") +] +model = "gemini-pro" +prompt = "Hello" +response = generate_content(secret_key, model, prompt; safety_settings=safety_settings) +``` From d0890e242dd2b19c69f0435741b96613bc0017bd Mon Sep 17 00:00:00 2001 From: Tyler Thomas Date: Thu, 21 Mar 2024 22:45:52 -0700 Subject: [PATCH 6/6] reformat --- README.md | 1 + src/GoogleGenAI.jl | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 281adea..a025a39 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) + [![CI](https://github.com/tylerjthomas9/GoogleGenAI.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/tylerjthomas9/GoogleGenAI.jl/actions/workflows/CI.yml) [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle) [![Docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://tylerjthomas9.github.io/GoogleGenAI.jl) diff --git a/src/GoogleGenAI.jl b/src/GoogleGenAI.jl index 11c3e12..40c95f7 100644 --- a/src/GoogleGenAI.jl +++ b/src/GoogleGenAI.jl @@ -273,19 +273,22 @@ function embed_content(api_key::String, model_name::String, prompt::String) return embed_content(GoogleProvider(; api_key), model_name, prompt) end -function embed_content(provider::AbstractGoogleProvider, model_name::String, prompts::Vector{String}) +function embed_content( + provider::AbstractGoogleProvider, model_name::String, 
prompts::Vector{String} +) endpoint = "models/$model_name:batchEmbedContents" body = Dict( "requests" => [ Dict( "model" => "models/$model_name", - "content" => Dict("parts" => [Dict("text" => prompt)]) + "content" => Dict("parts" => [Dict("text" => prompt)]), ) for prompt in prompts - ] + ], ) response = _request(provider, endpoint, :POST, body) embedding_values = [ - get(embedding, "values", Vector{Float64}()) for embedding in JSON3.read(response.body)["embeddings"] + get(embedding, "values", Vector{Float64}()) for + embedding in JSON3.read(response.body)["embeddings"] ] return (values=embedding_values, response_status=response.status) end