-
Notifications
You must be signed in to change notification settings - Fork 244
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #474 from TEN-framework/feat/gemini-v2v
feat: support gemini v2v
- Loading branch information
Showing
16 changed files
with
1,030 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# | ||
# | ||
# Agora Real Time Engagement | ||
# Created by Wei Hu in 2022-11. | ||
# Copyright (c) 2024 Agora IO. All rights reserved. | ||
# | ||
# | ||
import("//build/feature/ten_package.gni") | ||
|
||
# Declares the "gemini_v2v_python" package via the ten_package template | ||
# imported from //build/feature/ten_package.gni; package_kind marks it as | ||
# an extension package. | ||
ten_package("gemini_v2v_python") { | ||
package_kind = "extension" | ||
|
||
# Python sources plus the manifest/property JSON files listed as this | ||
# package's resources. | ||
resources = [ | ||
"__init__.py", | ||
"addon.py", | ||
"extension.py", | ||
"log.py", | ||
"manifest.json", | ||
"property.json", | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# gemini_v2v_python | ||
|
||
An extension for integrating Gemini's Next Generation of **Multimodal** AI into your application, providing configurable AI-driven features such as conversational agents, task automation, and tool integration. | ||
|
||
## Features | ||
|
||
- Gemini **Multimodal** Integration: Leverage Gemini **Multimodal** models for voice-to-voice as well as text processing. | ||
- Configurable: Easily customize API keys, model settings, prompts, temperature, etc. | ||
- Async Queue Processing: Supports real-time message processing with task cancellation and prioritization. | ||
|
||
## API | ||
|
||
Refer to the `api` definition in [manifest.json](manifest.json) and default values in [property.json](property.json). | ||
|
||
| **Property** | **Type** | **Description** | | ||
|----------------------------|------------|-------------------------------------------| | ||
| `api_key` | `string` | API key for authenticating with Gemini | | ||
| `temperature` | `float32` | Sampling temperature, higher values mean more randomness | | ||
| `model` | `string` | Gemini model identifier (e.g., `gemini-2.0-flash-exp`) | | ||
| `max_tokens` | `int32` | Maximum number of tokens to generate | | ||
| `system_message` | `string` | Default system message to send to the model | | ||
| `voice` | `string` | Voice that Gemini model uses, such as `Puck`, `Charon`, `Kore`, etc. | | ||
| `server_vad` | `bool` | Flag to enable or disable server VAD for Gemini | | ||
| `language` | `string` | Language that Gemini model responds in, such as `en-US`, `zh-CN`, etc. | | ||
| `dump` | `bool` | Flag to enable or disable audio dump for debugging purposes | | ||
| `base_uri` | `string` | Base URI for connecting to the Gemini service | | ||
| `audio_out` | `bool` | Flag to enable or disable audio output | | ||
| `input_transcript` | `bool` | Flag to enable input transcript processing | | ||
| `sample_rate` | `int32` | Sample rate for audio processing | | ||
| `stream_id` | `int32` | Stream ID for identifying audio streams | | ||
| `greeting` | `string` | Greeting message for initial interaction | | ||
|
||
### Data Out | ||
|
||
| **Name** | **Property** | **Type** | **Description** | | ||
|----------------|--------------|------------|-------------------------------| | ||
| `text_data` | `text` | `string` | Outgoing text data | | ||
| `append` | `text` | `string` | Additional text appended to the output | | ||
|
||
### Command Out | ||
|
||
| **Name** | **Description** | | ||
|----------------|---------------------------------------------| | ||
| `flush` | Response after flushing the current state | | ||
| `tool_call` | Invokes a tool with specific arguments | | ||
|
||
### Audio Frame In | ||
|
||
| **Name** | **Description** | | ||
|------------------|-------------------------------------------| | ||
| `pcm_frame` | Audio frame input for voice processing | | ||
|
||
### Video Frame In | ||
|
||
| **Name** | **Description** | | ||
|------------------|-------------------------------------------| | ||
| `video_frame` | Video frame input for processing | | ||
|
||
### Audio Frame Out | ||
|
||
| **Name** | **Description** | | ||
|------------------|-------------------------------------------| | ||
| `pcm_frame` | Audio frame output after voice processing | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# | ||
# | ||
# Agora Real Time Engagement | ||
# Created by Wei Hu in 2024-08. | ||
# Copyright (c) 2024 Agora IO. All rights reserved. | ||
# | ||
# | ||
from . import addon |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# | ||
# | ||
# Agora Real Time Engagement | ||
# Created by Wei Hu in 2024-08. | ||
# Copyright (c) 2024 Agora IO. All rights reserved. | ||
# | ||
# | ||
from ten import ( | ||
Addon, | ||
register_addon_as_extension, | ||
TenEnv, | ||
) | ||
|
||
|
||
@register_addon_as_extension("gemini_v2v_python")
class GeminiRealtimeExtensionAddon(Addon):
    """Addon registered under the name "gemini_v2v_python".

    When asked to create an instance it builds a GeminiRealtimeExtension
    and hands it back through the TenEnv callback.
    """

    def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None:
        """Build the extension called *name* and report it as created.

        Args:
            ten_env: Environment handle used for logging and for signalling
                that the instance has been created.
            name: Name passed to the GeminiRealtimeExtension constructor.
            context: Opaque value forwarded unchanged to
                ``on_create_instance_done``.
        """
        # The extension module is imported here, inside the method, rather
        # than at module import time.
        from .extension import GeminiRealtimeExtension

        ten_env.log_info("GeminiRealtimeExtensionAddon on_create_instance")
        extension = GeminiRealtimeExtension(name)
        ten_env.on_create_instance_done(extension, context)
Oops, something went wrong.