Skip to content

Commit

Permalink
feat: support gemini for demo
Browse files Browse the repository at this point in the history
  • Loading branch information
plutoless committed Dec 13, 2024
1 parent f55cc7e commit 1a7d2aa
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 11 deletions.
220 changes: 220 additions & 0 deletions agents/examples/demo/property.json
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,226 @@
]
}
]
},
{
"name": "va_gemini_v2v",
"auto_start": true,
"nodes": [
{
"type": "extension",
"name": "agora_rtc",
"addon": "agora_rtc",
"extension_group": "rtc",
"property": {
"app_id": "${env:AGORA_APP_ID}",
"token": "",
"channel": "ten_agent_test",
"stream_id": 1234,
"remote_stream_id": 123,
"subscribe_audio": true,
"publish_audio": true,
"publish_data": true,
"subscribe_audio_sample_rate": 24000,
"subscribe_video_pix_fmt": 4,
"subscribe_video": true
}
},
{
"type": "extension",
"name": "v2v",
"addon": "gemini_v2v_python",
"extension_group": "llm",
"property": {
"api_key": "${env:GEMINI_API_KEY}",
"api_version": "v1alpha",
"base_uri": "generativelanguage.googleapis.com",
"dump": true,
"language": "en-US",
"max_tokens": 2048,
"model": "gemini-2.0-flash-exp",
"server_vad": true,
"temperature": 0.9,
"voice": "Puck"
}
},
{
"type": "extension",
"name": "message_collector",
"addon": "message_collector",
"extension_group": "transcriber",
"property": {}
},
{
"type": "extension",
"name": "bingsearch_tool_python",
"addon": "bingsearch_tool_python",
"extension_group": "default",
"property": {
"api_key": "${env:BING_API_KEY|}"
}
},
{
"type": "extension",
"name": "weatherapi_tool_python",
"addon": "weatherapi_tool_python",
"extension_group": "default",
"property": {
"api_key": "${env:WEATHERAPI_API_KEY|}"
}
}
],
"connections": [
{
"extension_group": "rtc",
"extension": "agora_rtc",
"cmd": [
{
"name": "on_user_joined",
"dest": [
{
"extension_group": "llm",
"extension": "v2v"
}
]
},
{
"name": "on_user_left",
"dest": [
{
"extension_group": "llm",
"extension": "v2v"
}
]
},
{
"name": "on_connection_failure",
"dest": [
{
"extension_group": "llm",
"extension": "v2v"
}
]
}
],
"audio_frame": [
{
"name": "pcm_frame",
"dest": [
{
"extension_group": "llm",
"extension": "v2v"
}
]
}
],
"video_frame": [
{
"name": "video_frame",
"dest": [
{
"extension_group": "llm",
"extension": "v2v"
}
]
}
]
},
{
"extension_group": "llm",
"extension": "v2v",
"cmd": [
{
"name": "flush",
"dest": [
{
"extension_group": "rtc",
"extension": "agora_rtc"
}
]
},
{
"name": "tool_call",
"dest": [
{
"extension_group": "default",
"extension": "bingsearch_tool_python"
},
{
"extension_group": "default",
"extension": "weatherapi_tool_python"
}
]
}
],
"data": [
{
"name": "text_data",
"dest": [
{
"extension_group": "transcriber",
"extension": "message_collector"
}
]
}
],
"audio_frame": [
{
"name": "pcm_frame",
"dest": [
{
"extension_group": "rtc",
"extension": "agora_rtc"
}
]
}
]
},
{
"extension_group": "transcriber",
"extension": "message_collector",
"data": [
{
"name": "data",
"dest": [
{
"extension_group": "rtc",
"extension": "agora_rtc"
}
]
}
]
},
{
"extension_group": "default",
"extension": "bingsearch_tool_python",
"cmd": [
{
"name": "tool_register",
"dest": [
{
"extension_group": "llm",
"extension": "v2v"
}
]
}
]
},
{
"extension_group": "default",
"extension": "weatherapi_tool_python",
"cmd": [
{
"name": "tool_register",
"dest": [
{
"extension_group": "llm",
"extension": "v2v"
}
]
}
]
}
]
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ async def _on_video(self, ten_env:AsyncTenEnv):
}]
try:
if self.connected:
# ten_env.log_info(f"send image")
await self.session.send(media_chunks)
except Exception as e:
self.ten_env.log_error(f"Failed to send image {e}")
Expand Down
13 changes: 7 additions & 6 deletions demo/src/app/api/agents/start/graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ export const getGraphProperties = (
"agora_asr_language": language,
},
"llm": {
...localizationOptions,
"prompt": prompt,
"greeting": combined_greeting,
},
Expand All @@ -117,7 +116,6 @@ export const getGraphProperties = (
"agora_asr_language": language,
},
"coze_python_async": {
...localizationOptions,
"prompt": prompt,
"greeting": combined_greeting,
},
Expand All @@ -132,7 +130,6 @@ export const getGraphProperties = (
},
"llm": {
"model": "gpt-4o",
...localizationOptions,
"prompt": prompt,
"greeting": combined_greeting,
},
Expand All @@ -146,7 +143,6 @@ export const getGraphProperties = (
"model": "gpt-4o-realtime-preview",
"voice": voiceNameMap[language]["openai"][voiceType],
"language": language,
...localizationOptions,
"prompt": prompt,
"greeting": combined_greeting,
}
Expand All @@ -157,7 +153,6 @@ export const getGraphProperties = (
"model": "gpt-4o-realtime-preview",
"voice": voiceNameMap[language]["openai"][voiceType],
"language": language,
...localizationOptions,
"prompt": prompt,
"greeting": combined_greeting,
},
Expand All @@ -172,7 +167,6 @@ export const getGraphProperties = (
},
"llm": {
"model": "gpt-4o",
...localizationOptions,
"prompt": prompt,
"greeting": combined_greeting,
},
Expand All @@ -189,6 +183,13 @@ export const getGraphProperties = (
"azure_synthesis_voice_name": voiceNameMap[language]["azure"][voiceType]
}
}
} else if (graphName == "va_gemini_v2v") {
return {
"v2v": {
"prompt": prompt,
"greeting": combined_greeting,
}
}
}
return {}
}
12 changes: 12 additions & 0 deletions demo/src/common/constant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,24 @@ export const GRAPH_OPTIONS: GraphOptionItem[] = [
label: "Voice Agent Coze Bot + Azure TTS",
value: "va_coze_azure",
},
{
label: "Voice Agent Gemini 2.0 Realtime",
value: "va_gemini_v2v",
},
]

/**
 * Tells whether the given graph is the RAG graph.
 *
 * @param graphName - Identifier of the currently selected graph.
 * @returns `true` only when `graphName` is exactly `"va_qwen_rag"`.
 */
export const isRagGraph = (graphName: string) => graphName === "va_qwen_rag"

/**
 * Tells whether language selection applies to the given graph.
 *
 * @param graphName - Identifier of the currently selected graph.
 * @returns `false` for graphs listed as having no language option
 *   (currently only `"va_gemini_v2v"`), `true` for every other name.
 */
export const isLanguageSupported = (graphName: string) => {
  // Graphs for which the language option is not offered.
  const graphsWithoutLanguage = ["va_gemini_v2v"]
  return graphsWithoutLanguage.indexOf(graphName) === -1
}

/**
 * Tells whether the voice-gender preset applies to the given graph.
 *
 * @param graphName - Identifier of the currently selected graph.
 * @returns `false` for `"va_gemini_v2v"`, `true` for every other name.
 */
export const isVoiceGenderSupported = (graphName: string) => {
  switch (graphName) {
    // Graphs for which the voice-gender option is not offered.
    case "va_gemini_v2v":
      return false
    default:
      return true
  }
}

export const VOICE_OPTIONS: VoiceOptionItem[] = [
{
label: "Male",
Expand Down
8 changes: 6 additions & 2 deletions demo/src/components/Chat/ChatCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as React from "react"
import { cn } from "@/lib/utils"
import { LanguageSelect, GraphSelect } from "@/components/Chat/ChatCfgSelect"
import PdfSelect from "@/components/Chat/PdfSelect"
import { useAppDispatch, useAppSelector, isRagGraph } from "@/common"
import { useAppDispatch, useAppSelector, isRagGraph, isLanguageSupported } from "@/common"
import { setRtmConnected, addChatItem } from "@/store/reducers/global"
import MessageList from "@/components/Chat/MessageList"
import { Button } from "@/components/ui/button"
Expand Down Expand Up @@ -132,7 +132,11 @@ export default function ChatCard(props: { className?: string }) {
{/* Action Bar */}
<div className="flex w-full flex-wrap items-center justify-end gap-x-4 gap-y-2">
<GraphSelect />
<LanguageSelect />
{
isLanguageSupported(graphName) ?
<LanguageSelect /> :
null
}
{isRagGraph(graphName) && <PdfSelect />}
</div>
{/* Chat messages would go here */}
Expand Down
10 changes: 7 additions & 3 deletions demo/src/components/Dynamic/RTCCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import * as React from "react"
import { cn } from "@/lib/utils"
import { ICameraVideoTrack, IMicrophoneAudioTrack } from "agora-rtc-sdk-ng"
import { useAppSelector, useAppDispatch, VOICE_OPTIONS } from "@/common"
import { useAppSelector, useAppDispatch } from "@/common/hooks"
import { isVoiceGenderSupported } from "@/common/constant"
import { ITextItem, EMessageType } from "@/types"
import { rtcManager, IUserTracks, IRtcUser } from "@/manager"
import {
Expand All @@ -25,7 +26,7 @@ export default function RTCCard(props: { className?: string }) {
const dispatch = useAppDispatch()
const options = useAppSelector((state) => state.global.options)
const voiceType = useAppSelector((state) => state.global.voiceType)
const agentConnected = useAppSelector((state) => state.global.agentConnected)
const selectedGraphId = useAppSelector((state) => state.global.graphName)
const { userId, channel } = options
const [videoTrack, setVideoTrack] = React.useState<ICameraVideoTrack>()
const [audioTrack, setAudioTrack] = React.useState<IMicrophoneAudioTrack>()
Expand Down Expand Up @@ -124,7 +125,10 @@ export default function RTCCard(props: { className?: string }) {
<div className="w-full">
<div className="flex w-full items-center justify-between p-2">
<h2 className="mb-2 text-xl font-semibold">Audio & Video</h2>
<AgentVoicePresetSelect />
{
isVoiceGenderSupported(selectedGraphId) ?
<AgentVoicePresetSelect /> :
null}
</div>
<AgentView audioTrack={remoteuser?.audioTrack} />
</div>
Expand Down

0 comments on commit 1a7d2aa

Please sign in to comment.