From 5d99b08dc9958af7f624207d68b2abfeb4d5c470 Mon Sep 17 00:00:00 2001 From: nulyang Date: Mon, 21 Oct 2024 15:52:20 +0800 Subject: [PATCH 1/3] service(openai_realtime): add text delta handle --- src/pipecat/services/openai_realtime_beta/openai.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index 0c57ed0bb..a11ddb5a1 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -308,6 +308,8 @@ async def _receive_task_handler(self): await self._handle_evt_audio_delta(evt) elif evt.type == "response.audio.done": await self._handle_evt_audio_done(evt) + elif evt.type == "response.text.delta": + await self._handle_evt_text_delta(evt) elif evt.type == "conversation.item.created": await self._handle_evt_conversation_item_created(evt) elif evt.type == "conversation.item.input_audio_transcription.completed": @@ -371,6 +373,10 @@ async def _handle_evt_audio_done(self, evt): # Don't clear the self._current_audio_response here. We need to wait until we # receive a BotStoppedSpeakingFrame from the output transport. + async def _handle_evt_text_delta(self, evt): + await self.stop_ttfb_metrics() + await self.push_frame(TextFrame(evt.delta)) + async def _handle_evt_conversation_item_created(self, evt): # This will get sent from the server every time a new "message" is added # to the server's conversation state, whether we create it via the API From cb72a615f683b20ef97f86afa5d958d017084245 Mon Sep 17 00:00:00 2001 From: nulyang Date: Mon, 21 Oct 2024 15:52:38 +0800 Subject: [PATCH 2/3] service(openai_realtime): add text content append --- src/pipecat/services/openai_realtime_beta/context.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pipecat/services/openai_realtime_beta/context.py b/src/pipecat/services/openai_realtime_beta/context.py index 2b6ff968f..758076407 100644 --- a/src/pipecat/services/openai_realtime_beta/context.py +++ b/src/pipecat/services/openai_realtime_beta/context.py @@ -139,6 +139,8 @@ def add_assistant_content_item_as_message(self, item): for content in item.content: if content.type == "audio": message["content"].append({"type": "text", "text": content.transcript}) + if content.type == "text": + message["content"].append({"type": "text", "text": content.text}) else: logger.error(f"Unhandled content type in assistant item: {content.type} - {item}") self.add_message(message) From 737bf3b06595d228b38d76cefc05f696dd3c5885 Mon Sep 17 00:00:00 2001 From: nulyang Date: Mon, 28 Oct 2024 19:39:14 +0800 Subject: [PATCH 3/3] service(openai_realtime): fix condition structure --- src/pipecat/services/openai_realtime_beta/context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pipecat/services/openai_realtime_beta/context.py b/src/pipecat/services/openai_realtime_beta/context.py index 758076407..205dfe1c8 100644 --- a/src/pipecat/services/openai_realtime_beta/context.py +++ b/src/pipecat/services/openai_realtime_beta/context.py @@ -139,7 +139,7 @@ def add_assistant_content_item_as_message(self, item): for content in item.content: if content.type == "audio": message["content"].append({"type": "text", "text": content.transcript}) - if content.type == "text": + elif content.type == "text": message["content"].append({"type": "text", "text": content.text}) else: logger.error(f"Unhandled content type in assistant item: {content.type} - {item}")