From 95c771e330023ea72f3c8e25345e263e997c4665 Mon Sep 17 00:00:00 2001 From: Jesse Hills <3060199+jesserockz@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:00:39 +1300 Subject: [PATCH] Send esphome tts_stream event after audio bytes are actually loaded into memory (#104448) Send tts_stream event after audio bytes are actually loaded into memory --- .../components/esphome/voice_assistant.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/homeassistant/components/esphome/voice_assistant.py b/homeassistant/components/esphome/voice_assistant.py index bb62d495076fe4..68ed98aa789376 100644 --- a/homeassistant/components/esphome/voice_assistant.py +++ b/homeassistant/components/esphome/voice_assistant.py @@ -301,10 +301,6 @@ async def _send_tts(self, media_id: str) -> None: if self.transport is None: return - self.handle_event( - VoiceAssistantEventType.VOICE_ASSISTANT_TTS_STREAM_START, {} - ) - extension, data = await tts.async_get_media_source_audio( self.hass, media_id, @@ -331,11 +327,17 @@ async def _send_tts(self, media_id: str) -> None: audio_bytes = wav_file.readframes(wav_file.getnframes()) - _LOGGER.debug("Sending %d bytes of audio", len(audio_bytes)) + audio_bytes_size = len(audio_bytes) + + _LOGGER.debug("Sending %d bytes of audio", audio_bytes_size) + + self.handle_event( + VoiceAssistantEventType.VOICE_ASSISTANT_TTS_STREAM_START, {} + ) bytes_per_sample = stt.AudioBitRates.BITRATE_16 // 8 sample_offset = 0 - samples_left = len(audio_bytes) // bytes_per_sample + samples_left = audio_bytes_size // bytes_per_sample while samples_left > 0: bytes_offset = sample_offset * bytes_per_sample