From 95c771e330023ea72f3c8e25345e263e997c4665 Mon Sep 17 00:00:00 2001
From: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
Date: Mon, 27 Nov 2023 21:00:39 +1300
Subject: [PATCH] Send esphome tts_stream event after audio bytes are actually
 loaded into memory (#104448)

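Send tts_stream event after audio bytes are actually loaded into memory

Previously VOICE_ASSISTANT_TTS_STREAM_START was emitted before the TTS
media was fetched and its WAV frames were read. It is now emitted only
after the audio bytes have been read, immediately before they are sent.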
---
 .../components/esphome/voice_assistant.py          | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/homeassistant/components/esphome/voice_assistant.py b/homeassistant/components/esphome/voice_assistant.py
index bb62d495076fe4..68ed98aa789376 100644
--- a/homeassistant/components/esphome/voice_assistant.py
+++ b/homeassistant/components/esphome/voice_assistant.py
@@ -301,10 +301,6 @@ async def _send_tts(self, media_id: str) -> None:
             if self.transport is None:
                 return
 
-            self.handle_event(
-                VoiceAssistantEventType.VOICE_ASSISTANT_TTS_STREAM_START, {}
-            )
-
             extension, data = await tts.async_get_media_source_audio(
                 self.hass,
                 media_id,
@@ -331,11 +327,17 @@ async def _send_tts(self, media_id: str) -> None:
 
                 audio_bytes = wav_file.readframes(wav_file.getnframes())
 
-            _LOGGER.debug("Sending %d bytes of audio", len(audio_bytes))
+            audio_bytes_size = len(audio_bytes)
+
+            _LOGGER.debug("Sending %d bytes of audio", audio_bytes_size)
+
+            self.handle_event(
+                VoiceAssistantEventType.VOICE_ASSISTANT_TTS_STREAM_START, {}
+            )
 
             bytes_per_sample = stt.AudioBitRates.BITRATE_16 // 8
             sample_offset = 0
-            samples_left = len(audio_bytes) // bytes_per_sample
+            samples_left = audio_bytes_size // bytes_per_sample
 
             while samples_left > 0:
                 bytes_offset = sample_offset * bytes_per_sample