From fd6ddc32dbd9bce67c68e835709b34d69cd3cbfd Mon Sep 17 00:00:00 2001 From: oddlama Date: Wed, 19 Jun 2024 22:08:07 +0200 Subject: [PATCH 1/5] fix: make sure shutdown() wakes up the recording_thread Also ensure that the reader_process is not accessed if it hasn't been initialized due to use_microphone=False --- RealtimeSTT/audio_recorder.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py index 531d2c6..58e0cd2 100644 --- a/RealtimeSTT/audio_recorder.py +++ b/RealtimeSTT/audio_recorder.py @@ -1019,19 +1019,22 @@ def shutdown(self): logging.debug('Finishing recording thread') if self.recording_thread: + # Submit a single byte to the audio buffer to force the thread to wake up + # and notice the shutdown. + self.audio_queue.put(bytes(1)) self.recording_thread.join() logging.debug('Terminating reader process') # Give it some time to finish the loop and cleanup. - if self.use_microphone: + if self.use_microphone.value: self.reader_process.join(timeout=10) - if self.reader_process.is_alive(): - logging.warning("Reader process did not terminate " - "in time. Terminating forcefully." - ) - self.reader_process.terminate() + if self.reader_process.is_alive(): + logging.warning("Reader process did not terminate " + "in time. Terminating forcefully." + ) + self.reader_process.terminate() logging.debug('Terminating transcription process') self.transcript_process.join(timeout=10) From 2d76fd4d78474f753cef34568c832bce8e5ede42 Mon Sep 17 00:00:00 2001 From: oddlama Date: Thu, 20 Jun 2024 00:54:33 +0200 Subject: [PATCH 2/5] feat: add init_logging option to allow users to use their own logging framework --- README.md | 2 ++ RealtimeSTT/audio_recorder.py | 36 +++++++++++++++++++---------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index cf2349f..7658828 100644 --- a/README.md +++ b/README.md @@ -271,6 +271,8 @@ When you initialize the `AudioToTextRecorder` class, you have various options to - **level** (int, default=logging.WARNING): Logging level. +- **init_logging** (bool, default=True): Whether to initialize the logging framework. Set to False to manage this yourself. + - **handle_buffer_overflow** (bool, default=True): If set, the system will log a warning when an input overflow occurs during recording and remove the data from the buffer. - **beam_size** (int, default=5): The beam size to use for beam search decoding. diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py index 531d2c6..863a689 100644 --- a/RealtimeSTT/audio_recorder.py +++ b/RealtimeSTT/audio_recorder.py @@ -100,6 +100,7 @@ def __init__(self, use_microphone=True, spinner=True, level=logging.WARNING, + init_logging=True, # Realtime transcription parameters enable_realtime_transcription=False, @@ -192,6 +193,8 @@ def __init__(self, - spinner (bool, default=True): Show spinner animation with current state. - level (int, default=logging.WARNING): Logging level. + - init_logging (bool, default=True): Whether to initialize + the logging framework. Set to False to manage this yourself. - enable_realtime_transcription (bool, default=False): Enables or disables real-time transcription of audio. When set to True, the audio will be transcribed continuously as it is being recorded. @@ -369,26 +372,27 @@ def __init__(self, self.initial_prompt = initial_prompt self.suppress_tokens = suppress_tokens - # Initialize the logging configuration with the specified level - log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s' + if init_logging: + # Initialize the logging configuration with the specified level + log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s' - # Create a logger - logger = logging.getLogger() - logger.setLevel(level) # Set the root logger's level + # Create a logger + logger = logging.getLogger() + logger.setLevel(level) # Set the root logger's level - # Create a file handler and set its level - file_handler = logging.FileHandler('realtimesst.log') - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(logging.Formatter(log_format)) + # Create a file handler and set its level + file_handler = logging.FileHandler('realtimesst.log') + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(logging.Formatter(log_format)) - # Create a console handler and set its level - console_handler = logging.StreamHandler() - console_handler.setLevel(level) - console_handler.setFormatter(logging.Formatter(log_format)) + # Create a console handler and set its level + console_handler = logging.StreamHandler() + console_handler.setLevel(level) + console_handler.setFormatter(logging.Formatter(log_format)) - # Add the handlers to the logger - logger.addHandler(file_handler) - logger.addHandler(console_handler) + # Add the handlers to the logger + logger.addHandler(file_handler) + logger.addHandler(console_handler) self.is_shut_down = False self.shutdown_event = mp.Event() From e9f4fe641bf88cca7aa80a6c319970ca62240fdc Mon Sep 17 00:00:00 2001 From: oddlama Date: Thu, 20 Jun 2024 17:13:16 +0200 Subject: [PATCH 3/5] feat: reduce realtime transcription load by only transcribing if new frames arrive --- RealtimeSTT/audio_recorder.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py index 531d2c6..5c54f67 100644 --- a/RealtimeSTT/audio_recorder.py +++ b/RealtimeSTT/audio_recorder.py @@ -537,6 +537,8 @@ def __init__(self, self.pre_recording_buffer_duration) ) self.frames = [] + self.new_frames = mp.Event() + self.new_frames.set() # Recording control flags self.is_recording = False @@ -807,6 +809,7 @@ def wait_audio(self): audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16) self.audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE self.frames.clear() + self.new_frames.set() # Reset recording-related timestamps self.recording_stop_time = 0 @@ -915,6 +918,7 @@ def start(self): self.wakeword_detected = False self.wake_word_detect_time = 0 self.frames = [] + self.new_frames.set() self.is_recording = True self.recording_start_time = time.time() self.is_silero_speech_active = False @@ -1185,6 +1189,7 @@ def _recording_worker(self): # Add the buffered audio # to the recording frames self.frames.extend(list(self.audio_buffer)) + self.new_frames.set() self.audio_buffer.clear() self.silero_vad_model.reset_states() @@ -1238,6 +1243,7 @@ def _recording_worker(self): if self.is_recording: self.frames.append(data) + self.new_frames.set() if not self.is_recording or self.speech_end_silence_start: self.audio_buffer.append(data) @@ -1271,6 +1277,8 @@ def _realtime_worker(self): # Check if the recording is active if self.is_recording: + self.new_frames.wait() + self.new_frames.clear() # Sleep for the duration of the transcription resolution time.sleep(self.realtime_processing_pause) From 7cb10e1f167c2a001ac7b7db2cdacb82865cb6be Mon Sep 17 00:00:00 2001 From: dom3 <52584844+dom3@users.noreply.github.com> Date: Mon, 29 Jul 2024 15:32:36 -0700 Subject: [PATCH 4/5] Update audio_recorder.py --- RealtimeSTT/audio_recorder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py index 05a9f85..3959524 100644 --- a/RealtimeSTT/audio_recorder.py +++ b/RealtimeSTT/audio_recorder.py @@ -835,7 +835,7 @@ def _audio_data_worker(audio_queue, try: while not shutdown_event.is_set(): try: - data = stream.read(buffer_size) + data = stream.read(buffer_size, exception_on_overflow=False) except OSError as e: if e.errno == pyaudio.paInputOverflowed: From b52be1add05637e94e7dd9f0d1ac41c0073c5634 Mon Sep 17 00:00:00 2001 From: prateekvellala <136360150+prateekvellala@users.noreply.github.com> Date: Sat, 24 Aug 2024 18:36:13 +0530 Subject: [PATCH 5/5] removed unnecessary imports --- example_app/ui_openai_voice_interface.py | 6 +++--- example_webserver/client.py | 2 +- example_webserver/server.py | 2 +- tests/openwakeword_test.py | 1 - tests/realtimestt_chinese.py | 2 +- tests/realtimestt_test.py | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/example_app/ui_openai_voice_interface.py b/example_app/ui_openai_voice_interface.py index d256ed1..8fe49a6 100644 --- a/example_app/ui_openai_voice_interface.py +++ b/example_app/ui_openai_voice_interface.py @@ -3,9 +3,9 @@ from RealtimeTTS import TextToAudioStream, AzureEngine, ElevenlabsEngine, SystemEngine from RealtimeSTT import AudioToTextRecorder - from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation - from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont, QMouseEvent, QContextMenuEvent - from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget, QMenu, QAction + from PyQt5.QtCore import Qt, QTimer, QEvent, pyqtSignal, QThread + from PyQt5.QtGui import QColor, QPainter, QFontMetrics, QFont, QMouseEvent + from PyQt5.QtWidgets import QApplication, QWidget, QDesktopWidget, QMenu, QAction import os import openai diff --git a/example_webserver/client.py b/example_webserver/client.py index 0371102..2a9f2cc 100644 --- a/example_webserver/client.py +++ b/example_webserver/client.py @@ -1,4 +1,4 @@ -from colorama import Fore, Back, Style +from colorama import Fore, Style import websockets import colorama import keyboard diff --git a/example_webserver/server.py b/example_webserver/server.py index 467b39f..7155e2c 100644 --- a/example_webserver/server.py +++ b/example_webserver/server.py @@ -11,7 +11,7 @@ print ("└─ ... ", end='', flush=True) from RealtimeSTT import AudioToTextRecorder - from colorama import Fore, Back, Style + from colorama import Fore, Style import websockets import threading import colorama diff --git a/tests/openwakeword_test.py b/tests/openwakeword_test.py index 71611c7..8ad083a 100644 --- a/tests/openwakeword_test.py +++ b/tests/openwakeword_test.py @@ -1,7 +1,6 @@ if __name__ == '__main__': print("Starting...") from RealtimeSTT import AudioToTextRecorder - import logging detected = False diff --git a/tests/realtimestt_chinese.py b/tests/realtimestt_chinese.py index 1fb6cfe..54b4b4d 100644 --- a/tests/realtimestt_chinese.py +++ b/tests/realtimestt_chinese.py @@ -1,5 +1,5 @@ from RealtimeSTT import AudioToTextRecorder -from colorama import Fore, Back, Style +from colorama import Fore, Style import colorama import os diff --git a/tests/realtimestt_test.py b/tests/realtimestt_test.py index 33c9a13..5219a56 100644 --- a/tests/realtimestt_test.py +++ b/tests/realtimestt_test.py @@ -1,5 +1,5 @@ from RealtimeSTT import AudioToTextRecorder -from colorama import Fore, Back, Style +from colorama import Fore, Style import colorama import os