Merge branch 'master' of https://github.com/KoljaB/RealtimeSTT

KoljaB · Nov 15, 2024 · c47c2e4
2 parents: 39684ac + 0c8db45
commit c47c2e4

Showing 7 changed files with 46 additions and 36 deletions.
diff --git a/README.md b/README.md
@@ -435,6 +435,8 @@ When you initialize the `AudioToTextRecorder` class, you have various options to
 
 - **level** (int, default=logging.WARNING): Logging level.
 
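+- **init_logging** (bool, default=True): Whether the recorder configures Python's root logger itself: it sets the root level to DEBUG, replaces any existing handlers, and adds a console handler at `level` plus, unless `no_log_file` is set, a `realtimesst.log` file handler. Set to False to keep your application's own logging configuration.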
+
 - **handle_buffer_overflow** (bool, default=True): If set, the system will log a warning when an input overflow occurs during recording and remove the data from the buffer.
 
 - **beam_size** (int, default=5): The beam size to use for beam search decoding.

diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py
@@ -208,6 +208,7 @@ def __init__(self,
                  use_microphone=True,
                  spinner=True,
                  level=logging.WARNING,
+                 init_logging=True,
 
                  # Realtime transcription parameters
                  enable_realtime_transcription=False,
@@ -314,6 +315,8 @@ def __init__(self,
         - spinner (bool, default=True): Show spinner animation with current
             state.
         - level (int, default=logging.WARNING): Logging level.
+        - init_logging (bool, default=True): Whether to initialize
+            the logging framework. Set to False to manage this yourself.
         - enable_realtime_transcription (bool, default=False): Enables or
             disables real-time transcription of audio. When set to True, the
             audio will be transcribed continuously as it is being recorded.
@@ -568,36 +571,37 @@ def __init__(self,
         self.early_transcription_on_silence = early_transcription_on_silence
         self.use_extended_logging = use_extended_logging
 
-        # Initialize the logging configuration with the specified level
-        log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s'
+        if init_logging:
+            # Initialize the logging configuration with the specified level
+            log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s'
 
-        # Adjust file_log_format to include milliseconds
-        file_log_format = '%(asctime)s.%(msecs)03d - ' + log_format
+            # Adjust file_log_format to include milliseconds
+            file_log_format = '%(asctime)s.%(msecs)03d - ' + log_format
 
-        # Get the root logger
-        logger = logging.getLogger()
-        logger.setLevel(logging.DEBUG)  # Set the root logger's level to DEBUG
+            # Get the root logger
+            logger = logging.getLogger()
+            logger.setLevel(logging.DEBUG)  # Set the root logger's level to DEBUG
 
-        # Remove any existing handlers
-        logger.handlers = []
+            # Remove any existing handlers
+            logger.handlers = []
 
-        # Create a console handler and set its level
-        console_handler = logging.StreamHandler()
-        console_handler.setLevel(level) 
-        console_handler.setFormatter(logging.Formatter(log_format))
+            # Create a console handler and set its level
+            console_handler = logging.StreamHandler()
+            console_handler.setLevel(level) 
+            console_handler.setFormatter(logging.Formatter(log_format))
 
-        # Add the handlers to the logger
-        if not no_log_file:
-            # Create a file handler and set its level
-            file_handler = logging.FileHandler('realtimesst.log')
-            file_handler.setLevel(logging.DEBUG)
-            file_handler.setFormatter(logging.Formatter(
-                file_log_format,
-                datefmt='%Y-%m-%d %H:%M:%S'
-            ))
+            # Add the handlers to the logger
+            if not no_log_file:
+                # Create a file handler and set its level
+                file_handler = logging.FileHandler('realtimesst.log')
+                file_handler.setLevel(logging.DEBUG)
+                file_handler.setFormatter(logging.Formatter(
+                    file_log_format,
+                    datefmt='%Y-%m-%d %H:%M:%S'
+                ))
 
-            logger.addHandler(file_handler)
-        logger.addHandler(console_handler)
+                logger.addHandler(file_handler)
+            logger.addHandler(console_handler)            
 
         self.is_shut_down = False
         self.shutdown_event = mp.Event()
@@ -811,6 +815,8 @@ def __init__(self,
                        0.3)
         )
         self.frames = []
+        self.new_frames = mp.Event()
+        self.new_frames.set()
 
         # Recording control flags
         self.is_recording = False
@@ -1135,7 +1141,6 @@ def setup_audio():
                                 time_since_last_buffer_message = time.time()
 
                             audio_queue.put(to_process)
-
 
                 except OSError as e:
                     if e.errno == pyaudio.paInputOverflowed:
@@ -1263,11 +1268,11 @@ def wait_audio(self):
             audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
             self.audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
             self.frames.clear()
+            self.new_frames.set()
 
             # Reset recording-related timestamps
             self.recording_stop_time = 0
             self.listen_start = 0
-
             self._set_state("inactive")
 
         except KeyboardInterrupt:
@@ -1449,6 +1454,7 @@ def start(self):
         self.wakeword_detected = False
         self.wake_word_detect_time = 0
         self.frames = []
+        self.new_frames.set()
         self.is_recording = True
         self.recording_start_time = time.time()
         self.is_silero_speech_active = False
@@ -1560,7 +1566,6 @@ def shutdown(self):
             print("\033[91mRealtimeSTT shutting down\033[0m")
             # logging.debug("RealtimeSTT shutting down")
 
-            # Force wait_audio() and text() to exit
             self.is_shut_down = True
             self.start_recording_event.set()
             self.stop_recording_event.set()
@@ -1571,10 +1576,10 @@ def shutdown(self):
 
             logging.debug('Finishing recording thread')
             if self.recording_thread:
+                self.audio_queue.put(bytes(1))
                 self.recording_thread.join()
 
             logging.debug('Terminating reader process')
-
             # Give it some time to finish the loop and cleanup.
             if self.use_microphone.value:
                 self.reader_process.join(timeout=10)
@@ -1813,7 +1818,8 @@ def _recording_worker(self):
                             # Add the buffered audio
                             # to the recording frames
                             self.frames.extend(list(self.audio_buffer))
-                            self.audio_buffer.clear()
+                            self.new_frames.set()
+                            self.audio_buffer.clear()                            
 
                             if self.use_extended_logging:
                                 logging.debug('Debug: Resetting Silero VAD model states')
@@ -1984,6 +1990,7 @@ def _recording_worker(self):
                     if self.use_extended_logging:
                         logging.debug('Debug: Appending data to frames')
                     self.frames.append(data)
+                    self.new_frames.set()
 
                 if self.use_extended_logging:
                     logging.debug('Debug: Checking if not recording or speech end silence start')
@@ -2026,6 +2033,8 @@ def _realtime_worker(self):
 
                 # Check if the recording is active
                 if self.is_recording:
+                    self.new_frames.wait()
+                    self.new_frames.clear()
 
                     # Sleep for the duration of the transcription resolution
                     time.sleep(self.realtime_processing_pause)

diff --git a/example_app/ui_openai_voice_interface.py b/example_app/ui_openai_voice_interface.py
@@ -3,9 +3,9 @@
     from RealtimeTTS import TextToAudioStream, AzureEngine, ElevenlabsEngine, SystemEngine
     from RealtimeSTT import AudioToTextRecorder
 
-    from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
-    from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont, QMouseEvent, QContextMenuEvent
-    from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget, QMenu, QAction
+    from PyQt5.QtCore import Qt, QTimer, QEvent, pyqtSignal, QThread
+    from PyQt5.QtGui import QColor, QPainter, QFontMetrics, QFont, QMouseEvent
+    from PyQt5.QtWidgets import QApplication, QWidget, QDesktopWidget, QMenu, QAction
 
     import os
     import openai

diff --git a/example_webserver/client.py b/example_webserver/client.py
@@ -1,4 +1,4 @@
-from colorama import Fore, Back, Style
+from colorama import Fore, Style
 import websockets
 import colorama
 import keyboard

diff --git a/example_webserver/server.py b/example_webserver/server.py
@@ -11,7 +11,7 @@
     print ("└─ ... ", end='', flush=True)
 
     from RealtimeSTT import AudioToTextRecorder
-    from colorama import Fore, Back, Style
+    from colorama import Fore, Style
     import websockets
     import threading
     import colorama

diff --git a/tests/openwakeword_test.py b/tests/openwakeword_test.py
@@ -1,7 +1,6 @@
 if __name__ == '__main__':
     print("Starting...")
     from RealtimeSTT import AudioToTextRecorder
-    import logging
 
     detected = False
 

diff --git a/tests/realtimestt_chinese.py b/tests/realtimestt_chinese.py
@@ -1,5 +1,5 @@
 from RealtimeSTT import AudioToTextRecorder
-from colorama import Fore, Back, Style
+from colorama import Fore, Style
 import colorama
 import os