Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/KoljaB/RealtimeSTT
Browse files (browse the repository at this point in the history)
  • Loading branch information
KoljaB committed Nov 15, 2024
2 parents 39684ac + 0c8db45 commit c47c2e4
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 36 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,8 @@ When you initialize the `AudioToTextRecorder` class, you have various options to
- **level** (int, default=logging.WARNING): Logging level.
- **init_logging** (bool, default=True): Whether to initialize the logging framework. Set to False to manage this yourself.
- **handle_buffer_overflow** (bool, default=True): If True, the system logs a warning when an input overflow occurs during recording and removes the data from the buffer.
- **beam_size** (int, default=5): The beam size to use for beam search decoding.
Expand Down
67 changes: 38 additions & 29 deletions RealtimeSTT/audio_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ def __init__(self,
use_microphone=True,
spinner=True,
level=logging.WARNING,
init_logging=True,

# Realtime transcription parameters
enable_realtime_transcription=False,
Expand Down Expand Up @@ -314,6 +315,8 @@ def __init__(self,
- spinner (bool, default=True): Show spinner animation with current
state.
- level (int, default=logging.WARNING): Logging level.
- init_logging (bool, default=True): Whether to initialize
the logging framework. Set to False to manage this yourself.
- enable_realtime_transcription (bool, default=False): Enables or
disables real-time transcription of audio. When set to True, the
audio will be transcribed continuously as it is being recorded.
Expand Down Expand Up @@ -568,36 +571,37 @@ def __init__(self,
self.early_transcription_on_silence = early_transcription_on_silence
self.use_extended_logging = use_extended_logging

# Initialize the logging configuration with the specified level
log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s'
if init_logging:
# Initialize the logging configuration with the specified level
log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s'

# Adjust file_log_format to include milliseconds
file_log_format = '%(asctime)s.%(msecs)03d - ' + log_format
# Adjust file_log_format to include milliseconds
file_log_format = '%(asctime)s.%(msecs)03d - ' + log_format

# Get the root logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG) # Set the root logger's level to DEBUG
# Get the root logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG) # Set the root logger's level to DEBUG

# Remove any existing handlers
logger.handlers = []
# Remove any existing handlers
logger.handlers = []

# Create a console handler and set its level
console_handler = logging.StreamHandler()
console_handler.setLevel(level)
console_handler.setFormatter(logging.Formatter(log_format))
# Create a console handler and set its level
console_handler = logging.StreamHandler()
console_handler.setLevel(level)
console_handler.setFormatter(logging.Formatter(log_format))

# Add the handlers to the logger
if not no_log_file:
# Create a file handler and set its level
file_handler = logging.FileHandler('realtimesst.log')
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(logging.Formatter(
file_log_format,
datefmt='%Y-%m-%d %H:%M:%S'
))
# Add the handlers to the logger
if not no_log_file:
# Create a file handler and set its level
file_handler = logging.FileHandler('realtimesst.log')
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(logging.Formatter(
file_log_format,
datefmt='%Y-%m-%d %H:%M:%S'
))

logger.addHandler(file_handler)
logger.addHandler(console_handler)
logger.addHandler(file_handler)
logger.addHandler(console_handler)

self.is_shut_down = False
self.shutdown_event = mp.Event()
Expand Down Expand Up @@ -811,6 +815,8 @@ def __init__(self,
0.3)
)
self.frames = []
self.new_frames = mp.Event()
self.new_frames.set()

# Recording control flags
self.is_recording = False
Expand Down Expand Up @@ -1135,7 +1141,6 @@ def setup_audio():
time_since_last_buffer_message = time.time()

audio_queue.put(to_process)


except OSError as e:
if e.errno == pyaudio.paInputOverflowed:
Expand Down Expand Up @@ -1263,11 +1268,11 @@ def wait_audio(self):
audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
self.audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
self.frames.clear()
self.new_frames.set()

# Reset recording-related timestamps
self.recording_stop_time = 0
self.listen_start = 0

self._set_state("inactive")

except KeyboardInterrupt:
Expand Down Expand Up @@ -1449,6 +1454,7 @@ def start(self):
self.wakeword_detected = False
self.wake_word_detect_time = 0
self.frames = []
self.new_frames.set()
self.is_recording = True
self.recording_start_time = time.time()
self.is_silero_speech_active = False
Expand Down Expand Up @@ -1560,7 +1566,6 @@ def shutdown(self):
print("\033[91mRealtimeSTT shutting down\033[0m")
# logging.debug("RealtimeSTT shutting down")

# Force wait_audio() and text() to exit
self.is_shut_down = True
self.start_recording_event.set()
self.stop_recording_event.set()
Expand All @@ -1571,10 +1576,10 @@ def shutdown(self):

logging.debug('Finishing recording thread')
if self.recording_thread:
self.audio_queue.put(bytes(1))
self.recording_thread.join()

logging.debug('Terminating reader process')

# Give it some time to finish the loop and cleanup.
if self.use_microphone.value:
self.reader_process.join(timeout=10)
Expand Down Expand Up @@ -1813,7 +1818,8 @@ def _recording_worker(self):
# Add the buffered audio
# to the recording frames
self.frames.extend(list(self.audio_buffer))
self.audio_buffer.clear()
self.new_frames.set()
self.audio_buffer.clear()

if self.use_extended_logging:
logging.debug('Debug: Resetting Silero VAD model states')
Expand Down Expand Up @@ -1984,6 +1990,7 @@ def _recording_worker(self):
if self.use_extended_logging:
logging.debug('Debug: Appending data to frames')
self.frames.append(data)
self.new_frames.set()

if self.use_extended_logging:
logging.debug('Debug: Checking if not recording or speech end silence start')
Expand Down Expand Up @@ -2026,6 +2033,8 @@ def _realtime_worker(self):

# Check if the recording is active
if self.is_recording:
self.new_frames.wait()
self.new_frames.clear()

# Sleep for the duration of the transcription resolution
time.sleep(self.realtime_processing_pause)
Expand Down
6 changes: 3 additions & 3 deletions example_app/ui_openai_voice_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from RealtimeTTS import TextToAudioStream, AzureEngine, ElevenlabsEngine, SystemEngine
from RealtimeSTT import AudioToTextRecorder

from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont, QMouseEvent, QContextMenuEvent
from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget, QMenu, QAction
from PyQt5.QtCore import Qt, QTimer, QEvent, pyqtSignal, QThread
from PyQt5.QtGui import QColor, QPainter, QFontMetrics, QFont, QMouseEvent
from PyQt5.QtWidgets import QApplication, QWidget, QDesktopWidget, QMenu, QAction

import os
import openai
Expand Down
2 changes: 1 addition & 1 deletion example_webserver/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from colorama import Fore, Back, Style
from colorama import Fore, Style
import websockets
import colorama
import keyboard
Expand Down
2 changes: 1 addition & 1 deletion example_webserver/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
print ("└─ ... ", end='', flush=True)

from RealtimeSTT import AudioToTextRecorder
from colorama import Fore, Back, Style
from colorama import Fore, Style
import websockets
import threading
import colorama
Expand Down
1 change: 0 additions & 1 deletion tests/openwakeword_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
if __name__ == '__main__':
print("Starting...")
from RealtimeSTT import AudioToTextRecorder
import logging

detected = False

Expand Down
2 changes: 1 addition & 1 deletion tests/realtimestt_chinese.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from RealtimeSTT import AudioToTextRecorder
from colorama import Fore, Back, Style
from colorama import Fore, Style
import colorama
import os

Expand Down

0 comments on commit c47c2e4

Please sign in to comment.