-
Notifications
You must be signed in to change notification settings - Fork 226
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
973 additions
and
812 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,175 @@ | ||
__pycache__ | ||
.gitignore | ||
_open_cmd.bat | ||
docs/ | ||
examples/ | ||
batch/ | ||
test_env/ | ||
tests_private/ | ||
docs/ | ||
docs_private/ | ||
env_realtimestt/ | ||
example_rvc/assets/ | ||
server/ | ||
RealtimeSTT.code-workspace | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
realtimesst.log | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
#poetry.lock | ||
|
||
# pdm | ||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||
#pdm.lock | ||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||
# in version control. | ||
# https://pdm.fming.dev/#use-with-ide | ||
.pdm.toml | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
myenv/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# PyCharm | ||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||
# and can be added to the global gitignore or merged into this file. For a more nuclear | ||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||
#.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
from .audio_recorder import AudioToTextRecorder | ||
from .audio_recorder_client import AudioToTextRecorderClient | ||
from .audio_recorder_client import AudioToTextRecorderClient | ||
from .audio_input import AudioInput |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
from colorama import init, Fore, Style | ||
import pyaudio | ||
import logging | ||
import time | ||
|
||
|
||
class AudioInput:
    """Thin wrapper around a PyAudio input stream.

    Typical use is ``setup()`` -> repeated ``read_chunk()`` -> ``cleanup()``.
    ``list_devices()`` is an independent diagnostic helper that prints the
    available input devices and the sample rates each one accepts.

    Attributes set by ``__init__``:
        input_device_index: Device index to record from, or ``None`` to use
            the default input device (resolved and stored by ``setup()``).
        debug_mode: When true, ``setup()`` prints progress information.
        audio_interface: The ``pyaudio.PyAudio`` instance, or ``None``.
        stream: The open input stream, or ``None``.
        device_sample_rate: Sample rate chosen by ``setup()``, or ``None``.
        CHUNK / FORMAT / CHANNELS / DESIRED_RATE: Stream parameters
            (frames per buffer, sample format, channel count, preferred rate).
    """

    def __init__(self, input_device_index=None, debug_mode=False):
        """Store configuration; no audio resources are acquired here."""
        self.input_device_index = input_device_index
        self.debug_mode = debug_mode
        self.audio_interface = None
        self.stream = None
        self.device_sample_rate = None

        # PyAudio constants
        self.CHUNK = 1024
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.DESIRED_RATE = 16000

    def get_supported_sample_rates(self, device_index):
        """Return the standard sample rates the given input device accepts.

        Requires ``self.audio_interface`` to be an initialised interface.

        Args:
            device_index: Index of the device to probe.

        Returns:
            A list of supported rates in ascending order (possibly empty).
        """
        standard_rates = [8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000]
        supported_rates = []

        device_info = self.audio_interface.get_device_info_by_index(device_index)
        # Probe the *input* side of the device.
        max_channels = device_info.get('maxInputChannels')

        for rate in standard_rates:
            try:
                if self.audio_interface.is_format_supported(
                    rate,
                    input_device=device_index,
                    input_channels=max_channels,
                    input_format=self.FORMAT,
                ):
                    supported_rates.append(rate)
            except Exception:
                # PyAudio reports an unsupported format by raising
                # (ValueError); treat any probe failure as "not supported"
                # without swallowing KeyboardInterrupt/SystemExit.
                continue
        return supported_rates

    def _get_best_sample_rate(self, actual_device_index, desired_rate):
        """Pick the sample rate to open the device with.

        Preference order: ``desired_rate`` if supported, otherwise the highest
        supported standard rate, otherwise the device's reported default rate.

        Args:
            actual_device_index: Index of the device that will be opened.
            desired_rate: Preferred sample rate in Hz.

        Returns:
            The chosen rate in Hz; 44100 if the device cannot be queried.
        """
        try:
            device_info = self.audio_interface.get_device_info_by_index(actual_device_index)
            supported_rates = self.get_supported_sample_rates(actual_device_index)

            if desired_rate in supported_rates:
                return desired_rate

            if supported_rates:
                return max(supported_rates)

            # Nothing from the standard list was accepted: trust the device's
            # own default instead of guessing.
            return int(device_info.get('defaultSampleRate', 44100))

        except Exception as e:
            logging.warning(f"Error determining sample rate: {e}")
            return 44100  # Safe fallback

    def list_devices(self):
        """Print every audio input device together with its supported sample rates.

        A temporary PyAudio interface is used for the enumeration and released
        afterwards; ``self.audio_interface`` is restored to whatever it was
        before the call, so an interface opened by ``setup()`` is not disturbed.
        """
        previous_interface = self.audio_interface
        temp_interface = None
        try:
            init()  # Initialize colorama
            temp_interface = pyaudio.PyAudio()
            # get_supported_sample_rates() reads self.audio_interface.
            self.audio_interface = temp_interface
            device_count = temp_interface.get_device_count()

            print("Available audio input devices:")
            for i in range(device_count):
                device_info = temp_interface.get_device_info_by_index(i)
                device_name = device_info.get('name')
                max_input_channels = device_info.get('maxInputChannels', 0)

                if max_input_channels <= 0:
                    # Output-only device: nothing to record from.
                    continue

                supported_rates = self.get_supported_sample_rates(i)
                print(f"{Fore.LIGHTGREEN_EX}Device {Style.RESET_ALL}{i}{Fore.LIGHTGREEN_EX}: {device_name}{Style.RESET_ALL}")

                # Format each rate in cyan
                if supported_rates:
                    rates_formatted = ", ".join([f"{Fore.CYAN}{rate}{Style.RESET_ALL}" for rate in supported_rates])
                    print(f"  {Fore.YELLOW}Supported sample rates: {rates_formatted}{Style.RESET_ALL}")
                else:
                    print(f"  {Fore.YELLOW}Supported sample rates: None{Style.RESET_ALL}")

        except Exception as e:
            print(f"Error listing devices: {e}")
        finally:
            # Never leave a terminated interface behind, and never terminate
            # an interface this method did not create.
            self.audio_interface = previous_interface
            if temp_interface is not None:
                temp_interface.terminate()

    def _terminate_interface(self):
        """Terminate and forget the current PyAudio interface, if any."""
        interface, self.audio_interface = self.audio_interface, None
        if interface is None:
            return
        try:
            interface.terminate()
        except Exception as e:
            logging.warning(f"Error terminating audio interface: {e}")

    def setup(self):
        """Initialize the audio interface and open the input stream.

        Resolves the device index (falling back to the default input device),
        chooses a sample rate and opens the stream. On any failure the PyAudio
        interface is released again so nothing is leaked.

        Returns:
            True if the stream was opened, False otherwise.
        """
        try:
            self.audio_interface = pyaudio.PyAudio()

            if self.debug_mode:
                print(f"Input device index: {self.input_device_index}")
            if self.input_device_index is not None:
                actual_device_index = self.input_device_index
            else:
                actual_device_index = self.audio_interface.get_default_input_device_info()['index']

            if self.debug_mode:
                print(f"Actual selected device index: {actual_device_index}")
            self.input_device_index = actual_device_index
            self.device_sample_rate = self._get_best_sample_rate(actual_device_index, self.DESIRED_RATE)

            if self.debug_mode:
                print(f"Setting up audio on device {self.input_device_index} with sample rate {self.device_sample_rate}")

            try:
                self.stream = self.audio_interface.open(
                    format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.device_sample_rate,
                    input=True,
                    frames_per_buffer=self.CHUNK,
                    input_device_index=self.input_device_index,
                )
            except Exception as e:
                print(f"Failed to initialize audio stream at {self.device_sample_rate} Hz: {e}")
                self._terminate_interface()
                return False

            if self.debug_mode:
                print(f"Audio recording initialized successfully at {self.device_sample_rate} Hz")
            return True

        except Exception as e:
            print(f"Error initializing audio recording: {e}")
            self._terminate_interface()
            return False

    def read_chunk(self):
        """Read one ``CHUNK``-frame buffer from the stream.

        ``setup()`` must have succeeded first; otherwise ``self.stream`` is
        ``None`` and the call fails. Input overflows are ignored rather than
        raised (``exception_on_overflow=False``).
        """
        return self.stream.read(self.CHUNK, exception_on_overflow=False)

    def cleanup(self):
        """Release the stream and the PyAudio interface.

        Each resource is released independently, so a failure while closing
        the stream does not prevent the interface from being terminated. Both
        references are always reset to ``None``. Safe to call repeatedly.
        """
        try:
            if self.stream:
                self.stream.stop_stream()
                self.stream.close()
        except Exception as e:
            print(f"Error cleaning up audio resources: {e}")
        finally:
            self.stream = None

        try:
            if self.audio_interface:
                self.audio_interface.terminate()
        except Exception as e:
            print(f"Error cleaning up audio resources: {e}")
        finally:
            self.audio_interface = None
Oops, something went wrong.