upgrade to server

KoljaB · Nov 25, 2024 · 16ad094 · 16ad094
1 parent c47c2e4
commit 16ad094
Show file tree

Hide file tree

Showing 12 changed files with 973 additions and 812 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,175 @@
-__pycache__
+.gitignore
+_open_cmd.bat
+docs/
+examples/
+batch/
+test_env/
+tests_private/
+docs/
+docs_private/
+env_realtimestt/
+example_rvc/assets/
+server/
+RealtimeSTT.code-workspace
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
 .cache
-realtimesst.log
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+myenv/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/README.md b/README.md
@@ -28,7 +28,7 @@ https://github.com/user-attachments/assets/797e6552-27cd-41b1-a7f3-e5cbc72094f5
 
 ### Updates
 
-Latest Version: v0.3.7
+Latest Version: v0.3.8
 
 See [release history](https://github.com/KoljaB/RealtimeSTT/releases).
 

diff --git a/RealtimeSTT/__init__.py b/RealtimeSTT/__init__.py
@@ -1,2 +1,3 @@
 from .audio_recorder import AudioToTextRecorder
-from .audio_recorder_client import AudioToTextRecorderClient
+from .audio_recorder_client import AudioToTextRecorderClient
+from .audio_input import AudioInput
diff --git a/RealtimeSTT/audio_input.py b/RealtimeSTT/audio_input.py
@@ -0,0 +1,153 @@
+from colorama import init, Fore, Style
+import pyaudio
+import logging
+import time
+
+
+class AudioInput:
+    """Microphone capture helper built on a PyAudio input stream.
+
+    ``setup()`` opens the stream that ``read_chunk()`` reads from and
+    ``cleanup()`` releases it again. ``list_devices()`` is an independent
+    utility that prints the input devices and the sample rates they accept.
+    """
+
+    def __init__(self, input_device_index=None, debug_mode=False):
+        """Store the configuration; no audio resources are opened here.
+
+        Args:
+            input_device_index: PyAudio device index to record from, or
+                None to let ``setup()`` use the default input device.
+            debug_mode: When true, ``setup()`` prints progress messages.
+        """
+        self.input_device_index = input_device_index
+        self.debug_mode = debug_mode
+        self.audio_interface = None  # PyAudio instance, created by setup()
+        self.stream = None  # input stream, opened by setup()
+        self.device_sample_rate = None  # rate chosen by setup()
+
+        # PyAudio constants
+        self.CHUNK = 1024  # frames requested per read_chunk() call
+        self.FORMAT = pyaudio.paInt16
+        self.CHANNELS = 1
+        self.DESIRED_RATE = 16000  # preferred rate; setup() may fall back
+
+    def get_supported_sample_rates(self, device_index):
+        """Test which standard sample rates are supported by the device.
+
+        Requires ``self.audio_interface`` to be a live PyAudio instance.
+
+        Args:
+            device_index: PyAudio index of the device to probe.
+
+        Returns:
+            The accepted rates in ascending order (possibly empty).
+        """
+        standard_rates = [8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000]
+        supported_rates = []
+
+        device_info = self.audio_interface.get_device_info_by_index(device_index)
+        # NOTE(review): the probe uses the device's maximum input channel
+        # count, while setup() opens the stream with self.CHANNELS - confirm
+        # that this mismatch is intended.
+        max_channels = device_info.get('maxInputChannels')
+
+        for rate in standard_rates:
+            try:
+                if self.audio_interface.is_format_supported(
+                    rate,
+                    input_device=device_index,
+                    input_channels=max_channels,
+                    input_format=self.FORMAT,
+                ):
+                    supported_rates.append(rate)
+            except Exception:
+                # A rejected rate is reported by raising; treat any probe
+                # failure as "not supported" but let KeyboardInterrupt and
+                # SystemExit propagate.
+                continue
+        return supported_rates
+
+    def _get_best_sample_rate(self, actual_device_index, desired_rate):
+        """Determine the best available sample rate for the device.
+
+        Args:
+            actual_device_index: PyAudio index of the device to query.
+            desired_rate: Preferred sample rate in Hz.
+
+        Returns:
+            ``desired_rate`` if the device accepts it, otherwise the highest
+            accepted standard rate, otherwise the device's reported default
+            rate. 44100 is returned if the lookup itself fails.
+        """
+        try:
+            device_info = self.audio_interface.get_device_info_by_index(actual_device_index)
+            supported_rates = self.get_supported_sample_rates(actual_device_index)
+
+            if desired_rate in supported_rates:
+                return desired_rate
+
+            # Guard the max(): on an empty list it raises ValueError, which
+            # previously made the device-default fallback below unreachable.
+            if supported_rates:
+                return max(supported_rates)
+
+            return int(device_info.get('defaultSampleRate', 44100))
+
+        except Exception as e:
+            logging.warning(f"Error determining sample rate: {e}")
+            return 44100  # Safe fallback
+
+    def list_devices(self):
+        """Print all audio input devices with their supported sample rates.
+
+        A temporary PyAudio instance is used for the listing and
+        ``self.audio_interface`` is restored afterwards, so an interface
+        opened by ``setup()`` is left untouched. Errors are printed, not
+        raised.
+        """
+        previous_interface = self.audio_interface
+        listing_interface = None
+        try:
+            init()  # Initialize colorama
+            listing_interface = pyaudio.PyAudio()
+            # get_supported_sample_rates() reads self.audio_interface.
+            self.audio_interface = listing_interface
+            device_count = listing_interface.get_device_count()
+
+            print("Available audio input devices:")
+            for i in range(device_count):
+                device_info = listing_interface.get_device_info_by_index(i)
+                device_name = device_info.get('name')
+                max_input_channels = device_info.get('maxInputChannels', 0)
+
+                if max_input_channels > 0:  # Only consider devices with input capabilities
+                    supported_rates = self.get_supported_sample_rates(i)
+                    print(f"{Fore.LIGHTGREEN_EX}Device {Style.RESET_ALL}{i}{Fore.LIGHTGREEN_EX}: {device_name}{Style.RESET_ALL}")
+
+                    # Format each rate in cyan
+                    if supported_rates:
+                        rates_formatted = ", ".join([f"{Fore.CYAN}{rate}{Style.RESET_ALL}" for rate in supported_rates])
+                        print(f"  {Fore.YELLOW}Supported sample rates: {rates_formatted}{Style.RESET_ALL}")
+                    else:
+                        print(f"  {Fore.YELLOW}Supported sample rates: None{Style.RESET_ALL}")
+
+        except Exception as e:
+            print(f"Error listing devices: {e}")
+        finally:
+            # Restore first so a failing terminate() cannot leave the
+            # temporary (dead) interface referenced on the instance.
+            self.audio_interface = previous_interface
+            if listing_interface is not None:
+                listing_interface.terminate()
+
+    def _terminate_interface(self):
+        """Terminate the current PyAudio instance, if any, and forget it."""
+        interface, self.audio_interface = self.audio_interface, None
+        if interface:
+            interface.terminate()
+
+    def setup(self):
+        """Initialize the audio interface and open the input stream.
+
+        Resolves the device index (falling back to the default input
+        device), picks a sample rate via ``_get_best_sample_rate`` and opens
+        the stream. On failure the PyAudio instance is terminated and
+        ``self.audio_interface`` is reset to None.
+
+        Returns:
+            True if the stream was opened, False otherwise (the error is
+            printed).
+        """
+        try:
+            self.audio_interface = pyaudio.PyAudio()
+
+            if self.debug_mode:
+                print(f"Input device index: {self.input_device_index}")
+            if self.input_device_index is not None:
+                actual_device_index = self.input_device_index
+            else:
+                actual_device_index = self.audio_interface.get_default_input_device_info()['index']
+
+            if self.debug_mode:
+                print(f"Actual selected device index: {actual_device_index}")
+            self.input_device_index = actual_device_index
+            self.device_sample_rate = self._get_best_sample_rate(actual_device_index, self.DESIRED_RATE)
+
+            if self.debug_mode:
+                print(f"Setting up audio on device {self.input_device_index} with sample rate {self.device_sample_rate}")
+        except Exception as e:
+            print(f"Error initializing audio recording: {e}")
+            self._terminate_interface()
+            return False
+
+        try:
+            self.stream = self.audio_interface.open(
+                format=self.FORMAT,
+                channels=self.CHANNELS,
+                rate=self.device_sample_rate,
+                input=True,
+                frames_per_buffer=self.CHUNK,
+                input_device_index=self.input_device_index,
+            )
+        except Exception as e:
+            print(f"Failed to initialize audio stream at {self.device_sample_rate} Hz: {e}")
+            # Do not leak the PyAudio instance when the stream cannot open.
+            self._terminate_interface()
+            return False
+
+        if self.debug_mode:
+            print(f"Audio recording initialized successfully at {self.device_sample_rate} Hz")
+        return True
+
+    def read_chunk(self):
+        """Read ``self.CHUNK`` frames from the stream opened by ``setup()``.
+
+        Input overflows do not raise (``exception_on_overflow=False``).
+
+        Returns:
+            The audio data returned by the stream's ``read()``.
+        """
+        return self.stream.read(self.CHUNK, exception_on_overflow=False)
+
+    def cleanup(self):
+        """Release the stream and the audio interface.
+
+        The two resources are released independently, so a failure while
+        stopping the stream does not prevent the interface from being
+        terminated. Both attributes are reset to None. Errors are printed,
+        not raised.
+        """
+        stream, self.stream = self.stream, None
+        try:
+            if stream:
+                try:
+                    stream.stop_stream()
+                finally:
+                    # Still attempt to close if stopping failed.
+                    stream.close()
+        except Exception as e:
+            print(f"Error cleaning up audio resources: {e}")
+
+        try:
+            self._terminate_interface()
+        except Exception as e:
+            print(f"Error cleaning up audio resources: {e}")