Skip to content

Commit

Permalink
Merge pull request #25 from FrancescoCaracciolo/nyarch-sync-0.5.0
Browse files Browse the repository at this point in the history
Nyarch sync 0.5.0
  • Loading branch information
FrancescoCaracciolo authored Dec 7, 2024
2 parents 58b4ca9 + 0a74e81 commit 2feb280
Show file tree
Hide file tree
Showing 10 changed files with 288 additions and 31 deletions.
17 changes: 13 additions & 4 deletions data/moe.nyarchlinux.assistant.gschema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,19 @@
</key>
<key name="stt-settings" type="s">
<default>"{}"</default>
</key>
<key name="welcome-screen-shown" type="b">
<default>false</default>
</key>
</key>
<key name="automatic-stt" type="b">
<default>false</default>
</key>
<key name="stt-silence-detection-threshold" type="d">
<default>0.07</default>
</key>
<key name="stt-silence-detection-duration" type="i">
<default>3</default>
</key>
<key name="welcome-screen-shown" type="b">
<default>false</default>
</key>
<key name="custom-prompts" type="s">
<default>"{}"</default>
</key>
Expand Down
9 changes: 8 additions & 1 deletion src/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

from .llm import BingHandler, GPT4AllHandler, GroqHandler, NyarchApiHandler, OllamaHandler, OpenAIHandler, CustomLLMHandler, GPT3AnyHandler, GeminiHandler, MistralHandler, OpenRouterHandler
from .tts import VoiceVoxHanlder, gTTSHandler, EspeakHandler, CustomTTSHandler, VitsHandler, EdgeTTSHandler
from .tts import VoiceVoxHanlder,ElevenLabs, gTTSHandler, EspeakHandler, CustomTTSHandler, VitsHandler, EdgeTTSHandler
from .stt import GroqSRHandler, OpenAISRHandler, SphinxHandler, GoogleSRHandler, WhisperHandler, WitAIHandler, VoskHandler, CustomSRHandler

from .avatar import Live2DHandler, LivePNGHandler
Expand Down Expand Up @@ -154,6 +155,12 @@
"description": _("Google's text to speech"),
"class": gTTSHandler,
},
"elevenlabs": {
"key": "elevenlabs",
"title": _("ElevenLabs TTS"),
"description": _("Natural sounding TTS"),
"class": ElevenLabs,
},
"voicevox": {
"key": "voicevox",
"title": _("Voicevox API"),
Expand Down
12 changes: 11 additions & 1 deletion src/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@ def reconstruct_dataset_from_csv(filename):
```console
sudo pacman -S ollama
```
If you have a Nvidia GPU, remember to install ollama-cuda for better performances:
```console
sudo pacman -S ollama-cuda
```
Then you can run ollama by executing this command:
```console
ollama serve
Expand All @@ -205,7 +210,12 @@ def reconstruct_dataset_from_csv(filename):
```console
ollama pull llama3.1
```
("""
You can also install custom models from HuggingFace:
```console
ollama pull hf.co/{username}/{repository}
```
"""
}

if is_flatpak():
Expand Down
5 changes: 3 additions & 2 deletions src/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def generate_chat_name(self, request_prompt:str = "") -> str:
class G4FHandler(LLMHandler):
"""Common methods for g4f models"""
key = "g4f"
version = "0.3.3.4"
version = "0.3.5.8"

@staticmethod
def get_extra_requirements() -> list:
Expand Down Expand Up @@ -218,6 +218,7 @@ def generate_text(self, prompt: str, history: list[dict[str, str]] = [], system_
return response.choices[0].message.content
except Exception as e:
return f"Error: {e}"

def generate_text_stream(self, prompt: str, history: list[dict[str, str]] = [], system_prompt: list[str] = [], on_update: Callable[[str], Any] = lambda _: None, extra_args: list = []) -> str:
model = self.get_setting("model")
img = None
Expand Down Expand Up @@ -1070,7 +1071,7 @@ def get_extra_settings(self) -> list:

class GroqHandler(OpenAIHandler):
key = "groq"
default_models = (("llama-3.1-70B-versatile", "llama-3.1-70B-versatile" ), )
default_models = (("llama-3.3-70B-versatile", "llama-3.3-70B-versatile" ), )
def supports_vision(self) -> bool:
return "vision" in self.get_setting("model")

Expand Down
8 changes: 8 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,12 @@ def new_chat(self,*a):
self.win.notification_block.add_toast(
Adw.Toast(title=_('Chat is created')))

def start_recording(self, *a):
    """Action callback that starts voice recording from the main window.

    Forwards to the window's own ``start_recording`` handler, passing the
    window's recording button as the widget argument. Any arguments
    supplied by the action machinery are ignored.
    """
    window = self.win
    window.start_recording(window.recording_button)

def stop_tts(self, *a):
    """Action callback that mutes the text-to-speech output.

    Forwards to the window's ``mute_tts`` handler, passing the window's
    mute button as the widget argument. Any arguments supplied by the
    action machinery are ignored.
    """
    window = self.win
    window.mute_tts(window.mute_tts_button)

def do_shutdown(self):
self.win.save_chat()
settings = Gio.Settings.new('moe.nyarchlinux.assistant')
Expand All @@ -221,4 +227,6 @@ def main(version):
app.create_action('reload_folder', app.reload_folder, ['<primary>e'])
app.create_action('new_chat', app.new_chat, ['<primary>t'])
app.create_action('focus_message', app.focus_message, ['<primary>l'])
app.create_action('start_recording', app.start_recording, ['<primary>s'])
app.create_action('stop_tts', app.stop_tts, ['<primary>k'])
app.run(sys.argv)
61 changes: 60 additions & 1 deletion src/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def __init__(self,app,headless=False, *args, **kwargs):
self.prompts_settings = json.loads(self.settings.get_string("prompts-settings"))
self.prompts = override_prompts(self.custom_prompts, PROMPTS)
self.sandbox = can_escape_sandbox()

self.cache_handlers()
# Page building
self.general_page = Adw.PreferencesPage()

Expand Down Expand Up @@ -109,6 +111,10 @@ def __init__(self,app,headless=False, *args, **kwargs):
for stt_key in AVAILABLE_STT:
row = self.build_row(AVAILABLE_STT, stt_key, selected, group)
stt_engine.add_row(row)
# Automatic STT settings
self.auto_stt = Adw.ExpanderRow(title=_('Automatic Speech To Text'), subtitle=_("Automatically restart speech to text at the end of a text/TTS"))
self.build_auto_stt()
self.Voicegroup.add(self.auto_stt)

# Build the AVATAR settings
self.avatargroup = Adw.PreferencesGroup(title=_('Avatar'))
Expand Down Expand Up @@ -136,6 +142,7 @@ def __init__(self,app,headless=False, *args, **kwargs):
for smart_prompt_key in AVAILABLE_SMART_PROMPTS:
row = self.build_row(AVAILABLE_SMART_PROMPTS, smart_prompt_key, selected, group)
smartprompt.add_row(row)

# Prompts settings
self.prompt = Adw.PreferencesGroup(title=_('Prompt control'))
self.general_page.add(self.prompt)
Expand Down Expand Up @@ -213,6 +220,47 @@ def __init__(self,app,headless=False, *args, **kwargs):

self.add(self.general_page)

def build_auto_stt(self):
    """Fill the automatic speech-to-text expander row with its controls.

    Adds an enable switch bound to the ``automatic-stt`` setting as a
    suffix of ``self.auto_stt``, then two child rows, each with a slider
    and a label mirroring the slider value:

    * silence threshold, stored as a double in
      ``stt-silence-detection-threshold`` (range 0 to 0.5);
    * silence time, stored as an int in
      ``stt-silence-detection-duration`` (range 0 to 10).
    """
    enable_switch = Gtk.Switch(valign=Gtk.Align.CENTER)
    self.settings.bind("automatic-stt", enable_switch, 'active', Gio.SettingsBindFlags.DEFAULT)
    self.auto_stt.add_suffix(enable_switch)

    def on_scale_changed(scale, value_label, key, value_type):
        # Persist the slider value with the setter matching the key's
        # schema type, then mirror it in the label next to the slider.
        new_value = scale.get_value()
        if value_type is float:
            self.settings.set_double(key, new_value)
        elif value_type is int:
            new_value = int(new_value)
            self.settings.set_int(key, new_value)
        value_label.set_text(str(new_value))

    def make_scale_row(title, subtitle, key, value_type, upper, round_digits):
        # Build one "slider + value label" row backed by the given key.
        row = Adw.ActionRow(title=title, subtitle=subtitle)
        scale = Gtk.Scale(digits=0, round_digits=round_digits)
        scale.set_range(0, upper)
        scale.set_size_request(120, -1)
        if value_type is float:
            current = self.settings.get_double(key)
        else:
            current = self.settings.get_int(key)
        # Set the initial value before connecting the handler so that
        # loading the stored value does not write it back to the settings.
        scale.set_value(current)
        value_label = Gtk.Label(label=str(current))
        scale.connect("value-changed", on_scale_changed, value_label, key, value_type)
        container = Gtk.Box()
        container.append(scale)
        container.append(value_label)
        row.add_suffix(container)
        return row

    # Silence threshold
    threshold_row = make_scale_row(
        _("Silence threshold"),
        _("Silence threshold in seconds, percentage of the volume to be considered silence"),
        "stt-silence-detection-threshold",
        float,
        0.5,
        2,
    )
    # Silence time
    duration_row = make_scale_row(
        _("Silence time"),
        _("Silence time in seconds before recording stops automatically"),
        "stt-silence-detection-duration",
        int,
        10,
        0,
    )
    self.auto_stt.add_row(threshold_row)
    self.auto_stt.add_row(duration_row)

def update_prompt(self, switch: Gtk.Switch, state, key: str):
"""Update the prompt in the settings
Expand Down Expand Up @@ -269,6 +317,14 @@ def build_row(self, constants: dict[str, Any], key: str, selected: str, group: G
row.add_prefix(button)
return row

def cache_handlers(self):
    """Create and cache one handler per available TTS, STT and LLM entry.

    Resets ``self.handlers`` to an empty dict and then stores each handler
    returned by ``get_object`` under the key
    ``(handler_key, self.convert_constants(constants))``. The tables are
    processed in the order TTS, STT, LLM.
    """
    self.handlers = {}
    for constants in (AVAILABLE_TTS, AVAILABLE_STT, AVAILABLE_LLMS):
        for key in constants:
            self.handlers[(key, self.convert_constants(constants))] = self.get_object(constants, key)

def get_object(self, constants: dict[str, Any], key:str) -> (Handler):

Expand All @@ -284,12 +340,15 @@ def get_object(self, constants: dict[str, Any], key:str) -> (Handler):
Returns:
The created handler
"""
if (key, self.convert_constants(constants)) in self.handlers:
return self.handlers[(key, self.convert_constants(constants))]

if constants == AVAILABLE_LLMS:
model = constants[key]["class"](self.settings, os.path.join(self.directory, "pip"))
elif constants == AVAILABLE_STT:
model = constants[key]["class"](self.settings,os.path.join(self.directory, "models"))
elif constants == AVAILABLE_TTS:
model = constants[key]["class"](self.settings, self.directory)
model = constants[key]["class"](self.settings, os.path.join(self.directory, "pip"))
elif constants == AVAILABLE_AVATARS:
model = constants[key]["class"](self.settings, self.directory)
elif constants == AVAILABLE_TRANSLATORS:
Expand Down
2 changes: 2 additions & 0 deletions src/shortcuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def __init__(self,app, *args, **kwargs):
gr.append(Gtk.ShortcutsShortcut(title=_("New tab"), accelerator='<primary>t'))
gr.append(Gtk.ShortcutsShortcut(title=_("Paste Image"), accelerator='<primary>v'))
gr.append(Gtk.ShortcutsShortcut(title=_("Focus message box"), accelerator='<primary>l'))
gr.append(Gtk.ShortcutsShortcut(title=_("Start recording"), accelerator='<primary>s'))
gr.append(Gtk.ShortcutsShortcut(title=_("Stop TTS"), accelerator='<primary>k'))

sect_main.append(gr)
self.set_child(sect_main)
40 changes: 37 additions & 3 deletions src/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,60 @@
from typing import Any
import pyaudio
import wave
import struct
import speech_recognition as sr
from .extra import find_module, get_spawn_command, install_module
from .handler import Handler
import math


class AudioRecorder:
"""Record audio"""
def __init__(self):
"""Record audio with optional auto-stop on silence detection."""
def __init__(self, auto_stop: bool = False, stop_function: callable = lambda *_: (), silence_threshold_percent: float = 0.01, silence_duration: int = 2):
    """Set up the recorder state and capture parameters.

    Args:
        auto_stop: when True, recording stops by itself after a long
            enough run of silent chunks.
        stop_function: callback invoked with no arguments once the
            recording has been saved. The default is a no-op that accepts
            any number of arguments, so a recorder created without a
            callback does not fail when the callback is invoked.
        silence_threshold_percent: fraction of the maximum 16-bit
            amplitude below which a chunk counts as silent.
        silence_duration: seconds of continuous silence that trigger the
            automatic stop.
    """
    self.recording = False
    self.frames = []
    self.auto_stop = auto_stop
    self.stop_function = stop_function
    self.silence_threshold_percent = silence_threshold_percent
    self.silence_duration = silence_duration
    self.sample_format = pyaudio.paInt16
    self.channels = 1
    self.sample_rate = 44100
    self.chunk_size = 1024
    self.silent_chunks = 0
    self.max_rms = 32767  # Maximum possible RMS for 16-bit audio

def start_recording(self):
def start_recording(self, output_file):
    """Capture audio until stopped, then write it to ``output_file``.

    Blocks while ``self.recording`` is True. The flag is cleared either
    externally or, when ``auto_stop`` is enabled, after
    ``silence_duration`` seconds of consecutive chunks whose RMS is below
    the silence threshold. The audio stream and the PyAudio instance are
    always released, even if reading from the device raises; in that case
    the exception propagates and nothing is saved.

    Args:
        output_file: path handed to ``save_recording`` once capture ends.
    """
    self.recording = True
    self.frames = []
    self.silent_chunks = 0
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=self.sample_format,
                        channels=self.channels,
                        rate=self.sample_rate,
                        frames_per_buffer=self.chunk_size,
                        input=True)
        try:
            silence_threshold = self.max_rms * self.silence_threshold_percent
            # Number of consecutive silent chunks that add up to
            # silence_duration seconds of audio.
            max_silent_chunks = self.silence_duration * (self.sample_rate / self.chunk_size)
            while self.recording:
                data = stream.read(self.chunk_size)
                self.frames.append(data)
                if self.auto_stop:
                    rms = self._calculate_rms(data)
                    if rms < silence_threshold:
                        self.silent_chunks += 1
                    else:
                        self.silent_chunks = 0
                    if self.silent_chunks >= max_silent_chunks:
                        self.recording = False
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
    self.save_recording(output_file)

def stop_recording(self, output_file):
    """Request the end of the current recording.

    Only clears the ``recording`` flag. ``output_file`` is accepted but
    not used by this method.
    """
    self.recording = False

def save_recording(self, output_file):
p = pyaudio.PyAudio()
wf = wave.open(output_file, 'wb')
wf.setnchannels(self.channels)
Expand All @@ -45,6 +67,18 @@ def stop_recording(self, output_file):
wf.writeframes(b''.join(self.frames))
wf.close()
p.terminate()
self.stop_function()

def _calculate_rms(self, data):
"""Calculate the root mean square of the audio data."""
count = len(data) // 2 # Each sample is 2 bytes (16-bit)
format = "<" + str(count) + "h" # little-endian signed shorts
shorts = struct.unpack(format, data)
mean = sum(shorts) / count
shorts_demeaned = [sample - mean for sample in shorts]
sum_squares = sum(sample * sample for sample in shorts_demeaned)
rms = (sum_squares / count) ** 0.5
return rms


class STTHandler(Handler):
Expand Down
Loading

0 comments on commit 2feb280

Please sign in to comment.