diff --git a/config.yaml b/config.yaml
index eb477aa6..f4c01d29 100644
--- a/config.yaml
+++ b/config.yaml
@@ -29,7 +29,13 @@ AVAILABLE_MODELS:
 - sentence-transformers/gtr-t5-large
 - sentence-transformers/gtr-t5-base
 COMPUTE_DEVICE: cuda
-EMBEDDING_MODEL_NAME: C:/PATH/Scripts/LM Search Vector Database/v1_7 - working/Embedding_Models/sentence-transformers--all-mpnet-base-v2
+EMBEDDING_MODEL_NAME: C:/PATH/Scripts/LM Search Vector Database/v2_0_1 - working/Embedding_Models/hkunlp--instructor-xl
+styles:
+  button: 'background-color: #323842; color: light gray; font: 10pt "Segoe UI Historic";
+    width: 29;'
+  frame: 'background-color: #161b22;'
+  input: 'background-color: #2e333b; color: light gray; font: 13pt "Segoe UI Historic";'
+  text: 'background-color: #092327; color: light gray; font: 12pt "Segoe UI Historic";'
 tabs:
 - name: Settings
   placeholder: Placeholder text for Settings tab.
diff --git a/gui.py b/gui.py
index 877273be..156f34a8 100644
--- a/gui.py
+++ b/gui.py
@@ -14,33 +14,15 @@
 from metrics_gpu import GPU_Monitor
 from metrics_system import SystemMonitor
 from initialize import determine_compute_device, is_nvidia_gpu, get_os_name
-
-styles = {
-    "button": 'background-color: #323842; color: light gray; font: 10pt "Segoe UI Historic"; width: 29;',
-    "frame": 'background-color: #161b22;',
-    "input": 'background-color: #2e333b; color: light gray; font: 13pt "Segoe UI Historic";',
-    "text": 'background-color: #092327; color: light gray; font: 12pt "Segoe UI Historic";'
-}
+from voice_recorder_module import VoiceRecorder
+from gui_tabs import create_tabs
+from gui_threads import CreateDatabaseThread, SubmitButtonThread
 
 with open('config.yaml', 'r') as config_file:
     config = yaml.safe_load(config_file)
     tabs_config = config.get('tabs', [])
-
-class CreateDatabaseThread(QThread):
-    def run(self):
-        create_database.main()
-
-class SubmitButtonThread(QThread):
-    responseSignal = Signal(str)
-
-    def __init__(self, user_question, parent=None):
-        super(SubmitButtonThread, self).__init__(parent)
-        self.user_question = user_question
-
-    def run(self):
-        response = server_connector.ask_local_chatgpt(self.user_question)
-        self.responseSignal.emit(response['answer'])
+styles = config.get('styles', {})
 
 
 class DocQA_GUI(QWidget):
     def __init__(self):
@@ -64,20 +46,10 @@ def init_ui(self):
         self.setGeometry(300, 300, 850, 910)
         self.setMinimumSize(550, 610)
-        self.left_frame = QFrame() # Changed here
+        self.left_frame = QFrame()
         left_vbox = QVBoxLayout()
-        tab_widget = QTabWidget()
-        tab_widget.setTabPosition(QTabWidget.South)
-
-        tab_widgets = [QTextEdit(tab.get('placeholder', '')) for tab in tabs_config]
-        for i, tab in enumerate(tabs_config):
-            tab_widget.addTab(tab_widgets[i], tab.get('name', ''))
-
-        tutorial_tab = QWebEngineView()
-        tab_widget.addTab(tutorial_tab, 'Tutorial')
-        user_manual_folder = os.path.join(os.path.dirname(__file__), 'User_Manual')
-        html_file_path = os.path.join(user_manual_folder, 'number_format.html')
-        tutorial_tab.setUrl(QUrl.fromLocalFile(html_file_path))
+
+        tab_widget = create_tabs(tabs_config)
 
         left_vbox.addWidget(tab_widget)
 
         button_data = [
@@ -108,9 +80,21 @@ def init_ui(self):
         submit_button.setStyleSheet(styles.get('button', ''))
         submit_button.clicked.connect(self.on_submit_button_clicked)
 
-        right_vbox.addWidget(self.read_only_text, 5)
+        right_vbox.addWidget(self.read_only_text, 4)
         right_vbox.addWidget(self.text_input, 1)
         right_vbox.addWidget(submit_button)
+
+        self.recorder = VoiceRecorder()
+
+        self.start_button = QPushButton("Start Recording")
+        self.start_button.setStyleSheet(styles.get('button', ''))
+        self.start_button.clicked.connect(self.start_recording)
+        right_vbox.addWidget(self.start_button)
+
+        self.stop_button = QPushButton("Stop Recording")
+        self.stop_button.setStyleSheet(styles.get('button', ''))
+        self.stop_button.clicked.connect(self.stop_recording)
+        right_vbox.addWidget(self.stop_button)
 
         right_frame.setLayout(right_vbox)
         right_frame.setStyleSheet(styles.get('frame', ''))
@@ -140,7 +124,6 @@ def resizeEvent(self, event):
         self.left_frame.setMaximumWidth(self.width() * 0.5)
         super().resizeEvent(event)
 
-
     def on_create_button_clicked(self):
         self.create_database_thread = CreateDatabaseThread(self)
         self.create_database_thread.start()
@@ -151,6 +134,12 @@ def on_submit_button_clicked(self):
         self.submit_button_thread.responseSignal.connect(self.update_response)
         self.submit_button_thread.start()
 
+    def start_recording(self):
+        self.recorder.start_recording()
+
+    def stop_recording(self):
+        self.recorder.stop_recording()
+
     def update_response(self, response):
         self.read_only_text.setPlainText(response)
 
diff --git a/gui_tabs.py b/gui_tabs.py
new file mode 100644
index 00000000..45a625a1
--- /dev/null
+++ b/gui_tabs.py
@@ -0,0 +1,22 @@
+# tabs.py
+
+from PySide6.QtWebEngineWidgets import QWebEngineView
+from PySide6.QtWidgets import QTextEdit, QTabWidget
+from PySide6.QtCore import QUrl
+import os
+
+def create_tabs(tabs_config):
+    tab_widget = QTabWidget()
+    tab_widget.setTabPosition(QTabWidget.South)
+
+    tab_widgets = [QTextEdit(tab.get('placeholder', '')) for tab in tabs_config]
+    for i, tab in enumerate(tabs_config):
+        tab_widget.addTab(tab_widgets[i], tab.get('name', ''))
+
+    tutorial_tab = QWebEngineView()
+    tab_widget.addTab(tutorial_tab, 'Tutorial')
+    user_manual_folder = os.path.join(os.path.dirname(__file__), 'User_Manual')
+    html_file_path = os.path.join(user_manual_folder, 'number_format.html')
+    tutorial_tab.setUrl(QUrl.fromLocalFile(html_file_path))
+
+    return tab_widget
diff --git a/gui_threads.py b/gui_threads.py
new file mode 100644
index 00000000..b87318e2
--- /dev/null
+++ b/gui_threads.py
@@ -0,0 +1,20 @@
+# Creates threads for gui functions
+
+from PySide6.QtCore import QThread, Signal
+import server_connector
+import create_database
+
+class CreateDatabaseThread(QThread):
+    def run(self):
+        create_database.main()
+
+class SubmitButtonThread(QThread):
+    responseSignal = Signal(str)
+
+    def __init__(self, user_question, parent=None):
+        super(SubmitButtonThread, self).__init__(parent)
+        self.user_question = user_question
+
+    def run(self):
+        response = server_connector.ask_local_chatgpt(self.user_question)
+        self.responseSignal.emit(response['answer'])
diff --git a/initialize.py b/initialize.py
index a3fe9cd5..c34e0cd1 100644
--- a/initialize.py
+++ b/initialize.py
@@ -1,3 +1,5 @@
+# determine OS compute device for gui and save to config and determine whether to display metrics
+
 import torch
 import yaml
 import platform
diff --git a/main.py b/main.py
new file mode 100644
index 00000000..c7c06a0e
--- /dev/null
+++ b/main.py
@@ -0,0 +1,78 @@
+import sys
+import pyaudio
+import wave
+import os
+import tempfile
+import threading
+import pyperclip
+from faster_whisper import WhisperModel
+from PySide6.QtWidgets import QApplication, QWidget, QVBoxLayout, QPushButton, QLabel
+from PySide6.QtCore import Qt
+
+class VoiceRecorder:
+    def __init__(self, window, format=pyaudio.paInt16, channels=1, rate=44100, chunk=1024):
+        self.format, self.channels, self.rate, self.chunk = format, channels, rate, chunk
+        self.window = window
+        self.is_recording, self.frames = False, []
+        self.model = WhisperModel("large-v2", device="cuda", compute_type="int8_float32")
+
+    def transcribe_audio(self, audio_file):
+        segments, _ = self.model.transcribe(audio_file)
+        pyperclip.copy("\n".join([segment.text for segment in segments]))
+        self.window.update_status("Audio saved and transcribed")
+
+    def record_audio(self):
+        self.window.update_status("Recording...")
+        p = pyaudio.PyAudio()
+        try:
+            stream = p.open(format=self.format, channels=self.channels, rate=self.rate, input=True, frames_per_buffer=self.chunk)
+            [self.frames.append(stream.read(self.chunk)) for _ in iter(lambda: self.is_recording, False)]
+            stream.stop_stream()
+            stream.close()
+        finally:
+            p.terminate()
+
+    def save_audio(self):
+        self.is_recording = False
+        temp_filename = tempfile.mktemp(suffix=".wav")
+        with wave.open(temp_filename, "wb") as wf:
+            wf.setnchannels(self.channels)
+            wf.setsampwidth(pyaudio.PyAudio().get_sample_size(self.format))
+            wf.setframerate(self.rate)
+            wf.writeframes(b"".join(self.frames))
+        self.transcribe_audio(temp_filename)
+        os.remove(temp_filename)
+        self.frames.clear()
+
+    def start_recording(self):
+        if not self.is_recording:
+            self.is_recording = True
+            threading.Thread(target=self.record_audio).start()
+
+class MyWindow(QWidget):
+    def __init__(self):
+        super().__init__()
+        self.recorder = VoiceRecorder(self)
+        layout = QVBoxLayout(self)
+
+        for text, callback in [("Record", self.recorder.start_recording),
+                               ("Stop and Copy to Clipboard", self.recorder.save_audio)]:
+            button = QPushButton(text, self)
+            button.clicked.connect(callback)
+            layout.addWidget(button)
+
+        self.status_label = QLabel('', self)
+        layout.addWidget(self.status_label)
+        self.setFixedSize(300, 150)
+
+        self.setWindowFlag(Qt.WindowStaysOnTopHint)
+
+    def update_status(self, text):
+        self.status_label.setText(text)
+
+if __name__ == "__main__":
+    app = QApplication(sys.argv)
+    app.setStyle('Fusion')
+    window = MyWindow()
+    window.show()
+    sys.exit(app.exec())
diff --git a/metrics_gpu.py b/metrics_gpu.py
index f8d0ae16..a0985041 100644
--- a/metrics_gpu.py
+++ b/metrics_gpu.py
@@ -2,6 +2,7 @@
 from multiprocessing import Process, Pipe, Event
 import time
 from PySide6.QtCore import QTimer
+import humanize
 
 def monitor_nvml(pipe, stop_event):
     nvmlInit()
@@ -11,7 +12,7 @@ def monitor_nvml(pipe, stop_event):
         memory_info = nvmlDeviceGetMemoryInfo(handle)
         utilization_rates = nvmlDeviceGetUtilizationRates(handle)
 
-        memory_used_str = f"{memory_info.used / (1024 * 1024):.2f} MiB"
+        memory_used_str = humanize.naturalsize(memory_info.used, binary=True)
         gpu_utilization = f"{utilization_rates.gpu}%"
 
         data = (memory_used_str, gpu_utilization)
@@ -51,4 +52,4 @@ def update_gpu_info(self):
     def stop_and_exit_gpu_monitor(self):
         self.timer.stop()
         stop_monitoring_gpu(self.process, self.stop_event)
-        self.root.close()
\ No newline at end of file
+        self.root.close()
diff --git a/metrics_system.py b/metrics_system.py
index 04958cd9..4e56c4a9 100644
--- a/metrics_system.py
+++ b/metrics_system.py
@@ -2,12 +2,13 @@
 import time
 from multiprocessing import Process, Pipe, Event
 from PySide6.QtCore import QTimer
+import humanize
 
 def monitor_system(pipe, stop_event):
     while not stop_event.is_set():
         cpu_percent = collect_cpu_metrics()
-        ram_percent, ram_used_mib = collect_ram_metrics()
-        data = (cpu_percent, ram_percent, ram_used_mib)
+        ram_percent, ram_used = collect_ram_metrics()
+        data = (cpu_percent, ram_percent, ram_used)
 
         pipe.send(data)
         time.sleep(0.5)
@@ -17,8 +18,8 @@ def collect_cpu_metrics():
 
 def collect_ram_metrics():
     ram = psutil.virtual_memory()
-    ram_used_mib = round(ram.used / (1024 ** 2), 2)
-    return round(ram.percent, 2), ram_used_mib
+    ram_used = humanize.naturalsize(ram.used, binary=True)
+    return round(ram.percent, 2), ram_used
 
 def start_monitoring_system():
     stop_event = Event()
@@ -44,12 +45,12 @@ def __init__(self, cpu_label, ram_label, ram_usage_label, root):
 
     def update_system_info(self):
         if self.parent_conn.poll():
-            cpu_percent, ram_percent, ram_used_mib = self.parent_conn.recv()
+            cpu_percent, ram_percent, ram_used = self.parent_conn.recv()
             self.cpu_label.setText(f"CPU: {cpu_percent}%")
             self.ram_label.setText(f"RAM: {ram_percent}%")
-            self.ram_usage_label.setText(f"RAM: {ram_used_mib:.2f} MiB")
+            self.ram_usage_label.setText(f"RAM: {ram_used}")
 
     def stop_and_exit_system_monitor(self):
         self.timer.stop()
         stop_monitoring_system(self.process, self.stop_event)
-        self.root.close()
\ No newline at end of file
+        self.root.close()
diff --git a/requirements.txt b/requirements.txt
index 09aeee9b..dd4f2e47 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,8 @@ openpyxl==3.1.2
 unstructured==0.10.12
 nvidia-ml-py==12.535.108
 psutil==5.9.5
-PySide6==6.5.2
\ No newline at end of file
+PySide6==6.5.2
+Humanize==4.8.0
+PyAudio==0.2.13
+pyperclip==1.8.2
+faster-whisper==0.9.0
\ No newline at end of file
diff --git a/server_connector.py b/server_connector.py
index e61147df..f2189268 100644
--- a/server_connector.py
+++ b/server_connector.py
@@ -63,7 +63,7 @@ def ask_local_chatgpt(query, persist_directory=PERSIST_DIRECTORY, client_setting
     retriever = db.as_retriever()
     relevant_contexts = retriever.get_relevant_documents(query)
     contexts = [document.page_content for document in relevant_contexts]
-    prepend_string = "Only base your answer to the following question on the provided context. If the provided context does not provide an answer, simply state that is the case."
+    prepend_string = "Only base your answer to the following question on the provided context."
     augmented_query = "\n\n---\n\n".join(contexts) + "\n\n-----\n\n" + query
 
     response_json = connect_to_local_chatgpt(augmented_query)
diff --git a/voice_recorder_module.py b/voice_recorder_module.py
index e0ec5a2e..1e286eb1 100644
--- a/voice_recorder_module.py
+++ b/voice_recorder_module.py
@@ -10,7 +10,19 @@ class VoiceRecorder:
     def __init__(self, format=pyaudio.paInt16, channels=1, rate=44100, chunk=1024):
         self.format, self.channels, self.rate, self.chunk = format, channels, rate, chunk
         self.is_recording, self.frames = False, []
-        self.model = WhisperModel(r"C:\PATH\Scripts\ctranslate2-faster-whisper-transcriber\whisper-small.en-ct2-int8_float32", device="auto", compute_type="int8_float32")
+
+        current_directory = os.getcwd()
+
+        model_folder_path = os.path.join(
+            current_directory,
+            "whisper-small.en-ct2-int8_float32"
+        )
+
+        self.model = WhisperModel(
+            model_folder_path,
+            device="auto",
+            compute_type="int8_float32"
+        )
 
     def transcribe_audio(self, audio_file):
         segments, _ = self.model.transcribe(audio_file)