From 6f56e346f8eb29d7785dc42a3f1ce84f431267b7 Mon Sep 17 00:00:00 2001 From: Tim Nunamaker Date: Wed, 27 Mar 2024 13:42:04 -0500 Subject: [PATCH] Externalize data --- selfie/config.py | 23 +++++++++- selfie/connectors/chatgpt/connector.py | 2 +- .../connectors/google_messages/connector.py | 2 +- selfie/connectors/telegram/connector.py | 2 +- selfie/connectors/text_files/connector.py | 2 +- selfie/connectors/whatsapp/connector.py | 2 +- selfie/logging.py | 19 ++------ selfie/parsers/chat/chatgpt.py | 2 +- selfie/utils/__init__.py | 0 selfie/{utils.py => utils/data_structures.py} | 0 selfie/utils/filesystem.py | 46 +++++++++++++++++++ 11 files changed, 76 insertions(+), 24 deletions(-) create mode 100644 selfie/utils/__init__.py rename selfie/{utils.py => utils/data_structures.py} (100%) create mode 100644 selfie/utils/filesystem.py diff --git a/selfie/config.py b/selfie/config.py index fd04855..d333f54 100644 --- a/selfie/config.py +++ b/selfie/config.py @@ -1,22 +1,41 @@ import os +import platform from typing import Optional from pydantic import BaseModel, Field, ValidationError, Extra import logging +from selfie.utils.filesystem import get_data_dir + logger = logging.getLogger(__name__) default_port = 8181 +def get_data_root(): + os_name = platform.system() + + if os_name == 'Darwin': # macOS + data_directory = os.path.expanduser('~/Library/Application Support/Selfie/Data') + elif os_name == 'Windows': + data_directory = os.path.join(os.environ['APPDATA'], 'Selfie', 'Data') + else: # Assume Linux/Unix + data_directory = os.path.expanduser('~/.Selfie/data') + + return data_directory + + +data_root = get_data_dir('Selfie') + + class AppConfig(BaseModel): host: str = Field(default="http://localhost", description="Specify the host, with the scheme") port: Optional[int] = Field(default=default_port, description="Specify the port to run on") share: bool = Field(default=False, description="Enable sharing via ngrok") gpu: bool = Field(default=False, description="Enable GPU support") verbose: bool = Field(default=False, description="Enable verbose logging") - database_storage_root: str = Field(default=os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/database"), description="Root directory for database storage") - embeddings_storage_root: str = Field(default=os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/embeddings"), description="Root directory for embeddings storage") + database_storage_root: str = Field(default=os.path.join(data_root, "database"), description="Root directory for database storage") + embeddings_storage_root: str = Field(default=os.path.join(data_root, "embeddings"), description="Root directory for embeddings storage") db_name: str = Field(default='selfie.db', description="Database name") # local_model: str = Field(default='TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf', description="Local model") local_model: str = Field(default='TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf', description="Local model") diff --git a/selfie/connectors/chatgpt/connector.py b/selfie/connectors/chatgpt/connector.py index ce1d088..d71cd87 100644 --- a/selfie/connectors/chatgpt/connector.py +++ b/selfie/connectors/chatgpt/connector.py @@ -6,7 +6,7 @@ from selfie.embeddings import EmbeddingDocumentModel, DataIndex from selfie.parsers.chat import ChatFileParser # TODO Replace this with ChatGPTParser from selfie.types.documents import DocumentDTO -from selfie.utils import data_uri_to_dict +from selfie.utils.data_structures import data_uri_to_dict class ChatGPTConfiguration(BaseModel): diff --git a/selfie/connectors/google_messages/connector.py b/selfie/connectors/google_messages/connector.py index d077655..fccc767 100644 --- a/selfie/connectors/google_messages/connector.py +++ b/selfie/connectors/google_messages/connector.py @@ -6,7 +6,7 @@ from selfie.embeddings import EmbeddingDocumentModel, DataIndex from selfie.parsers.chat import ChatFileParser from selfie.types.documents import DocumentDTO -from selfie.utils import data_uri_to_dict +from selfie.utils.data_structures import data_uri_to_dict class GoogleMessagesConfiguration(BaseModel): diff --git a/selfie/connectors/telegram/connector.py b/selfie/connectors/telegram/connector.py index 23335da..32dcdee 100644 --- a/selfie/connectors/telegram/connector.py +++ b/selfie/connectors/telegram/connector.py @@ -6,7 +6,7 @@ from selfie.embeddings import EmbeddingDocumentModel, DataIndex from selfie.parsers.chat import ChatFileParser from selfie.types.documents import DocumentDTO -from selfie.utils import data_uri_to_dict +from selfie.utils.data_structures import data_uri_to_dict class TelegramConfiguration(BaseModel): diff --git a/selfie/connectors/text_files/connector.py b/selfie/connectors/text_files/connector.py index 5ffc604..ed1628b 100644 --- a/selfie/connectors/text_files/connector.py +++ b/selfie/connectors/text_files/connector.py @@ -10,7 +10,7 @@ from selfie.database import BaseModel, DataManager from selfie.embeddings import EmbeddingDocumentModel from selfie.types.documents import DocumentDTO -from selfie.utils import data_uri_to_dict +from selfie.utils.data_structures import data_uri_to_dict config = get_app_config() diff --git a/selfie/connectors/whatsapp/connector.py b/selfie/connectors/whatsapp/connector.py index d80a387..c090b01 100644 --- a/selfie/connectors/whatsapp/connector.py +++ b/selfie/connectors/whatsapp/connector.py @@ -6,7 +6,7 @@ from selfie.embeddings import EmbeddingDocumentModel, DataIndex from selfie.parsers.chat import ChatFileParser from selfie.types.documents import DocumentDTO -from selfie.utils import data_uri_to_dict +from selfie.utils.data_structures import data_uri_to_dict class WhatsAppConfiguration(BaseModel): diff --git a/selfie/logging.py b/selfie/logging.py index 13d2977..c166a14 100644 --- a/selfie/logging.py +++ b/selfie/logging.py @@ -1,28 +1,15 @@ -import os import logging -import platform from logging.handlers import RotatingFileHandler +from selfie.utils.filesystem import get_log_path as fs_get_log_path + # TODO: Don't hardcode these level = logging.INFO log_file = "selfie.log" def get_log_path(): - os_name = platform.system() - - # Set default log directory based on the operating system - if os_name == 'Darwin': # macOS - log_directory = os.path.expanduser('~/Library/Logs/Selfie') - elif os_name == 'Windows': - log_directory = os.path.join(os.environ['APPDATA'], 'Selfie', 'Logs') - else: # Assume Linux/Unix - log_directory = os.path.expanduser('~/Selfie/Logs') - - if not os.path.exists(log_directory): - os.makedirs(log_directory) - - return os.path.join(log_directory, log_file) + return fs_get_log_path('Selfie', log_file) def setup_logging(): diff --git a/selfie/parsers/chat/chatgpt.py b/selfie/parsers/chat/chatgpt.py index 5cc54ae..5405ec0 100644 --- a/selfie/parsers/chat/chatgpt.py +++ b/selfie/parsers/chat/chatgpt.py @@ -4,7 +4,7 @@ from selfie.parsers.chat.base import JsonBasedChatParser from selfie.types.share_gpt import ShareGPTConversation -from selfie.utils import check_nested +from selfie.utils.data_structures import check_nested class Author(BaseModel): diff --git a/selfie/utils/__init__.py b/selfie/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/selfie/utils.py b/selfie/utils/data_structures.py similarity index 100% rename from selfie/utils.py rename to selfie/utils/data_structures.py diff --git a/selfie/utils/filesystem.py b/selfie/utils/filesystem.py new file mode 100644 index 0000000..8c8c063 --- /dev/null +++ b/selfie/utils/filesystem.py @@ -0,0 +1,46 @@ +import os +import platform + + +def get_app_dir(app_name, dir_name, roaming=True, log_dir=False): + os_name = platform.system() + if os_name == 'Darwin': + home = os.path.expanduser('~') + if log_dir: + return os.path.join(home, 'Library', 'Logs', app_name, dir_name) + return os.path.join(home, 'Library', 'Application Support', app_name, dir_name) + elif os_name == 'Windows': + if roaming: + root = os.environ.get('APPDATA') + else: + root = os.environ.get('LOCALAPPDATA') + if root is None: + raise OSError("Unable to determine application data directory") + return os.path.join(root, app_name, dir_name) + else: + home = os.path.expanduser('~') + return os.path.join(home, '.' + app_name, dir_name) + + +def ensure_dir_exists(dir_path): + os.makedirs(dir_path, exist_ok=True) + + +def get_data_dir(app_name): + return get_app_dir(app_name, 'Data', roaming=True) + + +def get_log_dir(app_name): + return get_app_dir(app_name, '', log_dir=True) + + +def get_data_path(app_name, file_name): + data_dir = get_data_dir(app_name) + ensure_dir_exists(data_dir) + return os.path.join(data_dir, file_name) + + +def get_log_path(app_name, file_name): + log_dir = get_log_dir(app_name) + ensure_dir_exists(log_dir) + return os.path.join(log_dir, file_name) \ No newline at end of file