Skip to content

Commit

Permalink
Externalize data
Browse files Browse the repository at this point in the history
  • Loading branch information
tnunamak committed Mar 27, 2024
1 parent dafb7bf commit 6f56e34
Show file tree
Hide file tree
Showing 11 changed files with 76 additions and 24 deletions.
23 changes: 21 additions & 2 deletions selfie/config.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,41 @@
import os
import platform
from typing import Optional

from pydantic import BaseModel, Field, ValidationError, Extra
import logging

from selfie.utils.filesystem import get_data_dir

logger = logging.getLogger(__name__)

default_port = 8181


def get_data_root():
    """Return the default Selfie data directory for the current platform.

    The location is chosen from ``platform.system()``:

    * ``'Darwin'`` (macOS): ``~/Library/Application Support/Selfie/Data``
    * ``'Windows'``: ``%APPDATA%\\Selfie\\Data``
    * anything else (treated as Linux/Unix): ``~/.Selfie/data``

    The directory is only computed, never created.

    Raises:
        KeyError: on Windows when the ``APPDATA`` environment variable is
            not set.
    """
    system = platform.system()

    if system == 'Windows':
        return os.path.join(os.environ['APPDATA'], 'Selfie', 'Data')

    if system == 'Darwin':  # macOS
        return os.path.expanduser('~/Library/Application Support/Selfie/Data')

    # Every other platform is assumed to be Linux/Unix.
    return os.path.expanduser('~/.Selfie/data')


data_root = get_data_dir('Selfie')


class AppConfig(BaseModel):
host: str = Field(default="http://localhost", description="Specify the host, with the scheme")
port: Optional[int] = Field(default=default_port, description="Specify the port to run on")
share: bool = Field(default=False, description="Enable sharing via ngrok")
gpu: bool = Field(default=False, description="Enable GPU support")
verbose: bool = Field(default=False, description="Enable verbose logging")
database_storage_root: str = Field(default=os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/database"), description="Root directory for database storage")
embeddings_storage_root: str = Field(default=os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/embeddings"), description="Root directory for embeddings storage")
database_storage_root: str = Field(default=os.path.join(data_root, "database"), description="Root directory for database storage")
embeddings_storage_root: str = Field(default=os.path.join(data_root, "embeddings"), description="Root directory for embeddings storage")
db_name: str = Field(default='selfie.db', description="Database name")
# local_model: str = Field(default='TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf', description="Local model")
local_model: str = Field(default='TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf', description="Local model")
Expand Down
2 changes: 1 addition & 1 deletion selfie/connectors/chatgpt/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from selfie.embeddings import EmbeddingDocumentModel, DataIndex
from selfie.parsers.chat import ChatFileParser # TODO Replace this with ChatGPTParser
from selfie.types.documents import DocumentDTO
from selfie.utils import data_uri_to_dict
from selfie.utils.data_structures import data_uri_to_dict


class ChatGPTConfiguration(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion selfie/connectors/google_messages/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from selfie.embeddings import EmbeddingDocumentModel, DataIndex
from selfie.parsers.chat import ChatFileParser
from selfie.types.documents import DocumentDTO
from selfie.utils import data_uri_to_dict
from selfie.utils.data_structures import data_uri_to_dict


class GoogleMessagesConfiguration(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion selfie/connectors/telegram/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from selfie.embeddings import EmbeddingDocumentModel, DataIndex
from selfie.parsers.chat import ChatFileParser
from selfie.types.documents import DocumentDTO
from selfie.utils import data_uri_to_dict
from selfie.utils.data_structures import data_uri_to_dict


class TelegramConfiguration(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion selfie/connectors/text_files/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from selfie.database import BaseModel, DataManager
from selfie.embeddings import EmbeddingDocumentModel
from selfie.types.documents import DocumentDTO
from selfie.utils import data_uri_to_dict
from selfie.utils.data_structures import data_uri_to_dict

config = get_app_config()

Expand Down
2 changes: 1 addition & 1 deletion selfie/connectors/whatsapp/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from selfie.embeddings import EmbeddingDocumentModel, DataIndex
from selfie.parsers.chat import ChatFileParser
from selfie.types.documents import DocumentDTO
from selfie.utils import data_uri_to_dict
from selfie.utils.data_structures import data_uri_to_dict


class WhatsAppConfiguration(BaseModel):
Expand Down
19 changes: 3 additions & 16 deletions selfie/logging.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,15 @@
import os
import logging
import platform
from logging.handlers import RotatingFileHandler

from selfie.utils.filesystem import get_log_path as fs_get_log_path

# TODO: Don't hardcode these
level = logging.INFO
log_file = "selfie.log"


def get_log_path():
os_name = platform.system()

# Set default log directory based on the operating system
if os_name == 'Darwin': # macOS
log_directory = os.path.expanduser('~/Library/Logs/Selfie')
elif os_name == 'Windows':
log_directory = os.path.join(os.environ['APPDATA'], 'Selfie', 'Logs')
else: # Assume Linux/Unix
log_directory = os.path.expanduser('~/Selfie/Logs')

if not os.path.exists(log_directory):
os.makedirs(log_directory)

return os.path.join(log_directory, log_file)
return fs_get_log_path('Selfie', log_file)


def setup_logging():
Expand Down
2 changes: 1 addition & 1 deletion selfie/parsers/chat/chatgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from selfie.parsers.chat.base import JsonBasedChatParser
from selfie.types.share_gpt import ShareGPTConversation
from selfie.utils import check_nested
from selfie.utils.data_structures import check_nested


class Author(BaseModel):
Expand Down
Empty file added selfie/utils/__init__.py
Empty file.
File renamed without changes.
46 changes: 46 additions & 0 deletions selfie/utils/filesystem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import os
import platform


def get_app_dir(app_name, dir_name, roaming=True, log_dir=False):
    """Return a per-user, platform-specific directory for an application.

    The directory is only computed; it is not created.

    Layout by ``platform.system()``:

    * ``'Darwin'``: ``~/Library/Application Support/<app_name>/<dir_name>``,
      or ``~/Library/Logs/<app_name>/<dir_name>`` when ``log_dir`` is true.
    * ``'Windows'``: ``%APPDATA%\\<app_name>\\<dir_name>`` when ``roaming`` is
      true, otherwise ``%LOCALAPPDATA%\\<app_name>\\<dir_name>``.
    * anything else: ``~/.<app_name>/<dir_name>``.

    Args:
        app_name: Application name used as the directory component.
        dir_name: Sub-directory below the application directory. When empty,
            the application directory itself is returned (without a trailing
            path separator).
        roaming: Windows only. Selects ``APPDATA`` (true) or
            ``LOCALAPPDATA`` (false); ignored on other platforms.
        log_dir: macOS only. Selects ``~/Library/Logs`` instead of
            ``~/Library/Application Support``; ignored on other platforms.

    Returns:
        The directory path as a string.

    Raises:
        OSError: on Windows when the selected environment variable is unset
            or empty.
    """
    os_name = platform.system()

    if os_name == 'Darwin':
        library_section = 'Logs' if log_dir else 'Application Support'
        parts = [os.path.expanduser('~'), 'Library', library_section, app_name]
    elif os_name == 'Windows':
        root = os.environ.get('APPDATA' if roaming else 'LOCALAPPDATA')
        # An empty value would make os.path.join produce a path relative to
        # the current working directory, so treat it like a missing variable.
        if not root:
            raise OSError("Unable to determine application data directory")
        parts = [root, app_name]
    else:
        parts = [os.path.expanduser('~'), '.' + app_name]

    # Joining an empty final component would append a trailing separator.
    if dir_name:
        parts.append(dir_name)

    return os.path.join(*parts)


def ensure_dir_exists(dir_path):
    """Create ``dir_path`` and any missing parent directories.

    Does nothing when the directory already exists.
    """
    os.makedirs(name=dir_path, exist_ok=True)


def get_data_dir(app_name):
    """Return the ``Data`` directory for ``app_name``.

    Delegates to ``get_app_dir`` with ``dir_name='Data'`` and
    ``roaming=True``; the directory is not created here.
    """
    data_subdir = 'Data'
    return get_app_dir(app_name, data_subdir, roaming=True)


def get_log_dir(app_name):
    """Return the log directory for ``app_name``.

    Delegates to ``get_app_dir`` with an empty sub-directory name and
    ``log_dir=True``; the directory is not created here.
    """
    no_subdir = ''
    return get_app_dir(app_name, no_subdir, log_dir=True)


def get_data_path(app_name, file_name):
    """Return the path of ``file_name`` inside the data directory of ``app_name``.

    The data directory is created first if it does not exist yet; the file
    itself is not touched.
    """
    directory = get_data_dir(app_name)
    # Make sure the parent directory exists before handing out the path.
    ensure_dir_exists(directory)
    target = os.path.join(directory, file_name)
    return target


def get_log_path(app_name, file_name):
    """Return the path of ``file_name`` inside the log directory of ``app_name``.

    The log directory is created first if it does not exist yet; the file
    itself is not touched.
    """
    directory = get_log_dir(app_name)
    # Make sure the parent directory exists before handing out the path.
    ensure_dir_exists(directory)
    target = os.path.join(directory, file_name)
    return target

0 comments on commit 6f56e34

Please sign in to comment.