Review notes (free text before the first "diff --git" header; patch tools skip it):
  * slack.py: `filters` gets a "" default so the field is really optional; blank
    filter entries are dropped because "" is a substring of every channel name
    and would silently disable filtering; any() takes a generator.
  * paths.py: the macOS check is chained with `elif`; as a bare `if`, its `else`
    branch overwrote the Windows STORAGE_PATH.
  * data-source-panel.tsx: help text typo fixed and "and" -> "or" to match the
    any-filter-matches behaviour in slack.py.
  * NOTE(review): the `index` blob hashes are from the original patch, and the
    JSX markup around the ui hunk was not visible in the reviewed copy --
    regenerate the diff from a working tree before applying.

diff --git a/app/data_source/sources/slack/slack.py b/app/data_source/sources/slack/slack.py
index d9e7593..2996740 100644
--- a/app/data_source/sources/slack/slack.py
+++ b/app/data_source/sources/slack/slack.py
@@ -29,6 +29,7 @@ class SlackAuthor:
 
 class SlackConfig(BaseDataSourceConfig):
     token: str
+    filters: str = ""  # optional: comma-separated substrings of channel names
 
 
 class SlackDataSource(BaseDataSource):
@@ -36,7 +37,10 @@ class SlackDataSource(BaseDataSource):
 
     @staticmethod
     def get_config_fields() -> List[ConfigField]:
-        return [ConfigField(label="Bot User OAuth Token", name="token", type=HTMLInputType.PASSWORD)]
+        return [
+            ConfigField(label="Bot User OAuth Token", name="token", type=HTMLInputType.PASSWORD),
+            ConfigField(label="Channel name filters", name="filters", type=HTMLInputType.TEXT),
+        ]
 
     @staticmethod
     async def validate_config(config: Dict) -> None:
@@ -52,11 +56,33 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         slack_config = SlackConfig(**self._raw_config)
         self._slack = WebClient(token=slack_config.token)
+        # Drop blank entries (e.g. a trailing comma): "" is a substring of every channel name.
+        self.filters = [part.strip() for part in (slack_config.filters or "").split(",") if part.strip()]
         self._authors_cache: Dict[str, SlackAuthor] = {}
 
     def _list_conversations(self) -> List[SlackConversation]:
+        """Return non-archived conversations from every result page, narrowed by self.filters if set."""
+        all_conversations = []
         conversations = self._slack.conversations_list(exclude_archived=True, limit=1000)
-        return [SlackConversation(id=conv["id"], name=conv["name"]) for conv in conversations["channels"]]
+        all_conversations.extend(
+            [SlackConversation(id=conv["id"], name=conv["name"]) for conv in conversations["channels"]]
+        )
+        while conversations["response_metadata"]["next_cursor"]:
+            conversations = self._slack.conversations_list(
+                exclude_archived=True, limit=1000, cursor=conversations["response_metadata"]["next_cursor"]
+            )
+            all_conversations.extend(
+                [SlackConversation(id=conv["id"], name=conv["name"]) for conv in conversations["channels"]]
+            )
+        logger.info(f"Found {len(all_conversations)} conversations")
+        if self.filters:
+            filtered_conversations = [
+                conv for conv in all_conversations if any(_filter in conv.name for _filter in self.filters)
+            ]
+            logger.info(f"Found {len(filtered_conversations)} conversations after filtering")
+            return filtered_conversations
+        else:
+            return all_conversations
 
     def _feed_conversations(self, conversations: List[SlackConversation]) -> List[SlackConversation]:
         joined_conversations = []
diff --git a/app/paths.py b/app/paths.py
index f8085b7..80b0204 100644
--- a/app/paths.py
+++ b/app/paths.py
@@ -1,20 +1,30 @@
-from pathlib import Path
+import getpass
 import os
+from pathlib import Path
+from sys import platform
 
-IS_IN_DOCKER = os.environ.get('DOCKER_DEPLOYMENT', False)
+IS_IN_DOCKER = os.environ.get("DOCKER_DEPLOYMENT", False)
 
-if os.name == 'nt':
+if os.name == "nt":
     STORAGE_PATH = Path(".gerev\\storage")
+elif platform == "darwin":  # chained: a bare `if` lets the `else` below clobber the Windows path
+    STORAGE_PATH = Path(f"/Users/{getpass.getuser()}/.gerev/storage/")
 else:
-    STORAGE_PATH = Path('/opt/storage/') if IS_IN_DOCKER else Path(f'/home/{os.getlogin()}/.gerev/storage/')
+    STORAGE_PATH = Path("/opt/storage/") if IS_IN_DOCKER else Path(f"/home/{os.getlogin()}/.gerev/storage/")
+
+try:
+    STORAGE_PATH_EXISTS = STORAGE_PATH.exists()
+except PermissionError:
+    STORAGE_PATH = Path(f"home/{os.getlogin()}/.gerev/storage/")  # remove leading /
+    STORAGE_PATH_EXISTS = STORAGE_PATH.exists()
 
-if not STORAGE_PATH.exists():
+if not STORAGE_PATH_EXISTS:
     STORAGE_PATH.mkdir(parents=True)
 
-UI_PATH = Path('/ui/') if IS_IN_DOCKER else Path('../ui/build/')
-SQLITE_DB_PATH = STORAGE_PATH / 'db.sqlite3'
-SQLITE_TASKS_PATH = STORAGE_PATH / 'tasks.sqlite3'
-SQLITE_INDEXING_PATH = STORAGE_PATH / 'indexing.sqlite3'
-FAISS_INDEX_PATH = str(STORAGE_PATH / 'faiss_index.bin')
-BM25_INDEX_PATH = str(STORAGE_PATH / 'bm25_index.bin')
-UUID_PATH = str(STORAGE_PATH / '.uuid')
+UI_PATH = Path("/ui/") if IS_IN_DOCKER else Path("../ui/build/")
+SQLITE_DB_PATH = STORAGE_PATH / "db.sqlite3"
+SQLITE_TASKS_PATH = STORAGE_PATH / "tasks.sqlite3"
+SQLITE_INDEXING_PATH = STORAGE_PATH / "indexing.sqlite3"
+FAISS_INDEX_PATH = str(STORAGE_PATH / "faiss_index.bin")
+BM25_INDEX_PATH = str(STORAGE_PATH / "bm25_index.bin")
+UUID_PATH = str(STORAGE_PATH / ".uuid")
diff --git a/app/requirements.txt b/app/requirements.txt
index 43c2443..d012040 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -16,6 +16,7 @@ fastapi-restful
 google-api-python-client
 google-auth-httplib2
 google-auth-oauthlib
+greenlet # needed to run on mac m1
 oauth2client
 mammoth
 python-pptx
diff --git a/ui/src/components/data-source-panel.tsx b/ui/src/components/data-source-panel.tsx
index 4a2e6b4..3b8c7f6 100644
--- a/ui/src/components/data-source-panel.tsx
+++ b/ui/src/components/data-source-panel.tsx
@@ -355,6 +355,8 @@ export default class DataSourcePanel extends React.Component
 *Gerev bot will join your channels.
+ 6. {"(Optional) Add filters to limit the channels to fetch. Gerev will only look for messages in the channels containing the filters you input."}
+ {'Example: setting "errors, operations" in the filters field will cause Gerev to index only the channels containing "errors" or "operations"'}
 )
 }