diff --git a/app/data_source/sources/slack/slack.py b/app/data_source/sources/slack/slack.py
index d9e7593..2996740 100644
--- a/app/data_source/sources/slack/slack.py
+++ b/app/data_source/sources/slack/slack.py
@@ -29,6 +29,7 @@ class SlackAuthor:
class SlackConfig(BaseDataSourceConfig):
token: str
+    # Optional comma-separated channel-name substrings; an empty value means no filtering.
+    # Defaulted so that configs saved before this field existed still load.
+    filters: str = ""
class SlackDataSource(BaseDataSource):
@@ -36,7 +37,10 @@ class SlackDataSource(BaseDataSource):
@staticmethod
def get_config_fields() -> List[ConfigField]:
- return [ConfigField(label="Bot User OAuth Token", name="token", type=HTMLInputType.PASSWORD)]
+        # Two fields: the bot token (password input) and the channel-name filters (text input).
+ return [
+ ConfigField(label="Bot User OAuth Token", name="token", type=HTMLInputType.PASSWORD),
+ ConfigField(label="Channel name filters", name="filters", type=HTMLInputType.TEXT),
+ ]
@staticmethod
async def validate_config(config: Dict) -> None:
@@ -52,11 +56,31 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
slack_config = SlackConfig(**self._raw_config)
self._slack = WebClient(token=slack_config.token)
+        # Parse the comma-separated channel-name filters. Blank entries (from "a,,b",
+        # a trailing comma, or whitespace-only input) are dropped: an empty string is a
+        # substring of every channel name and would silently disable filtering.
+        self.filters = [
+            name for name in (part.strip() for part in (slack_config.filters or "").split(",")) if name
+        ]
self._authors_cache: Dict[str, SlackAuthor] = {}
def _list_conversations(self) -> List[SlackConversation]:
+        """Return the non-archived conversations, optionally restricted by name filters.
+
+        Pages through ``conversations_list`` by following ``next_cursor`` until no
+        cursor is returned. When ``self.filters`` is non-empty, only conversations
+        whose name contains at least one filter string are returned.
+        """
+        all_conversations = []
conversations = self._slack.conversations_list(exclude_archived=True, limit=1000)
- return [SlackConversation(id=conv["id"], name=conv["name"]) for conv in conversations["channels"]]
+        while True:
+            all_conversations.extend(
+                SlackConversation(id=conv["id"], name=conv["name"]) for conv in conversations["channels"]
+            )
+            # An empty or absent next_cursor means this was the last page.
+            next_cursor = (conversations.get("response_metadata") or {}).get("next_cursor")
+            if not next_cursor:
+                break
+            conversations = self._slack.conversations_list(
+                exclude_archived=True, limit=1000, cursor=next_cursor
+            )
+        logger.info(f"Found {len(all_conversations)} conversations")
+
+        if not self.filters:
+            return all_conversations
+
+        # Substring match: a conversation is kept if ANY filter occurs in its name.
+        filtered_conversations = [
+            conv for conv in all_conversations if any(_filter in conv.name for _filter in self.filters)
+        ]
+        logger.info(f"Found {len(filtered_conversations)} conversations after filtering")
+        return filtered_conversations
def _feed_conversations(self, conversations: List[SlackConversation]) -> List[SlackConversation]:
joined_conversations = []
diff --git a/app/paths.py b/app/paths.py
index f8085b7..80b0204 100644
--- a/app/paths.py
+++ b/app/paths.py
@@ -1,20 +1,30 @@
-from pathlib import Path
+import getpass
import os
+from pathlib import Path
+from sys import platform
-IS_IN_DOCKER = os.environ.get('DOCKER_DEPLOYMENT', False)
+IS_IN_DOCKER = os.environ.get("DOCKER_DEPLOYMENT", False)
-if os.name == 'nt':
+# Select the storage root for the current platform. This must be a single
+# if/elif/else chain: with a second independent `if`, the trailing `else` would
+# also run on Windows and overwrite the Windows path.
+if os.name == "nt":
STORAGE_PATH = Path(".gerev\\storage")
+elif platform == "darwin":
+    STORAGE_PATH = Path(f"/Users/{getpass.getuser()}/.gerev/storage/")
else:
- STORAGE_PATH = Path('/opt/storage/') if IS_IN_DOCKER else Path(f'/home/{os.getlogin()}/.gerev/storage/')
+    STORAGE_PATH = Path("/opt/storage/") if IS_IN_DOCKER else Path(f"/home/{os.getlogin()}/.gerev/storage/")
+
+# Probe the storage root once. If exists() raises PermissionError, fall back to a
+# relative path (no leading slash), which resolves against the current working directory.
+try:
+ STORAGE_PATH_EXISTS = STORAGE_PATH.exists()
+except PermissionError:
+ STORAGE_PATH = Path(f"home/{os.getlogin()}/.gerev/storage/") # remove leading /
+ STORAGE_PATH_EXISTS = STORAGE_PATH.exists()
-if not STORAGE_PATH.exists():
+if not STORAGE_PATH_EXISTS:
STORAGE_PATH.mkdir(parents=True)
-UI_PATH = Path('/ui/') if IS_IN_DOCKER else Path('../ui/build/')
-SQLITE_DB_PATH = STORAGE_PATH / 'db.sqlite3'
-SQLITE_TASKS_PATH = STORAGE_PATH / 'tasks.sqlite3'
-SQLITE_INDEXING_PATH = STORAGE_PATH / 'indexing.sqlite3'
-FAISS_INDEX_PATH = str(STORAGE_PATH / 'faiss_index.bin')
-BM25_INDEX_PATH = str(STORAGE_PATH / 'bm25_index.bin')
-UUID_PATH = str(STORAGE_PATH / '.uuid')
+UI_PATH = Path("/ui/") if IS_IN_DOCKER else Path("../ui/build/")
+SQLITE_DB_PATH = STORAGE_PATH / "db.sqlite3"
+SQLITE_TASKS_PATH = STORAGE_PATH / "tasks.sqlite3"
+SQLITE_INDEXING_PATH = STORAGE_PATH / "indexing.sqlite3"
+FAISS_INDEX_PATH = str(STORAGE_PATH / "faiss_index.bin")
+BM25_INDEX_PATH = str(STORAGE_PATH / "bm25_index.bin")
+UUID_PATH = str(STORAGE_PATH / ".uuid")
diff --git a/app/requirements.txt b/app/requirements.txt
index 43c2443..d012040 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -16,6 +16,7 @@ fastapi-restful
google-api-python-client
google-auth-httplib2
google-auth-oauthlib
+greenlet # needed to run on mac m1
oauth2client
mammoth
python-pptx
diff --git a/ui/src/components/data-source-panel.tsx b/ui/src/components/data-source-panel.tsx
index 4a2e6b4..3b8c7f6 100644
--- a/ui/src/components/data-source-panel.tsx
+++ b/ui/src/components/data-source-panel.tsx
@@ -355,6 +355,8 @@ export default class DataSourcePanel extends React.Component
*Gerev bot will join your channels.
+ 6. {"(Optional) Add filters to limit the channels to fetch. Gerev will only look for messages in the channels containing the filters you input."}
+ {'Example: setting "errors, operations" in the filters field will cause Gerev to index only the channels containing "errors" or "operations"'}
)
}