diff --git a/config/chat-vectorstore-qa.yml b/config/chat-vectorstore-qa.yml index 8765e6d..62802bc 100644 --- a/config/chat-vectorstore-qa.yml +++ b/config/chat-vectorstore-qa.yml @@ -17,8 +17,7 @@ llm: Helpful answer: vector: - vector_path: "qdrant" - # vector_path: ./vectorstore/qdrant # Path to the vectorstore to do QA retrieval + vector_path: ./vectorstore/db_faiss # Path to the vectorstore to do QA retrieval vector_download: null embeddings_path: ./embeddings/all-MiniLM-L6-v2 # Embeddings used to generate the vectors. To use from HF: sentence-transformers/all-MiniLM-L6-v2 embeddings_download: https://public.ukp.informatik.tu-darmstadt.de/reimers/sentence-transformers/v0.2/all-MiniLM-L6-v2.zip diff --git a/docker-compose.yml b/docker-compose.yml index 07be8d7..e4a21cf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -42,22 +42,22 @@ services: - nginx - qdrant: - # https://hub.docker.com/r/qdrant/qdrant/tags - image: qdrant/qdrant:v1.5.1 - restart: unless-stopped - volumes: - - ./data/vectorstore/qdrant:/qdrant/storage - # - ./scripts/qdrant_config.yml:/qdrant/config/production.yaml - environment: - - QDRANT_ALLOW_RECOVERY_MODE=true - # - VIRTUAL_HOST=qdrant.137.120.31.148.nip.io - # - LETSENCRYPT_HOST=qdrant.137.120.31.148.nip.io - # - VIRTUAL_PORT=6333 - # ports: - # - 6333:6333 - # command: - # - ./qdrant --config-path /qdrant/qdrant_config.yml + # qdrant: + # # https://hub.docker.com/r/qdrant/qdrant/tags + # image: qdrant/qdrant:v1.5.1 + # restart: unless-stopped + # volumes: + # - ./data/vectorstore/qdrant:/qdrant/storage + # # - ./scripts/qdrant_config.yml:/qdrant/config/production.yaml + # environment: + # - QDRANT_ALLOW_RECOVERY_MODE=true + # # - VIRTUAL_HOST=qdrant.137.120.31.148.nip.io + # # - LETSENCRYPT_HOST=qdrant.137.120.31.148.nip.io + # # - VIRTUAL_PORT=6333 + # # ports: + # # - 6333:6333 + # # command: + # # - ./qdrant --config-path /qdrant/qdrant_config.yml # Also required to deploy containers publicly diff --git a/src/libre_chat/llm.py b/src/libre_chat/llm.py index 9a911f6..07772d2 100644 --- a/src/libre_chat/llm.py +++ b/src/libre_chat/llm.py @@ -27,7 +27,7 @@ ) from langchain_community.embeddings import HuggingFaceEmbeddings from langchain_community.llms import LlamaCpp -from langchain_community.vectorstores import Qdrant +from langchain_community.vectorstores import FAISS from libre_chat.conf import ChatConf, default_conf from libre_chat.utils import BOLD, CYAN, END, log, parallel_download @@ -163,18 +163,16 @@ def get_llm(self, config: Optional[Dict[str, Any]] = None) -> LlamaCpp: def setup_dbqa(self) -> None: """Setup the vectorstore for QA""" if self.has_vectorstore(): - from qdrant_client import QdrantClient - embeddings = HuggingFaceEmbeddings( model_name=self.conf.vector.embeddings_path, model_kwargs={"device": self.device} ) # FAISS should automatically use GPU? - # vectorstore = FAISS.load_local(self.get_vectorstore(), embeddings) - vectorstore = Qdrant( - QdrantClient(host=self.conf.vector.vector_path, prefer_grpc=True), - collection_name="libre_chat_rag", - embeddings=embeddings, - ) + vectorstore = FAISS.load_local(self.get_vectorstore(), embeddings) + # vectorstore = Qdrant( + # QdrantClient(host=self.conf.vector.vector_path, prefer_grpc=True), + # collection_name="libre_chat_rag", + # embeddings=embeddings, + # ) search_args: Dict[str, Any] = {"k": self.conf.vector.return_sources_count} if self.conf.vector.score_threshold is not None: @@ -189,7 +187,7 @@ def setup_dbqa(self) -> None: chain_type_kwargs={"prompt": self.prompt}, ) - def build_vectorstore(self, documents_path: Optional[str] = None) -> Optional[Qdrant]: + def build_vectorstore(self, documents_path: Optional[str] = None) -> Optional[FAISS]: """Build vectorstore from documents.""" # https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/vectorstores/qdrant.py time_start = datetime.now() @@ -226,19 +224,20 @@ def build_vectorstore(self, documents_path: Optional[str] = None) -> Optional[Qd embeddings = HuggingFaceEmbeddings( model_name=self.conf.vector.embeddings_path, model_kwargs={"device": self.device} ) - os.makedirs(str(self.conf.vector.vector_path), exist_ok=True) - vectorstore = Qdrant.from_documents( - splitted_texts, - embeddings, - # path=self.conf.vector.vector_path, - host=self.conf.vector.vector_path, - collection_name="libre_chat_rag", - prefer_grpc=True, - # force_recreate=True, - ) - # vectorstore = FAISS.from_documents(splitted_texts, embeddings) - # if self.vector_path: - # vectorstore.save_local(self.vector_path) + # TODO: use Qdrant vectorstore + # os.makedirs(str(self.conf.vector.vector_path), exist_ok=True) + # vectorstore = Qdrant.from_documents( + # splitted_texts, + # embeddings, + # # path=self.conf.vector.vector_path, + # host=self.conf.vector.vector_path, + # collection_name="libre_chat_rag", + # prefer_grpc=True, + # # force_recreate=True, + # ) + vectorstore = FAISS.from_documents(splitted_texts, embeddings) + if self.vector_path: + vectorstore.save_local(self.vector_path) log.info(f"✅ Vectorstore built in {datetime.now() - time_start}") return vectorstore return None