From c5a0675753a4b72f6cdf47ee8cceed84e94dfedd Mon Sep 17 00:00:00 2001 From: KevKibe Date: Mon, 3 Jun 2024 16:29:22 +0300 Subject: [PATCH 01/17] fix: run tests on pull_request_target --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 4ef8cd5..9436113 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,6 +1,6 @@ name: Tests -on: [pull_request] +on: [pull_request_target] jobs: test: From dad166be8460170969ef9b3e260b88436bdd7994 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Mon, 3 Jun 2024 16:43:41 +0300 Subject: [PATCH 02/17] fix: add secrets to dependabot configuration --- .github/dependabot.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9d866e3..dea7a3b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,3 +9,8 @@ updates: directory: "/" # Location of package manifests schedule: interval: "weekly" + secrets: + - PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + - COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} From 475bd8a62a6319a697f25a4ac20c5c93c2883130 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Mon, 3 Jun 2024 16:44:24 +0300 Subject: [PATCH 03/17] fix: revert back to on pull_request --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 9436113..4ef8cd5 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,6 +1,6 @@ name: Tests -on: [pull_request_target] +on: [pull_request] jobs: test: From 36fedb3d74b646b520a7a1161a6eec400b043301 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Mon, 3 Jun 2024 17:31:25 +0300 Subject: [PATCH 04/17] fix: GH secrets access in GA run --- .github/dependabot.yml | 6 +----- .github/workflows/tests.yaml | 4 ++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index dea7a3b..182feb7 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,8 +9,4 @@ updates: directory: "/" # Location of package manifests schedule: interval: "weekly" - secrets: - - PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} - - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 4ef8cd5..4b46188 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -2,6 +2,10 @@ name: Tests on: [pull_request] +permissions: + id-token: write + + jobs: test: runs-on: ubuntu-latest From 9cb1930a9159e10d454a5f47754f3f8ea199fb8c Mon Sep 17 00:00:00 2001 From: KevKibe Date: Mon, 3 Jun 2024 18:35:42 +0300 Subject: [PATCH 05/17] chore: add gitignore --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a8cf33c..305c4f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ src/_openai/__pycache__ venv -src/_openai/main.py \ No newline at end of file +src/_openai/main.py +src/tests/__pycache__ +src/_cohere/__pycache__ +src/_google/__pycache__ +.DS_STORE From 89659265e084a1d1b27b6ebdc4a2e0f19eb620b4 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Mon, 3 Jun 2024 18:38:21 +0300 Subject: [PATCH 06/17] Merf --- .github/workflows/tests.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index f78152d..dc1a756 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -58,4 +58,5 @@ jobs: run: | pytest src/tests/openaiindex_test.py pytest src/tests/googleindex_test.py - pytest src/tests/cohereindex_test.py \ No newline at end of file + pytest src/tests/cohereindex_test.py + \ No newline at end of file From 742c0de52519ecb8c05fd6d5b23832760c92fe42 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Mon, 3 Jun 2024 18:42:40 +0300 Subject: [PATCH 07/17] chore(dependencies): rename dependabot test run --- .github/workflows/tests.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index dc1a756..6ec52f6 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -32,7 +32,7 @@ jobs: pytest src/tests/googleindex_test.py pytest src/tests/cohereindex_test.py - dependabot-test: + dependabot-test-run: runs-on: ubuntu-latest if: ${{ github.actor == 'dependabot[bot]' }} steps: @@ -58,5 +58,4 @@ jobs: run: | pytest src/tests/openaiindex_test.py pytest src/tests/googleindex_test.py - pytest src/tests/cohereindex_test.py - \ No newline at end of file + pytest src/tests/cohereindex_test.py \ No newline at end of file From 5c3cb9949361c512dd8953dfdb633d78b2d181a4 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Mon, 3 Jun 2024 22:24:44 +0300 Subject: [PATCH 08/17] chore(dependabot): add permissions field --- .github/workflows/tests.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 6ec52f6..6e235a0 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -2,6 +2,8 @@ name: Tests on: pull_request +permissions: read-all + jobs: test: From 3a18e28de87250b25f112205ad1fbb21b21c07f7 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Tue, 4 Jun 2024 12:10:35 +0300 Subject: [PATCH 09/17] chore(dependabot): add test trigger on pull request target master branch --- .github/workflows/tests.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 6e235a0..645391e 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,8 +1,8 @@ name: Tests -on: pull_request - -permissions: read-all +on: + pull_request_target: + branches: [master] jobs: From 6fdecf2418aa41431e2f43489c8b10be7f741a96 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 5 Jun 2024 10:50:11 +0300 Subject: [PATCH 10/17] update: add dictionary to get reranker models in constant time complexity --- src/utils/rerank.py | 53 +++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/src/utils/rerank.py b/src/utils/rerank.py index 6f93131..c6d38a2 100644 --- a/src/utils/rerank.py +++ b/src/utils/rerank.py @@ -1,49 +1,40 @@ from rerankers import Reranker class RerankerConfig: + SUPPORTED_MODELS = { + 'cohere': {'lang': True, 'api_key': True}, + 'jina': {'api_key': True}, + 'cross-encoder': {'api_key': False}, + 'flashrank': {'api_key': False}, + 't5': {'api_key': False}, + 'rankgpt': {'api_key': True}, + 'rankgpt3': {'api_key': True}, + 'colbert': {'api_key': False}, + 'mixedbread-ai/mxbai-rerank-large-v1': {'model_type': True}, + 'ce-esci-MiniLM-L12-v2': {'model_type': True}, + 'unicamp-dl/InRanker-base': {'model_type': True}, + } @staticmethod - def get_ranker(rerank_model: str, model_type: str = None, lang: str = None, api_key: str = None, api_provider: str = None) -> Reranker: + def get_ranker(rerank_model: str, lang: str = None, api_key: str = None, model_type: str = None) -> Reranker: """ Returns a Reranker instance based on the provided parameters. Args: rerank_model (str): The name or path of the model. - model_type (str, optional): The type of the model. Defaults to None. lang (str, optional): The language for multilingual models. Defaults to None. api_key (str, optional): The API key for models accessed through an API. Defaults to None. - api_provider (str, optional): The provider of the API. Defaults to None. + model_type (str, optional): The model type of a reranker, defaults to None. Returns: Reranker: An instance of Reranker. Raises: - ValueError: If unsupported model_type is provided. + ValueError: If unsupported rerank_model is provided. """ - if rerank_model and rerank_model not in ["cross-encoder", "flashrank", "t5", "rankgpt", "colbert", "mixedbread-ai/mxbai-rerank-large-v1", "ce-esci-MiniLM-L12-v2", "unicamp-dl/InRanker-base", "jina", - "rankgpt", "rankgpt3"]: - raise ValueError("Unsupported model_type provided.") + if rerank_model not in RerankerConfig.SUPPORTED_MODELS: + raise ValueError(f"Unsupported rerank_model provided: {rerank_model}") - if rerank_model == 'cohere': - return Reranker(rerank_model, lang=lang, api_key=api_key) - elif rerank_model == 'jina': - return Reranker(rerank_model, api_key=api_key) - elif rerank_model == 'cross-encoder': - return Reranker(rerank_model) - elif rerank_model == 'flashrank': - return Reranker(rerank_model) - elif rerank_model == 't5': - return Reranker(rerank_model) - elif rerank_model == 'rankgpt': - return Reranker(rerank_model, api_key=api_key) - elif rerank_model == 'rankgpt3': - return Reranker(rerank_model, api_key=api_key) - elif rerank_model == 'colbert': - return Reranker(rerank_model) - elif rerank_model == "mixedbread-ai/mxbai-rerank-large-v1": - return Reranker(rerank_model, model_type='cross-encoder') - elif rerank_model == "ce-esci-MiniLM-L12-v2": - return Reranker(rerank_model, model_type='flashrank') - elif rerank_model == "unicamp-dl/InRanker-base": - return Reranker(rerank_model, model_type='t5') - else: - return None + model_config = RerankerConfig.SUPPORTED_MODELS[rerank_model] + return Reranker(rerank_model, lang=lang if model_config.get('lang') else None, + api_key=api_key if model_config.get('api_key') else None, + model_type=model_type if model_config.get('model_type') else None) \ No newline at end of file From a0b1cbe47297ed8ad5eeadcd5288a345f89d4f74 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 5 Jun 2024 11:12:20 +0300 Subject: [PATCH 11/17] fix: remove api_provider parameter from retrieve_and_generate method --- src/_cohere/doc_index.py | 7 ++----- src/_google/doc_index.py | 7 ++----- src/_openai/doc_index.py | 7 ++----- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/_cohere/doc_index.py b/src/_cohere/doc_index.py index 343ed9a..b201b6b 100644 --- a/src/_cohere/doc_index.py +++ b/src/_cohere/doc_index.py @@ -242,8 +242,7 @@ def retrieve_and_generate( rerank_model: str = 'flashrank', model_type: Optional[str] = None, lang: Optional[str] = None, - api_key: Optional[str] = None, - api_provider: Optional[str] = None, + api_key: Optional[str] = None ) -> QueryResult: """ Retrieve documents from the Pinecone index and generate a response. @@ -256,7 +255,6 @@ def retrieve_and_generate( model_type (str, optional): The type of the model (e.g., 'cross-encoder', 'flashrank', 't5', etc.). lang (str, optional): The language for multilingual models. api_key (str, optional): The API key for models accessed through an API. - api_provider (str, optional): The provider of the API. Returns: QueryResult: A Pydantic model representing the generated response. @@ -274,8 +272,7 @@ def retrieve_and_generate( rerank_model, model_type, lang, - api_key, - api_provider + api_key ) compressor = ranker.as_langchain_compressor(k=top_k) compression_retriever = ContextualCompressionRetriever( diff --git a/src/_google/doc_index.py b/src/_google/doc_index.py index c8d07f9..eb32129 100644 --- a/src/_google/doc_index.py +++ b/src/_google/doc_index.py @@ -255,8 +255,7 @@ def retrieve_and_generate( rerank_model: str = 'flashrank', model_type: Optional[str] = None, lang: Optional[str] = None, - api_key: Optional[str] = None, - api_provider: Optional[str] = None, + api_key: Optional[str] = None ) -> QueryResult: """ Retrieve documents from the Pinecone index and generate a response. @@ -269,7 +268,6 @@ def retrieve_and_generate( model_type (str, optional): The type of the model (e.g., 'cross-encoder', 'flashrank', 't5', etc.). lang (str, optional): The language for multilingual models. api_key (str, optional): The API key for models accessed through an API. - api_provider (str, optional): The provider of the API. Returns: QueryResult: A Pydantic model representing the generated response. @@ -287,8 +285,7 @@ def retrieve_and_generate( rerank_model, model_type, lang, - api_key, - api_provider + api_key ) compressor = ranker.as_langchain_compressor(k=top_k) compression_retriever = ContextualCompressionRetriever( diff --git a/src/_openai/doc_index.py b/src/_openai/doc_index.py index f20d056..ef572d5 100644 --- a/src/_openai/doc_index.py +++ b/src/_openai/doc_index.py @@ -255,7 +255,6 @@ def retrieve_and_generate( model_type: Optional[str] = None, lang: Optional[str] = None, api_key: Optional[str] = None, - api_provider: Optional[str] = None, ) -> QueryResult: """ Retrieve documents from the Pinecone index and generate a response. @@ -268,7 +267,6 @@ def retrieve_and_generate( model_type (str, optional): The type of the model (e.g., 'cross-encoder', 'flashrank', 't5', etc.). lang (str, optional): The language for multilingual models. api_key (str, optional): The API key for models accessed through an API. - api_provider (str, optional): The provider of the API. Returns: QueryResult: A Pydantic model representing the generated response. @@ -284,10 +282,9 @@ def retrieve_and_generate( retriever = vector_store.as_retriever() ranker = RerankerConfig.get_ranker( rerank_model, - model_type, lang, - api_key, - api_provider + api_key, + model_type, ) compressor = ranker.as_langchain_compressor(k=top_k) compression_retriever = ContextualCompressionRetriever( From 9ed15a966fa7c88dc23c50b7e2a0cc07dc9e84b2 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 5 Jun 2024 17:45:02 +0300 Subject: [PATCH 12/17] chore(dependencies): add rerankers package --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 3dde6b7..4435274 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,6 +28,7 @@ install_requires = markdown==3.6 langchain-core==0.1.46 langchain-cohere==0.1.4 + rerankers[all]==0.2.0 package_dir= =src From b244dec9df0c550e46b08a34a9688f8d36a76d43 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 5 Jun 2024 17:45:58 +0300 Subject: [PATCH 13/17] fix: get reranker method and implementation in test --- src/tests/openaiindex_test.py | 3 ++- src/utils/rerank.py | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/tests/openaiindex_test.py b/src/tests/openaiindex_test.py index 8ab1c40..cc9f7fd 100644 --- a/src/tests/openaiindex_test.py +++ b/src/tests/openaiindex_test.py @@ -67,7 +67,8 @@ def test_04_retrieve_and_generate(self): query = "give a short summary of the introduction", vector_store = vectorstore, top_k = 3, - rerank_model = "t5" + # lang= "en", + rerank_model = "flashrank" ) self.assertIsNotNone(response, "The retriever response should not be None.") diff --git a/src/utils/rerank.py b/src/utils/rerank.py index c6d38a2..f7733a0 100644 --- a/src/utils/rerank.py +++ b/src/utils/rerank.py @@ -4,12 +4,12 @@ class RerankerConfig: SUPPORTED_MODELS = { 'cohere': {'lang': True, 'api_key': True}, 'jina': {'api_key': True}, - 'cross-encoder': {'api_key': False}, - 'flashrank': {'api_key': False}, - 't5': {'api_key': False}, + 'cross-encoder': {}, + 'flashrank': {}, + 't5': {}, 'rankgpt': {'api_key': True}, 'rankgpt3': {'api_key': True}, - 'colbert': {'api_key': False}, + 'colbert': {}, 'mixedbread-ai/mxbai-rerank-large-v1': {'model_type': True}, 'ce-esci-MiniLM-L12-v2': {'model_type': True}, 'unicamp-dl/InRanker-base': {'model_type': True}, @@ -35,6 +35,11 @@ def get_ranker(rerank_model: str, lang: str = None, api_key: str = None, model_t raise ValueError(f"Unsupported rerank_model provided: {rerank_model}") model_config = RerankerConfig.SUPPORTED_MODELS[rerank_model] - return Reranker(rerank_model, lang=lang if model_config.get('lang') else None, - api_key=api_key if model_config.get('api_key') else None, - model_type=model_type if model_config.get('model_type') else None) \ No newline at end of file + init_kwargs = { + 'lang': lang if model_config.get('lang') else None, + 'api_key': api_key if model_config.get('api_key') else None, + 'model_type': model_type if model_config.get('model_type') else None + } + init_kwargs = {k: v for k, v in init_kwargs.items() if v is not None} + return Reranker(rerank_model, **init_kwargs) + From 74cf04d1a6d53252c2c81cd9fce0fe8fadc57e53 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Tue, 11 Jun 2024 07:40:37 +0300 Subject: [PATCH 14/17] feat: Add pydantic_parser parameter to retrieve_and_generate --- src/_openai/doc_index.py | 29 +++++++++++++++++++---------- src/tests/openaiindex_test.py | 6 ++++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/_openai/doc_index.py b/src/_openai/doc_index.py index ef572d5..7f041a1 100644 --- a/src/_openai/doc_index.py +++ b/src/_openai/doc_index.py @@ -251,6 +251,7 @@ def retrieve_and_generate( query: str, vector_store: str, top_k: int =3, + pydantic_parser: bool = True, rerank_model: str = 'flashrank', model_type: Optional[str] = None, lang: Optional[str] = None, @@ -263,6 +264,7 @@ def retrieve_and_generate( query (str): The query from the user. vector_store (str): The name of the Pinecone index. top_k (int, optional): The number of documents to retrieve from the index (default is 3). + pydantic_parser (bool, optional): Whether to use Pydantic parsing for the generated response (default is True). rerank_model (str, optional): The name or path of the model to use for ranking (default is 'flashrank'). model_type (str, optional): The type of the model (e.g., 'cross-encoder', 'flashrank', 't5', etc.). lang (str, optional): The language for multilingual models. @@ -291,16 +293,23 @@ def retrieve_and_generate( base_compressor=compressor, base_retriever=retriever ) - - rag_chain = ( - {"context": itemgetter("query")| compression_retriever, - "query": itemgetter("query"), - } - | rag_prompt - | llm - | parser - ) - + if pydantic_parser: + rag_chain = ( + {"context": itemgetter("query")| compression_retriever, + "query": itemgetter("query"), + } + | rag_prompt + | llm + | parser + ) + else: + rag_chain = ( + {"context": itemgetter("query")| compression_retriever, + "query": itemgetter("query"), + } + | rag_prompt + | llm + ) return rag_chain.invoke({"query": query}) diff --git a/src/tests/openaiindex_test.py b/src/tests/openaiindex_test.py index cc9f7fd..93692a2 100644 --- a/src/tests/openaiindex_test.py +++ b/src/tests/openaiindex_test.py @@ -42,7 +42,7 @@ def test_02_index_documents(self, mock_print): Test indexing documents and assert the type of the index. """ urls = [ - "https://arxiv.org/pdf/1706.03762.pdf", + "https://arxiv.org/pdf/1706.03762.pdf" "src/tests/DOCX_TestPage.docx", "src/tests/TEST.md", "src/tests/test.html" @@ -68,8 +68,10 @@ def test_04_retrieve_and_generate(self): vector_store = vectorstore, top_k = 3, # lang= "en", - rerank_model = "flashrank" + rerank_model = "flashrank", + pydantic_parser=True ) + print(response) self.assertIsNotNone(response, "The retriever response should not be None.") @patch('sys.stdout', new_callable=StringIO) From 72439041cef034c62c5903a8397a2e4274d5a323 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Tue, 11 Jun 2024 07:41:19 +0300 Subject: [PATCH 15/17] feat: Add pydantic_parser parameter to retrieve_and_generate --- src/_cohere/doc_index.py | 27 +++++++++++++++++++-------- src/tests/cohereindex_test.py | 27 ++++++++++++++------------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/src/_cohere/doc_index.py b/src/_cohere/doc_index.py index b201b6b..c5685f5 100644 --- a/src/_cohere/doc_index.py +++ b/src/_cohere/doc_index.py @@ -239,6 +239,7 @@ def retrieve_and_generate( query: str, vector_store: str, top_k: int =3, + pydantic_parser: bool = True, rerank_model: str = 'flashrank', model_type: Optional[str] = None, lang: Optional[str] = None, @@ -251,6 +252,7 @@ def retrieve_and_generate( query (str): The query from the user. vector_store (str): The name of the Pinecone index. top_k (int, optional): The number of documents to retrieve from the index (default is 3). + pydantic_parser (bool, optional): Whether to use Pydantic parsing for the generated response (default is True). rerank_model (str, optional): The name or path of the model to use for ranking (default is 'flashrank'). model_type (str, optional): The type of the model (e.g., 'cross-encoder', 'flashrank', 't5', etc.). lang (str, optional): The language for multilingual models. @@ -280,13 +282,22 @@ def retrieve_and_generate( base_retriever=retriever ) - rag_chain = ( - {"context": itemgetter("query")| compression_retriever, - "query": itemgetter("query"), - } - | rag_prompt - | llm - | parser - ) + if pydantic_parser: + rag_chain = ( + {"context": itemgetter("query")| compression_retriever, + "query": itemgetter("query"), + } + | rag_prompt + | llm + | parser + ) + else: + rag_chain = ( + {"context": itemgetter("query")| compression_retriever, + "query": itemgetter("query"), + } + | rag_prompt + | llm + ) return rag_chain.invoke({"query": query}) diff --git a/src/tests/cohereindex_test.py b/src/tests/cohereindex_test.py index b284fa4..5b8e67c 100644 --- a/src/tests/cohereindex_test.py +++ b/src/tests/cohereindex_test.py @@ -54,19 +54,20 @@ def test_03_initialize_vectorstore(self): vectorstore = self.indexer.initialize_vectorstore(self.index_name) self.assertIsInstance(vectorstore, PineconeVectorStore) - # def test_04_retrieve_and_generate(self): - # """ - # Test initializing the vector store and assert its type. - # """ - # vector_store = self.indexer.initialize_vectorstore(self.index_name) - # response = self.indexer.retrieve_and_generate( - # query = "give a short summary of the introduction", - # vector_store = vector_store, - # top_k = 3, - # rerank_model = "t5" - # ) - # print(response) - # self.assertIsNotNone(response, "The retriever response should not be None.") + def test_04_retrieve_and_generate(self): + """ + Test initializing the vector store and assert its type. + """ + vector_store = self.indexer.initialize_vectorstore(self.index_name) + response = self.indexer.retrieve_and_generate( + query = "give a short summary of the introduction", + vector_store = vector_store, + top_k = 1, + pydantic_parser=False, + rerank_model = "flashrank" + ) + print(response) + self.assertIsNotNone(response, "The retriever response should not be None.") @patch('sys.stdout', new_callable=StringIO) def test_05_delete_index(self, mock_stdout): From 68a6bba8e531ddf44603c707fac4c118cfe3207e Mon Sep 17 00:00:00 2001 From: KevKibe Date: Tue, 11 Jun 2024 07:51:12 +0300 Subject: [PATCH 16/17] fix: pydantic parser to false --- src/tests/openaiindex_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/openaiindex_test.py b/src/tests/openaiindex_test.py index 93692a2..d081cda 100644 --- a/src/tests/openaiindex_test.py +++ b/src/tests/openaiindex_test.py @@ -69,7 +69,7 @@ def test_04_retrieve_and_generate(self): top_k = 3, # lang= "en", rerank_model = "flashrank", - pydantic_parser=True + pydantic_parser=False ) print(response) self.assertIsNotNone(response, "The retriever response should not be None.") From 836d950ed9ea14e1c890853df1349fe777ac3abd Mon Sep 17 00:00:00 2001 From: KevKibe Date: Tue, 11 Jun 2024 08:05:17 +0300 Subject: [PATCH 17/17] fix: source documents list --- src/tests/openaiindex_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/openaiindex_test.py b/src/tests/openaiindex_test.py index d081cda..5a1a92f 100644 --- a/src/tests/openaiindex_test.py +++ b/src/tests/openaiindex_test.py @@ -42,7 +42,7 @@ def test_02_index_documents(self, mock_print): Test indexing documents and assert the type of the index. """ urls = [ - "https://arxiv.org/pdf/1706.03762.pdf" + "https://arxiv.org/pdf/1706.03762.pdf", "src/tests/DOCX_TestPage.docx", "src/tests/TEST.md", "src/tests/test.html"