From 74d29e211344599f0e37b7211659730fefdf6eba Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 15:59:44 +0300 Subject: [PATCH 01/10] add test for _google.docindex module --- src/tests/__init__.py | 0 src/tests/googleindex_test.py | 75 +++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 src/tests/__init__.py create mode 100644 src/tests/googleindex_test.py diff --git a/src/tests/__init__.py b/src/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tests/googleindex_test.py b/src/tests/googleindex_test.py new file mode 100644 index 0000000..899399c --- /dev/null +++ b/src/tests/googleindex_test.py @@ -0,0 +1,75 @@ +import unittest +from _google.docindex import GooglePineconeIndexer +import os +from io import StringIO +from unittest.mock import patch +import pinecone +from langchain_pinecone import PineconeVectorStore + +class TestGooglePineconeIndexer(unittest.TestCase): + """ + Test case class for the GooglePineconeIndexer. + """ + + def setUp(self): + """ + Set up the test case with common attributes. + """ + self.index_name = "new-index-1" + self.pinecone_api_key = os.environ.get('PINECONE_API_KEY') + self.google_api_key = os.environ.get('GOOGLE_API_KEY') + self.indexer = GooglePineconeIndexer(self.index_name, self.pinecone_api_key, self.google_api_key) + + @patch('sys.stdout', new_callable=StringIO) + def test_create_index(self, mock_stdout): + """ + Test creating an index and assert the output. 
+ """ + self.indexer.create_index() + printed_output = mock_stdout.getvalue().strip() + lines = printed_output.split('\n') + index_created_message_0 = lines[0] + self.assertEqual(index_created_message_0, f"Creating index {self.index_name}") + index_created_message_1 = lines[1] + self.assertEqual(index_created_message_1, f"Index {self.index_name} created successfully!") + + @patch('builtins.print') + def test_index_documents(self, mock_print): + """ + Test indexing documents and assert the type of the index. + """ + urls = ["https://arxiv.org/pdf/1706.03762.pdf"] + self.indexer.index_documents(urls, batch_limit=10, chunk_size=256) + index = self.indexer.pc.Index(self.index_name) + self.assertIsInstance(index, pinecone.data.index.Index) + + def test_initialize_vectorstore(self): + """ + Test initializing the vector store and assert its type. + """ + vectorstore = self.indexer.initialize_vectorstore(self.index_name) + self.assertIsInstance(vectorstore, PineconeVectorStore) + + @patch('sys.stdout', new_callable=StringIO) + def test_delete_index(self, mock_stdout): + """ + Test deleting an index and assert the output. + """ + self.indexer.delete_index() + printed_output = mock_stdout.getvalue().strip() + lines = printed_output.split('\n') + index_deleted_message_0 = lines[0] + self.assertEqual(index_deleted_message_0, f"Deleting index {self.index_name}") + index_deleted_message_1 = lines[1] + self.assertEqual(index_deleted_message_1, f"Index {self.index_name} deleted successfully!") + + @classmethod + def sort_test_methods(cls, testCaseClass, testCaseNames): + """ + Sort test methods for better readability. 
+ """ + return sorted(testCaseNames) + +if __name__ == "__main__": + unittest.TestLoader.sortTestMethodsUsing = TestGooglePineconeIndexer.sort_test_methods + unittest.main() From cd16235557cda5b3485b6ef01ac7809b55543b85 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:00:24 +0300 Subject: [PATCH 02/10] add tests github workflow --- .github/workflows/tests.yaml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/tests.yaml diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 0000000..dfd4020 --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,29 @@ +name: Tests + +on: [pull_request, push] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + pip install -r requirements.txt --no-cache-dir + pip install pytest einops lion-pytorch + + - name: Test with pytest + env: + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + run: | + pytest + \ No newline at end of file From 96af7b8488602088196c03b0eba048c2cd09314d Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:03:35 +0300 Subject: [PATCH 03/10] chore: upgrade unstructured dependency to 0.13.2 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 37040df..c2c3bd9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pinecone-client==3.2.2 tiktoken==0.6.0 pypdf==4.1.0 -unstructured==0.12.6 +unstructured==0.13.2 langchain-community==0.0.31 langchain==0.1.14 langchain-openai==0.1.1 From ca73bd1085dd614c1a51537446e9eff0489c18be Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:05:19 +0300 Subject: [PATCH 04/10] chore: upgrade checkout repo and setup 
python steps in workflow --- .github/workflows/tests.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index dfd4020..0f7f4cb 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -8,12 +8,12 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: '3.x' + python-version: '3.11' - name: Install dependencies run: | From 5b16ffc2ce168c481de0489b34c316e98cde8282 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:14:18 +0300 Subject: [PATCH 05/10] renaming test methods to execute them in order --- src/tests/googleindex_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tests/googleindex_test.py b/src/tests/googleindex_test.py index 899399c..5106092 100644 --- a/src/tests/googleindex_test.py +++ b/src/tests/googleindex_test.py @@ -21,7 +21,7 @@ def setUp(self): self.indexer = GooglePineconeIndexer(self.index_name, self.pinecone_api_key, self.google_api_key) @patch('sys.stdout', new_callable=StringIO) - def test_create_index(self, mock_stdout): + def test_01_create_index(self, mock_stdout): """ Test creating an index and assert the output. """ @@ -34,7 +34,7 @@ def test_create_index(self, mock_stdout): self.assertEqual(index_created_message_1, f"Index {self.index_name} created successfully!") @patch('builtins.print') - def test_index_documents(self, mock_print): + def test_02_index_documents(self, mock_print): """ Test indexing documents and assert the type of the index. 
""" @@ -43,7 +43,7 @@ def test_index_documents(self, mock_print): index = self.indexer.pc.Index(self.index_name) self.assertIsInstance(index, pinecone.data.index.Index) - def test_initialize_vectorstore(self): + def test_03_initialize_vectorstore(self): """ Test initializing the vector store and assert its type. """ @@ -51,7 +51,7 @@ def test_initialize_vectorstore(self): self.assertIsInstance(vectorstore, PineconeVectorStore) @patch('sys.stdout', new_callable=StringIO) - def test_delete_index(self, mock_stdout): + def test_04_delete_index(self, mock_stdout): """ Test deleting an index and assert the output. """ From f3b884e34a5f9d5da4c1ac4ad6a5bd46d82ece3d Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:19:07 +0300 Subject: [PATCH 06/10] add return statement to index_documents method --- src/_openai/docindex.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/_openai/docindex.py b/src/_openai/docindex.py index a09f375..8c77621 100644 --- a/src/_openai/docindex.py +++ b/src/_openai/docindex.py @@ -186,6 +186,7 @@ def index_documents(self, urls: List[str], batch_limit: int, chunk_size: int = 2 index = self.pc.Index(self.index_name) print(index.describe_index_stats()) print("Indexing complete.") + return index def initialize_vectorstore(self, index_name): index = self.pc.Index(index_name) From b11311fa6dbcb73b0a90f61f6969c200a83c556f Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:19:57 +0300 Subject: [PATCH 07/10] add tests for _openai.docindex module --- src/tests/openaiindex_test.py | 75 +++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 src/tests/openaiindex_test.py diff --git a/src/tests/openaiindex_test.py b/src/tests/openaiindex_test.py new file mode 100644 index 0000000..ac56007 --- /dev/null +++ b/src/tests/openaiindex_test.py @@ -0,0 +1,75 @@ +import unittest +from _openai.docindex import OpenaiPineconeIndexer +import os +from io import StringIO +from unittest.mock import patch 
+import pinecone +from langchain_pinecone import PineconeVectorStore + +class TestOpenaiPineconeIndexer(unittest.TestCase): + """ + Test case class for the OpenaiPineconeIndexer. + """ + + def setUp(self): + """ + Set up the test case with common attributes. + """ + self.index_name = "new-index-1" + self.pinecone_api_key = os.environ.get('PINECONE_API_KEY') + self.openai_api_key = os.environ.get('OPENAI_API_KEY') + self.indexer = OpenaiPineconeIndexer(self.index_name, self.pinecone_api_key, self.openai_api_key) + + @patch('sys.stdout', new_callable=StringIO) + def test_01_create_index(self, mock_stdout): + """ + Test creating an index and assert the output. + """ + self.indexer.create_index() + printed_output = mock_stdout.getvalue().strip() + lines = printed_output.split('\n') + index_created_message_0 = lines[0] + self.assertEqual(index_created_message_0, f"Creating index {self.index_name}") + index_created_message_1 = lines[1] + self.assertEqual(index_created_message_1, f"Index {self.index_name} created successfully!") + + @patch('builtins.print') + def test_02_index_documents(self, mock_print): + """ + Test indexing documents and assert the type of the index. + """ + urls = ["https://arxiv.org/pdf/1706.03762.pdf"] + self.indexer.index_documents(urls, batch_limit=10, chunk_size=256) + index = self.indexer.pc.Index(self.index_name) + self.assertIsInstance(index, pinecone.data.index.Index) + + def test_initialize_vectorstore(self): + """ + Test initializing the vector store and assert its type. + """ + vectorstore = self.indexer.initialize_vectorstore(self.index_name) + self.assertIsInstance(vectorstore, PineconeVectorStore) + + @patch('sys.stdout', new_callable=StringIO) + def test_03_delete_index(self, mock_stdout): + """ + Test deleting an index and assert the output. 
+ """ + self.indexer.delete_index() + printed_output = mock_stdout.getvalue().strip() + lines = printed_output.split('\n') + index_deleted_message_0 = lines[0] + self.assertEqual(index_deleted_message_0, f"Deleting index {self.index_name}") + index_deleted_message_1 = lines[1] + self.assertEqual(index_deleted_message_1, f"Index {self.index_name} deleted successfully!") + + @classmethod + def sort_04_test_methods(cls, testCaseClass, testCaseNames): + """ + Sort test methods for better readability. + """ + return sorted(testCaseNames) + +if __name__ == "__main__": + unittest.TestLoader.sortTestMethodsUsing = TestOpenaiPineconeIndexer.sort_test_methods + unittest.main() From 10a6e677f7ffb8fae3ebb5ce7702e4a2b2875914 Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:21:27 +0300 Subject: [PATCH 08/10] change workflow trigger to on PR only --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 0f7f4cb..bdc0d53 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,6 +1,6 @@ name: Tests -on: [pull_request, push] +on: [pull_request] jobs: test: From 54bfc2a4d128d0c29852feb8701a6563481780ec Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:25:10 +0300 Subject: [PATCH 09/10] add OPENAI_API_KEY secret to tests workflow env --- .github/workflows/tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index bdc0d53..c8fce6c 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -24,6 +24,7 @@ jobs: env: PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | pytest \ No newline at end of file From afcb4f394d3113ed9e500c9a9b57e553cd99a5ce Mon Sep 17 00:00:00 2001 From: KevKibe Date: Wed, 17 Apr 2024 16:32:24 +0300 Subject: [PATCH 10/10] fix: 
test execution ordering --- src/tests/openaiindex_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/openaiindex_test.py b/src/tests/openaiindex_test.py index ac56007..26caa7c 100644 --- a/src/tests/openaiindex_test.py +++ b/src/tests/openaiindex_test.py @@ -43,7 +43,7 @@ def test_02_index_documents(self, mock_print): index = self.indexer.pc.Index(self.index_name) self.assertIsInstance(index, pinecone.data.index.Index) - def test_initialize_vectorstore(self): + def test_03_initialize_vectorstore(self): """ Test initializing the vector store and assert its type. """ @@ -51,7 +51,7 @@ def test_initialize_vectorstore(self): self.assertIsInstance(vectorstore, PineconeVectorStore) @patch('sys.stdout', new_callable=StringIO) - def test_03_delete_index(self, mock_stdout): + def test_04_delete_index(self, mock_stdout): """ Test deleting an index and assert the output. """ @@ -64,7 +64,7 @@ def test_03_delete_index(self, mock_stdout): self.assertEqual(index_deleted_message_1, f"Index {self.index_name} deleted successfully!") @classmethod - def sort_04_test_methods(cls, testCaseClass, testCaseNames): + def sort_test_methods(cls, testCaseClass, testCaseNames): """ Sort test methods for better readability. """