Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chore: Add tests for _google.docindex and _openai.docindex modules #12

Merged
merged 10 commits into from
Apr 17, 2024
30 changes: 30 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# CI workflow: install the project's dependencies and run the pytest suite
# on every pull request.
name: Tests

on:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # Project requirements first, then the extra packages used for the test run.
      - name: Install dependencies
        run: |
          pip install -r requirements.txt --no-cache-dir
          pip install pytest einops lion-pytorch

      # The API keys are read from repository secrets and handed to pytest
      # as environment variables.
      - name: Test with pytest
        env:
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: pytest

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pinecone-client==3.2.2
tiktoken==0.6.0
pypdf==4.1.0
unstructured==0.12.6
unstructured==0.13.2
langchain-community==0.0.31
langchain==0.1.14
langchain-openai==0.1.1
Expand Down
1 change: 1 addition & 0 deletions src/_openai/docindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def index_documents(self, urls: List[str], batch_limit: int, chunk_size: int = 2
index = self.pc.Index(self.index_name)
print(index.describe_index_stats())
print("Indexing complete.")
return index

def initialize_vectorstore(self, index_name):
index = self.pc.Index(index_name)
Expand Down
Empty file added src/tests/__init__.py
Empty file.
75 changes: 75 additions & 0 deletions src/tests/googleindex_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import unittest
from _google.docindex import GooglePineconeIndexer
import os
from io import StringIO
from unittest.mock import patch
import pinecone
from langchain_pinecone import PineconeVectorStore

class TestGooglePineconeIndexer(unittest.TestCase):
    """
    Integration tests for the GooglePineconeIndexer.

    The methods are numbered (test_01 .. test_04) because they all operate on
    the same index name and are meant to run in name order: create the index,
    index documents into it, wrap it in a vector store, then delete it.
    """

    def setUp(self):
        """
        Build an indexer from the API keys found in the environment.

        The test is skipped when either key is missing or empty, so a run
        without credentials reports a skip instead of handing ``None`` keys
        to the indexer.
        """
        self.index_name = "new-index-1"
        self.pinecone_api_key = os.environ.get('PINECONE_API_KEY')
        self.google_api_key = os.environ.get('GOOGLE_API_KEY')
        if not (self.pinecone_api_key and self.google_api_key):
            self.skipTest("PINECONE_API_KEY and GOOGLE_API_KEY must be set")
        self.indexer = GooglePineconeIndexer(
            self.index_name,
            self.pinecone_api_key,
            self.google_api_key,
        )

    @staticmethod
    def _first_lines(stream, count=2):
        """
        Return the first ``count`` lines captured in ``stream``.

        Slicing (rather than indexing) keeps a too-short output from raising
        IndexError, so the caller's assertEqual reports the real mismatch.
        """
        return stream.getvalue().strip().split('\n')[:count]

    @patch('sys.stdout', new_callable=StringIO)
    def test_01_create_index(self, mock_stdout):
        """
        Test creating an index and assert the two messages it prints.
        """
        self.indexer.create_index()
        self.assertEqual(
            self._first_lines(mock_stdout),
            [
                f"Creating index {self.index_name}",
                f"Index {self.index_name} created successfully!",
            ],
        )

    @patch('builtins.print')
    def test_02_index_documents(self, mock_print):
        """
        Test indexing documents and assert the type of the index.

        ``print`` is patched only to silence the indexer's output.
        """
        urls = ["https://arxiv.org/pdf/1706.03762.pdf"]
        self.indexer.index_documents(urls, batch_limit=10, chunk_size=256)
        index = self.indexer.pc.Index(self.index_name)
        self.assertIsInstance(index, pinecone.data.index.Index)

    def test_03_initialize_vectorstore(self):
        """
        Test initializing the vector store and assert its type.
        """
        vectorstore = self.indexer.initialize_vectorstore(self.index_name)
        self.assertIsInstance(vectorstore, PineconeVectorStore)

    @patch('sys.stdout', new_callable=StringIO)
    def test_04_delete_index(self, mock_stdout):
        """
        Test deleting an index and assert the two messages it prints.
        """
        self.indexer.delete_index()
        self.assertEqual(
            self._first_lines(mock_stdout),
            [
                f"Deleting index {self.index_name}",
                f"Index {self.index_name} deleted successfully!",
            ],
        )

    @classmethod
    def sort_test_methods(cls, testCaseClass, testCaseNames):
        """
        Return ``testCaseNames`` sorted alphabetically.

        ``testCaseClass`` is accepted but not used. NOTE: this takes
        (class, names) and returns a list; it is not a two-argument
        comparison function of the kind
        ``unittest.TestLoader.sortTestMethodsUsing`` expects.
        """
        return sorted(testCaseNames)

if __name__ == "__main__":
    # unittest's default loader already runs test methods in alphabetical
    # order, so test_01 .. test_04 execute in sequence without an override.
    # TestLoader.sortTestMethodsUsing must be a two-argument comparison
    # function returning a number; a (class, names) -> list helper assigned
    # there breaks the loader's sort, so no override is installed.
    unittest.main()
75 changes: 75 additions & 0 deletions src/tests/openaiindex_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import unittest
from _openai.docindex import OpenaiPineconeIndexer
import os
from io import StringIO
from unittest.mock import patch
import pinecone
from langchain_pinecone import PineconeVectorStore

class TestOpenaiPineconeIndexer(unittest.TestCase):
    """
    Integration tests for the OpenaiPineconeIndexer.

    The methods are numbered (test_01 .. test_04) because they all operate on
    the same index name and are meant to run in name order: create the index,
    index documents into it, wrap it in a vector store, then delete it.
    """

    def setUp(self):
        """
        Build an indexer from the API keys found in the environment.

        The test is skipped when either key is missing or empty, so a run
        without credentials reports a skip instead of handing ``None`` keys
        to the indexer.
        """
        self.index_name = "new-index-1"
        self.pinecone_api_key = os.environ.get('PINECONE_API_KEY')
        self.openai_api_key = os.environ.get('OPENAI_API_KEY')
        if not (self.pinecone_api_key and self.openai_api_key):
            self.skipTest("PINECONE_API_KEY and OPENAI_API_KEY must be set")
        self.indexer = OpenaiPineconeIndexer(
            self.index_name,
            self.pinecone_api_key,
            self.openai_api_key,
        )

    @staticmethod
    def _first_lines(stream, count=2):
        """
        Return the first ``count`` lines captured in ``stream``.

        Slicing (rather than indexing) keeps a too-short output from raising
        IndexError, so the caller's assertEqual reports the real mismatch.
        """
        return stream.getvalue().strip().split('\n')[:count]

    @patch('sys.stdout', new_callable=StringIO)
    def test_01_create_index(self, mock_stdout):
        """
        Test creating an index and assert the two messages it prints.
        """
        self.indexer.create_index()
        self.assertEqual(
            self._first_lines(mock_stdout),
            [
                f"Creating index {self.index_name}",
                f"Index {self.index_name} created successfully!",
            ],
        )

    @patch('builtins.print')
    def test_02_index_documents(self, mock_print):
        """
        Test indexing documents and assert the type of the index.

        ``print`` is patched only to silence the indexer's output.
        """
        urls = ["https://arxiv.org/pdf/1706.03762.pdf"]
        self.indexer.index_documents(urls, batch_limit=10, chunk_size=256)
        index = self.indexer.pc.Index(self.index_name)
        self.assertIsInstance(index, pinecone.data.index.Index)

    def test_03_initialize_vectorstore(self):
        """
        Test initializing the vector store and assert its type.
        """
        vectorstore = self.indexer.initialize_vectorstore(self.index_name)
        self.assertIsInstance(vectorstore, PineconeVectorStore)

    @patch('sys.stdout', new_callable=StringIO)
    def test_04_delete_index(self, mock_stdout):
        """
        Test deleting an index and assert the two messages it prints.
        """
        self.indexer.delete_index()
        self.assertEqual(
            self._first_lines(mock_stdout),
            [
                f"Deleting index {self.index_name}",
                f"Index {self.index_name} deleted successfully!",
            ],
        )

    @classmethod
    def sort_test_methods(cls, testCaseClass, testCaseNames):
        """
        Return ``testCaseNames`` sorted alphabetically.

        ``testCaseClass`` is accepted but not used. NOTE: this takes
        (class, names) and returns a list; it is not a two-argument
        comparison function of the kind
        ``unittest.TestLoader.sortTestMethodsUsing`` expects.
        """
        return sorted(testCaseNames)

if __name__ == "__main__":
    # unittest's default loader already runs test methods in alphabetical
    # order, so test_01 .. test_04 execute in sequence without an override.
    # TestLoader.sortTestMethodsUsing must be a two-argument comparison
    # function returning a number; a (class, names) -> list helper assigned
    # there breaks the loader's sort, so no override is installed.
    unittest.main()
Loading