diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index c0d31b0..ebb0712 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,4 +1,4 @@ -name: Tests +name: CoherePineconeIndexer Tests on: [pull_request] @@ -20,14 +20,30 @@ jobs: pip install -r requirements.txt --no-cache-dir pip install pytest einops lion-pytorch - - name: Test with pytest + - name: Test CoherePineconeIndexer + id: cohere_test env: PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + run: | + pytest src/tests/cohereindex_test.py + + + - name: Test OpenaiPineconeIndexer + id: openai_test + env: + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | pytest src/tests/openaiindex_test.py + + + - name: Test GooglePineconeIndexer + id: google_test + env: + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + run: | pytest src/tests/googleindex_test.py - pytest src/tests/cohereindex_test.py + \ No newline at end of file diff --git a/README.md b/README.md index c54148a..436d0c7 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,7 @@ cd src ```bash # Using OpenAI -python -m _openai.create_index --pinecone_api_key "your_pinecone_api_key" --index_name "your_index_name" - +python -m utils.create_index --pinecone_api_key "your_pinecone_api_key" --index_name "your_index_name" ``` @@ -164,7 +164,7 @@ python -m _cohere.index_documents --pinecone_api_key "your_pinecone_api_key" - ```bash # Using OpenAI -python -m _openai.delete_index --pinecone_api_key "your_pinecone_api_key" --index_name "your_index_name" +python -m utils.delete_index --pinecone_api_key "your_pinecone_api_key" --index_name "your_index_name" ``` diff --git a/src/_cohere/config.py b/src/_cohere/config.py deleted file mode 100644 index 13b30da..0000000 --- a/src/_cohere/config.py +++ /dev/null @@ -1,12 +0,0 @@ -class Config: - template_str = """ - You are very helpful assistant for question answering tasks. Use the pieces of retrieved context to answer question given. If you do not know - the answer, Just say that you do not know the answer instead of making up an answer. - - Retrieved context: {context} - Query: {query} - """ - - default_google_model = "gemini-pro" - default_openai_model = "gpt-3.5-turbo-0125" - default_cohere_model = "command" diff --git a/src/_cohere/doc_index.py b/src/_cohere/doc_index.py index 9910894..7e693d7 100644 --- a/src/_cohere/doc_index.py +++ b/src/_cohere/doc_index.py @@ -5,7 +5,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter import tiktoken from typing import List -from _openai.doc_model import Page +from utils.doc_model import Page from langchain_pinecone import PineconeVectorStore from pathlib import Path from langchain_community.document_loaders import UnstructuredWordDocumentLoader diff --git a/src/_google/config.py b/src/_google/config.py deleted file mode 100644 index c62e834..0000000 --- a/src/_google/config.py +++ /dev/null @@ -1,11 +0,0 @@ -class Config: - template_str = """ - You are very helpful assistant for question answering tasks. Use the pieces of retrieved context to answer question given. If you do not know - the answer, Just say that you do not know the answer instead of making up an answer. - - Retrieved context: {context} - Query: {query} - """ - - default_google_model = "gemini-pro" - default_openai_model = "gpt-3.5-turbo-0125" diff --git a/src/_google/create_index.py b/src/_google/create_index.py deleted file mode 100644 index e912942..0000000 --- a/src/_google/create_index.py +++ /dev/null @@ -1,15 +0,0 @@ -from .doc_index import GooglePineconeIndexer -import argparse - -def parse_args(): - parser = argparse.ArgumentParser(description="Creates an Index on Pinecone.") - parser.add_argument("--pinecone_api_key", type=str, help="Pinecone API key") - parser.add_argument("--index_name", type=str, help="Name of the Pinecone index") - parser.add_argument("--google_api_key", type=str, help="Google API key") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - pinecone_indexer = GooglePineconeIndexer(args.index_name, args.pinecone_api_key, args.google_api_key) - pinecone_indexer.create_index() diff --git a/src/_google/delete_index.py b/src/_google/delete_index.py deleted file mode 100644 index ac15105..0000000 --- a/src/_google/delete_index.py +++ /dev/null @@ -1,15 +0,0 @@ -from .doc_index import GooglePineconeIndexer -import argparse - -def parse_args(): - parser = argparse.ArgumentParser(description="Deletes an Index on Pinecone.") - parser.add_argument("--pinecone_api_key", type=str, help="Pinecone API key") - parser.add_argument("--index_name", type=str, help="Name of the Pinecone index") - parser.add_argument("--google_api_key", type=str, help="OpenAI API key") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - pinecone_indexer = GooglePineconeIndexer(args.index_name, args.pinecone_api_key, args.google_api_key) - pinecone_indexer.delete_index() diff --git a/src/_google/doc_index.py b/src/_google/doc_index.py index 46e2fdb..e15944a 100644 --- a/src/_google/doc_index.py +++ b/src/_google/doc_index.py @@ -6,7 +6,7 @@ from langchain_google_genai import GoogleGenerativeAIEmbeddings import tiktoken from typing import List -from _openai.doc_model import Page +from utils.doc_model import Page import google.generativeai as genai from pathlib import Path from langchain_community.document_loaders import UnstructuredWordDocumentLoader @@ -27,8 +27,8 @@ class GooglePineconeIndexer: def __init__( self, index_name: str, - pinecone_api_key: str, - google_api_key: str + pinecone_api_key: str = None, + google_api_key: str = None ) -> None: """ Initialize the GoogleGenerativeAIEmbeddings object. diff --git a/src/_openai/create_index.py b/src/_openai/create_index.py deleted file mode 100644 index 3da40b6..0000000 --- a/src/_openai/create_index.py +++ /dev/null @@ -1,15 +0,0 @@ -from .doc_index import OpenaiPineconeIndexer -import argparse - -def parse_args(): - parser = argparse.ArgumentParser(description="Index documents on Pinecone using OpenAI embeddings.") - parser.add_argument("--pinecone_api_key", type=str, help="Pinecone API key") - parser.add_argument("--index_name", type=str, help="Name of the Pinecone index") - parser.add_argument("--openai_api_key", type=str, help="OpenAI API key") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - pinecone_indexer = OpenaiPineconeIndexer(args.index_name, args.pinecone_api_key, args.openai_api_key) - pinecone_indexer.create_index() diff --git a/src/_openai/delete_index.py b/src/_openai/delete_index.py deleted file mode 100644 index d4f4539..0000000 --- a/src/_openai/delete_index.py +++ /dev/null @@ -1,15 +0,0 @@ -from .doc_index import OpenaiPineconeIndexer -import argparse - -def parse_args(): - parser = argparse.ArgumentParser(description="Index documents on Pinecone using OpenAI embeddings.") - parser.add_argument("--pinecone_api_key", type=str, help="Pinecone API key") - parser.add_argument("--index_name", type=str, help="Name of the Pinecone index") - parser.add_argument("--openai_api_key", type=str, help="OpenAI API key") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - pinecone_indexer = OpenaiPineconeIndexer(args.index_name, args.pinecone_api_key, args.openai_api_key) - pinecone_indexer.delete_index() diff --git a/src/_openai/doc_index.py b/src/_openai/doc_index.py index 3decd5e..f290ba4 100644 --- a/src/_openai/doc_index.py +++ b/src/_openai/doc_index.py @@ -6,7 +6,7 @@ from langchain_openai import OpenAIEmbeddings import tiktoken from typing import List -from .doc_model import Page +from utils.doc_model import Page from pathlib import Path from langchain_community.document_loaders import UnstructuredWordDocumentLoader from langchain_community.document_loaders import UnstructuredMarkdownLoader @@ -26,8 +26,8 @@ class OpenaiPineconeIndexer: def __init__( self, index_name: str, - pinecone_api_key: str, - openai_api_key: str + pinecone_api_key: str = None , + openai_api_key: str = None ) -> None: """ Initialize the OpenAIPineconeIndexer object. diff --git a/src/_openai/doc_model.py b/src/_openai/doc_model.py deleted file mode 100644 index da57dc4..0000000 --- a/src/_openai/doc_model.py +++ /dev/null @@ -1,8 +0,0 @@ -from pydantic import BaseModel, Field -from typing import Dict, Union - -class Page(BaseModel): - page_content: str = Field(..., description="The content of the page") - metadata: Dict[str, Union[str, int]] = Field(..., description="Metadata about the document") - page: int = Field(..., description="The page of the content") - source: Union[str, int] = Field(..., description="The source url of the document") \ No newline at end of file diff --git a/src/tests/googleindex_test.py b/src/tests/googleindex_test.py index e47cbcb..fefb019 100644 --- a/src/tests/googleindex_test.py +++ b/src/tests/googleindex_test.py @@ -59,16 +59,16 @@ def test_03_initialize_vectorstore(self): vectorstore = self.indexer.initialize_vectorstore(self.index_name) self.assertIsInstance(vectorstore, PineconeVectorStore) - def test_04_retrieve_and_generate(self): - """ - Test initializing the vector store and assert its type. - """ - vector_store = self.indexer.initialize_vectorstore(self.index_name) - response = self.indexer.retrieve_and_generate(query = "give a short summary of the introduction", - vector_store= vector_store - ) - print(response) - self.assertIsNotNone(response, "The retriever response should not be None.") + # def test_04_retrieve_and_generate(self): + # """ + # Test initializing the vector store and assert its type. + # """ + # vector_store = self.indexer.initialize_vectorstore(self.index_name) + # response = self.indexer.retrieve_and_generate(query = "give a short summary of the introduction", + # vector_store= vector_store + # ) + # print(response) + # self.assertIsNotNone(response, "The retriever response should not be None.") @patch('sys.stdout', new_callable=StringIO) def test_05_delete_index(self, mock_stdout): diff --git a/src/_cohere/create_index.py b/src/utils/create_index.py similarity index 63% rename from src/_cohere/create_index.py rename to src/utils/create_index.py index 37d750e..eb1d7a6 100644 --- a/src/_cohere/create_index.py +++ b/src/utils/create_index.py @@ -1,8 +1,8 @@ -from .doc_index import CoherePineconeIndexer +from _openai.doc_index import OpenaiPineconeIndexer import argparse def parse_args(): - parser = argparse.ArgumentParser(description="Creates an Index on Pinecone.") + parser = argparse.ArgumentParser(description="Create an Index on Pinecone.") parser.add_argument("--pinecone_api_key", type=str, help="Pinecone API key") parser.add_argument("--index_name", type=str, help="Name of the Pinecone index") return parser.parse_args() @@ -10,5 +10,5 @@ def parse_args(): if __name__ == "__main__": args = parse_args() - pinecone_indexer = CoherePineconeIndexer(args.index_name, args.pinecone_api_key) + pinecone_indexer = OpenaiPineconeIndexer(args.index_name, args.pinecone_api_key) pinecone_indexer.create_index() diff --git a/src/_cohere/delete_index.py b/src/utils/delete_index.py similarity index 62% rename from src/_cohere/delete_index.py rename to src/utils/delete_index.py index 52a0bf4..8a93ce7 100644 --- a/src/_cohere/delete_index.py +++ b/src/utils/delete_index.py @@ -1,8 +1,8 @@ -from .doc_index import CoherePineconeIndexer +from _openai.doc_index import OpenaiPineconeIndexer import argparse def parse_args(): - parser = argparse.ArgumentParser(description="Deletes an Index on Pinecone.") + parser = argparse.ArgumentParser(description="Delete an existing Index on Pinecone.") parser.add_argument("--pinecone_api_key", type=str, help="Pinecone API key") parser.add_argument("--index_name", type=str, help="Name of the Pinecone index") return parser.parse_args() @@ -10,5 +10,5 @@ def parse_args(): if __name__ == "__main__": args = parse_args() - pinecone_indexer = CoherePineconeIndexer(args.index_name, args.pinecone_api_key) + pinecone_indexer = OpenaiPineconeIndexer(args.index_name, args.pinecone_api_key) pinecone_indexer.delete_index() diff --git a/src/_cohere/doc_model.py b/src/utils/doc_model.py similarity index 100% rename from src/_cohere/doc_model.py rename to src/utils/doc_model.py