From d7853b3e7117d2cf5d9857239f579d855368df1a Mon Sep 17 00:00:00 2001 From: zachary_royals Date: Tue, 27 Jun 2023 17:12:00 -0400 Subject: [PATCH] readme developed, frontend and css style changes --- README.md | 14 +++++++ backend/app/api/routes.py | 4 +- frontend/src/App.css | 5 ++- frontend/src/App.js | 12 +++--- frontend/src/FileUpload.js | 48 +++++++++++++++++----- test.py | 83 -------------------------------------- test_two.py | 80 ------------------------------------ testing_notebook.ipynb | 68 ------------------------------- 8 files changed, 62 insertions(+), 252 deletions(-) delete mode 100644 test.py delete mode 100644 test_two.py delete mode 100644 testing_notebook.ipynb diff --git a/README.md b/README.md index 98633f2..436c19e 100644 --- a/README.md +++ b/README.md @@ -7,3 +7,17 @@ App allows transcript to be loaded with insights returned from the API. ## How to run the project +1. Clone the repo + +### Frontend + +` cd frontend && npm install && npm start ` + +Afterwards, open [http://localhost:3000](http://localhost:3000) to view it in the browser. + +### Backend + +` cd backend/app && python main.py ` + +Afterwards, open [http://localhost:8000](http://localhost:8000) to view it in the browser. + diff --git a/backend/app/api/routes.py b/backend/app/api/routes.py index 089e0f3..833f58f 100644 --- a/backend/app/api/routes.py +++ b/backend/app/api/routes.py @@ -47,7 +47,7 @@ def root(): """ -@router.post("/analyze/") +@router.post("/analyze") async def analyze_document(file: UploadFile) -> dict: filename = file.filename @@ -84,7 +84,7 @@ async def analyze_document(file: UploadFile) -> dict: insights.append(insight) summary = final_product_prompt_template.format_messages(text=insights) - chat = ChatOpenAI(temperature=0.0, model="gpt-3.5-turbo-16k") + chat = ChatOpenAI(temperature=0.0, model="gpt-4") # run blocking operations in a thread pool final_insights = await loop.run_in_executor(executor, chat, summary) diff --git a/frontend/src/App.css b/frontend/src/App.css index 0eb4cd7..54907e6 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -16,10 +16,10 @@ .App-header { background-color: #282c34; display: flex; - padding-top: 5px; + padding-top: 0; flex-direction: column; align-items: center; - justify-content: top; + justify-content: flex-start; font-size: calc(10px + 2vmin); color: white; flex-direction: column; @@ -40,6 +40,7 @@ .file-upload { display: flex; + margin-bottom: 500px; /* adjust as needed */ flex-direction: column; align-items: center; justify-content: center; diff --git a/frontend/src/App.js b/frontend/src/App.js index 06bd8b3..6ebe68c 100644 --- a/frontend/src/App.js +++ b/frontend/src/App.js @@ -13,12 +13,12 @@ function App() { alt='logo' /> - Source Code - + className='App-link' + href='https://github.com/Luca-Blight/zelta-challenge' + target='_blank' + rel='noopener noreferrer'> + Source Code +
diff --git a/frontend/src/FileUpload.js b/frontend/src/FileUpload.js index ae86b68..bab8a91 100644 --- a/frontend/src/FileUpload.js +++ b/frontend/src/FileUpload.js @@ -4,6 +4,7 @@ import axios from 'axios'; const FileUpload = () => { const [file, setFile] = useState(null); + const [responseData, setResponseData] = useState({}); const submitFile = async (event) => { event.preventDefault(); @@ -11,12 +12,13 @@ const FileUpload = () => { formData.append('file', file); try { - const res = await axios.post('http://localhost:8000/analyze/', formData, { + const res = await axios.post('http://localhost:8000/analyze', formData, { headers: { 'Content-Type': 'multipart/form-data', }, }); - console.log(res.data); + console.log(res.data.content); + setResponseData(res.data.content); } catch (error) { console.error(error); } @@ -27,15 +29,39 @@ const FileUpload = () => { }; return ( -
- - -
+
+
+ + + +
+
+ {responseData.delivery_days ? +

Delivery Days: {responseData.delivery_days}

: null} + {responseData.price_value ? +

Price Value: {responseData.price_value}

: null} + {responseData.customer_negative_feedback ? +

Customer Negative Feedback: {responseData.customer_negative_feedback}

: null} + {responseData.feature_requests ? +

Feature Requests: {responseData.feature_requests}

: null} + {responseData.competitor_mentions ? +

Competitor Mentions: {responseData.competitor_mentions}

: null} +
+
); }; diff --git a/test.py b/test.py deleted file mode 100644 index ca03780..0000000 --- a/test.py +++ /dev/null @@ -1,83 +0,0 @@ -import os -import asyncio - -from PyPDF2 import PdfReader - -from fastapi import APIRouter -from pydantic import BaseModel -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.text_splitter import CharacterTextSplitter -from langchain.vectorstores import FAISS -from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain -from langchain.chat_models import ChatOpenAI - -from dotenv import load_dotenv, find_dotenv - -from prompts import product_prompt_template,final_product_prompt_template - -_ = load_dotenv(find_dotenv()) - -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") - - -def extract_text_from_pdf(pdf): - pdf_reader = PdfReader(pdf) - text = "" - for page in pdf_reader.pages: - text += page.extract_text() - - return text - - -def split_into_chunks(text: str) -> list[str]: - """Splits text into chunks - - Args: - text (_type_): _extracted text_ - - Returns: - _type_: _description_ - """ - text_splitter = CharacterTextSplitter( - separator="\n", chunk_size=7000, chunk_overlap=700, length_function=len - ) - chunks = text_splitter.split_text(text) - return chunks - - -def analyze_document(document: str) -> dict: - if document.endswith(".pdf"): - try: - extracted_text = extract_text_from_pdf(document) - chunks = split_into_chunks(extracted_text) - - insights = [] - for chunk in chunks: - transcript = product_prompt_template.format_messages(text=chunk) - chat = ChatOpenAI(temperature=0.0,model="gpt-4") - - insight = chat(transcript) - insights.append(insight) - - summary = final_product_prompt_template.format_messages(text=insights) - chat = ChatOpenAI(temperature=0.0,model="gpt-4") - final_insights = chat(summary) - return final_insights - - except Exception as e: - print("Error: ", e) - elif document.endswith(".txt"): - return "This is a text file." - else: - return "Error: This is not a PDF or text file." - - - -def main(): - # Assume 'document.pdf' is the path to a PDF file - document = '/Users/Zachary_Royals/Code/zelta-challenge/Sample Transcript_pdf.pdf' - results = analyze_document(document) - print(results) - -# Run the event loop -main() diff --git a/test_two.py b/test_two.py deleted file mode 100644 index a00ce49..0000000 --- a/test_two.py +++ /dev/null @@ -1,80 +0,0 @@ -import asyncio -import os - -from PyPDF2 import PdfReader - -from fastapi import APIRouter -from pydantic import BaseModel -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.text_splitter import CharacterTextSplitter -from langchain.vectorstores import FAISS -from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain -from langchain.chat_models import ChatOpenAI -from concurrent.futures import ThreadPoolExecutor - -from dotenv import load_dotenv, find_dotenv - -from prompts import product_prompt_template,final_product_prompt_template - -_ = load_dotenv(find_dotenv()) - -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") - -def extract_text_from_pdf(pdf): - pdf_reader = PdfReader(pdf) - text = "" - for page in pdf_reader.pages: - text += page.extract_text() - - return text - - -def split_into_chunks(text: str) -> list[str]: - text_splitter = CharacterTextSplitter( - separator="\n", chunk_size=12000, chunk_overlap=1200, length_function=len - ) - chunks = text_splitter.split_text(text) - return chunks - - -async def analyze_document(document: str) -> dict: - loop = asyncio.get_event_loop() - with ThreadPoolExecutor() as executor: - if document.endswith(".pdf"): - # run blocking operations in a thread pool - extracted_text = await loop.run_in_executor(executor, extract_text_from_pdf, document) - chunks = await loop.run_in_executor(executor, split_into_chunks, extracted_text) - - insights = [] - - for chunk in chunks: - transcript = product_prompt_template.format_messages(text=chunk) - chat = ChatOpenAI(temperature=0.0, model="gpt-3.5-turbo-16k") - # run blocking operations in a thread pool - insight = await loop.run_in_executor(executor, chat, transcript) - insights.append(insight) - - summary = final_product_prompt_template.format_messages(text=insights) - chat = ChatOpenAI(temperature=0.0, model="gpt-3.5-turbo-16k") - # run blocking operations in a thread pool - final_insights = await loop.run_in_executor(executor, chat, summary) - return final_insights - - - elif document.endswith(".txt"): - return "This is a text file." - else: - return "Error: This is not a PDF or text file." - - -# Now you can call this async function in an event loop. -# Here's a basic example of how you might do it: - -async def main(): - # Assume 'document.pdf' is the path to a PDF file - document = '/Users/Zachary_Royals/Code/zelta-challenge/Sample Transcript_pdf.pdf' - results = await analyze_document(document) - print(results) - -# Run the event loop -asyncio.run(main()) diff --git a/testing_notebook.ipynb b/testing_notebook.ipynb deleted file mode 100644 index b42d614..0000000 --- a/testing_notebook.ipynb +++ /dev/null @@ -1,68 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "1d625df7", - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install PyPDF2\n", - "# !pip install faiss-cpu\n", - "import requests\n", - "import langchain" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "4c2a0f2f", - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'PyPDF2'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mPyPDF2\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PdfReader\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'PyPDF2'" - ] - } - ], - "source": [ - "from PyPDF2 import PdfReader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d84798d", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}