-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16 from neuralmagic/mwiti/llm
Add LangChain examples
- Loading branch information
Showing
8 changed files
with
287 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
from langchain.vectorstores import Chroma | ||
from langchain.chains import RetrievalQA | ||
from langchain.prompts.chat import ( | ||
ChatPromptTemplate, | ||
SystemMessagePromptTemplate, | ||
HumanMessagePromptTemplate, | ||
) | ||
import os | ||
import chainlit as cl | ||
from langchain.embeddings import HuggingFaceEmbeddings | ||
from langchain.llms import DeepSparse | ||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
from io import BytesIO | ||
import PyPDF2 | ||
|
||
MODEL_PATH = "hf:neuralmagic/mpt-7b-chat-pruned50-quant" | ||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) | ||
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'}) | ||
llm = DeepSparse(model=MODEL_PATH) | ||
|
||
@cl.on_chat_start | ||
async def init(): | ||
files = None | ||
|
||
# Wait for the user to upload a file | ||
while files == None: | ||
files = await cl.AskFileMessage( | ||
content="Please upload a pdf file to begin!", accept=["application/pdf"], max_size_mb=50, | ||
).send() | ||
|
||
file = files[0] | ||
|
||
msg = cl.Message(content=f"Processing `{file.name}`...") | ||
await msg.send() | ||
|
||
if file.type != "application/pdf": | ||
raise TypeError("Only PDF files are supported") | ||
|
||
pdf_stream = BytesIO(file.content) | ||
pdf = PyPDF2.PdfReader(pdf_stream) | ||
pdf_text = "" | ||
for page in pdf.pages: | ||
pdf_text += page.extract_text() | ||
|
||
# texts = text_splitter.create_documents(pdf_text) | ||
texts = text_splitter.create_documents([pdf_text]) | ||
for i, text in enumerate(texts): text.metadata["source"] = f"{i}-pl" | ||
|
||
|
||
# Create a Chroma vector store | ||
docsearch = Chroma.from_documents(texts, embeddings) | ||
# Create a chain that uses the Chroma vector store | ||
chain = RetrievalQA.from_chain_type( | ||
llm, | ||
chain_type="stuff", | ||
return_source_documents=True, | ||
retriever=docsearch.as_retriever(), | ||
) | ||
|
||
# Save the metadata and texts in the user session | ||
metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))] | ||
cl.user_session.set("metadatas", metadatas) | ||
cl.user_session.set("texts", texts) | ||
|
||
# Let the user know that the system is ready | ||
msg.content = f"Processing `{file.name}` done. You can now ask questions!" | ||
await msg.update() | ||
|
||
cl.user_session.set("chain", chain) | ||
|
||
|
||
@cl.on_message | ||
async def main(message): | ||
chain = cl.user_session.get("chain") # type: RetrievalQAWithSourcesChain | ||
cb = cl.AsyncLangchainCallbackHandler( | ||
stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"] | ||
) | ||
cb.answer_reached = True | ||
res = await chain.acall(message, callbacks=[cb]) | ||
|
||
answer = res["result"] | ||
source_documents = res["source_documents"] | ||
source_elements = [] | ||
|
||
# Get the metadata and texts from the user session | ||
metadatas = cl.user_session.get("metadatas") | ||
all_sources = [m["source"] for m in metadatas] | ||
texts = cl.user_session.get("texts") | ||
|
||
if source_documents: | ||
found_sources = [] | ||
|
||
# Add the sources to the message | ||
for source_idx, source in enumerate(source_documents): | ||
# Get the index of the source | ||
source_name = f"source_{source_idx}" | ||
found_sources.append(source_name) | ||
# Create the text element referenced in the message | ||
source_elements.append(cl.Text(content=str(source.page_content).strip(), name=source_name)) | ||
|
||
if found_sources: | ||
answer += f"\nSources: {', '.join(found_sources)}" | ||
else: | ||
answer += "\nNo sources found" | ||
|
||
if cb.has_streamed_final_answer: | ||
cb.final_stream.content = answer | ||
cb.final_stream.elements = source_elements | ||
await cb.final_stream.update() | ||
else: | ||
await cl.Message(content=answer, | ||
elements=source_elements | ||
).send() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Run LLMs on CPU With LangChain and DeepSparse: Chat With PDF Demo | ||
|
||
[![HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-lg.svg)](https://huggingface.co/spaces/neuralmagic/pdfchat) | ||
|
||
This examples demostrates how to run a PDF chatbot on CPUs using [DeepSparse](https://github.com/neuralmagic/deepsparse/), [LangChain](https://github.com/langchain-ai/langchain) and [Chainlit](https://github.com/Chainlit/chainlit). The project can run on a CPU with 16GB of RAM. DeepSparse provides accelerated inference on CPUs by taking advantage of a model's sparsity to deliver GPU-class performance on CPUs. | ||
|
||
## Installation | ||
Clone the repo and install the required packages to get started: | ||
|
||
```bash | ||
pip install -r requrements.txt | ||
|
||
``` | ||
## Start With Chainlit | ||
|
||
```bash | ||
chainlit run app.py -w | ||
``` | ||
|
||
![PDF Image](../../images/pdf.png) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
datasets | ||
transformers | ||
deepsparse-nightly==1.6.0.20231007 | ||
chainlit | ||
langchain | ||
PyPDF2 | ||
sentence_transformers | ||
chromadb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
from langchain.vectorstores import Chroma | ||
from langchain.chains import RetrievalQA | ||
from langchain.prompts.chat import ( | ||
ChatPromptTemplate, | ||
SystemMessagePromptTemplate, | ||
HumanMessagePromptTemplate, | ||
) | ||
import os | ||
import chainlit as cl | ||
|
||
from langchain.embeddings import HuggingFaceEmbeddings | ||
from langchain.llms import DeepSparse | ||
|
||
from pytube import YouTube | ||
import whisper | ||
import tempfile | ||
|
||
MODEL_PATH = "hf:neuralmagic/mpt-7b-chat-pruned50-quant" | ||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) | ||
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'}) | ||
llm = DeepSparse(model=MODEL_PATH) | ||
model = whisper.load_model("base") | ||
|
||
def transcribe(youtube_url, model): | ||
youtube = YouTube(youtube_url) | ||
|
||
audio = youtube.streams.filter(only_audio=True).first() | ||
|
||
with tempfile.TemporaryDirectory() as tmpdir: | ||
file = audio.download(output_path=tmpdir) | ||
title = os.path.basename(file)[:-4] | ||
result = model.transcribe(file, fp16=False) | ||
|
||
return title, youtube_url, result["text"].strip() | ||
|
||
@cl.on_chat_start | ||
async def init(): | ||
url = None | ||
|
||
# Wait for the user to upload a file | ||
while url == None: | ||
url = await cl.AskUserMessage(content="Please type a YouTube URL to begin!").send() | ||
|
||
msg = cl.Message(content=f"Processing video...") | ||
await msg.send() | ||
|
||
transcriptions = transcribe(str(url), model) | ||
|
||
texts = text_splitter.create_documents([transcriptions[2]]) | ||
for i, text in enumerate(texts): text.metadata["source"] = f"{i}-pl" | ||
|
||
# Create a Chroma vector store | ||
docsearch = Chroma.from_documents(texts, embeddings) | ||
# Create a chain that uses the Chroma vector store | ||
chain = RetrievalQA.from_chain_type( | ||
llm, | ||
chain_type="stuff", | ||
return_source_documents=True, | ||
retriever=docsearch.as_retriever(), | ||
) | ||
|
||
# Save the metadata and texts in the user session | ||
metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))] | ||
cl.user_session.set("metadatas", metadatas) | ||
cl.user_session.set("texts", texts) | ||
|
||
# Let the user know that the system is ready | ||
msg.content = f"Processing `{transcriptions[0]}` video done. You can now ask questions!" | ||
await msg.update() | ||
|
||
cl.user_session.set("chain", chain) | ||
|
||
|
||
@cl.on_message | ||
async def main(message): | ||
chain = cl.user_session.get("chain") # type: RetrievalQAWithSourcesChain | ||
cb = cl.AsyncLangchainCallbackHandler( | ||
stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"] | ||
) | ||
cb.answer_reached = True | ||
res = await chain.acall(message, callbacks=[cb]) | ||
|
||
answer = res["result"] | ||
source_documents = res["source_documents"] | ||
source_elements = [] | ||
|
||
# Get the metadata and texts from the user session | ||
metadatas = cl.user_session.get("metadatas") | ||
all_sources = [m["source"] for m in metadatas] | ||
texts = cl.user_session.get("texts") | ||
|
||
if source_documents: | ||
found_sources = [] | ||
|
||
# Add the sources to the message | ||
for source_idx, source in enumerate(source_documents): | ||
# Get the index of the source | ||
source_name = f"source_{source_idx}" | ||
found_sources.append(source_name) | ||
# Create the text element referenced in the message | ||
source_elements.append(cl.Text(content=str(source.page_content).strip(), name=source_name)) | ||
|
||
if found_sources: | ||
answer += f"\nSources: {', '.join(found_sources)}" | ||
else: | ||
answer += "\nNo sources found" | ||
|
||
if cb.has_streamed_final_answer: | ||
cb.final_stream.content = answer | ||
cb.final_stream.elements = source_elements | ||
await cb.final_stream.update() | ||
else: | ||
await cl.Message(content=answer, | ||
elements=source_elements | ||
).send() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Run LLMs on CPU With LangChain and DeepSparse: Chat With Video Demo | ||
|
||
[![HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-lg.svg)](https://huggingface.co/spaces/neuralmagic/videochat) | ||
|
||
This examples demostrates how to run a video chatbot on CPUs using [DeepSparse](https://github.com/neuralmagic/deepsparse/), [LangChain](https://github.com/langchain-ai/langchain) and [Chainlit](https://github.com/Chainlit/chainlit). The project can run on a CPU with 16GB of RAM. DeepSparse provides accelerated inference on CPUs by taking advantage of a model's sparsity to deliver GPU-class performance on CPUs. | ||
|
||
## Installation | ||
Clone the repo and install the required packages to get started: | ||
|
||
```bash | ||
pip install -r requrements.txt | ||
|
||
``` | ||
## Start With Chainlit | ||
|
||
```bash | ||
chainlit run app.py -w | ||
``` | ||
|
||
![PDF Image](../../images/video.png) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
datasets | ||
transformers | ||
deepsparse-nightly==1.6.0.20231007 | ||
chainlit | ||
langchain | ||
sentence_transformers | ||
chromadb | ||
git+https://github.com/openai/whisper.git | ||
pytube |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.