diff --git a/examples/chainlit/Dockerfile b/examples/chainlit/Dockerfile
new file mode 100644
index 000000000000..4aa00a72e4af
--- /dev/null
+++ b/examples/chainlit/Dockerfile
@@ -0,0 +1,17 @@
+# Use an official Python runtime as a parent image
+FROM harbor.home.sfxworks.net/docker/library/python:3.9-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file into the container at /app
+COPY requirements.txt /app
+
+# Install any needed packages specified in requirements.txt
+RUN pip install -r requirements.txt
+
+# Copy the rest of the application code into the container
+COPY . /app
+
+# Run the Chainlit app headless, listening on all interfaces
+CMD ["chainlit", "run", "main.py", "-h", "--host", "0.0.0.0"]
diff --git a/examples/chainlit/README.md b/examples/chainlit/README.md
new file mode 100644
index 000000000000..9970b3b7c161
--- /dev/null
+++ b/examples/chainlit/README.md
@@ -0,0 +1,25 @@
+# LocalAI Demonstration with Embeddings and Chainlit
+
+This demonstration shows how to query existing embedded data in `LocalAI` through an interactive Chainlit interface. The `llama_index` library drives the embedding and querying process, `chainlit` provides the chat UI, and `Weaviate` serves as the vector store that holds the embedded data.
+
+## Prerequisites
+
+Before proceeding, make sure you have the following available:
+- A running Weaviate instance
+- LocalAI and its dependencies
+- Chainlit and its dependencies
+
+## Getting Started
+
+1. Clone this repository.
+2. Navigate to the example directory: `cd examples/chainlit`
+3. Install the dependencies: `pip install -r requirements.txt`
+4. Run the example: `chainlit run main.py`
+
+## Highlights: `llama_index` and `chainlit`
+
+`llama_index` is the key library that facilitates embedding and querying data with LocalAI. It provides a seamless interface for wiring together components such as `WeaviateVectorStore`, `LocalAI`, and `ServiceContext` into a single querying pipeline.
+
+`chainlit` provides the interactive interface through which users query the data and see results streamed back in real time. It integrates with `llama_index` to run the query and display the response.
+
+In this example, `llama_index` sets up the `VectorStoreIndex` and query engine, while `chainlit` handles the user interaction with `LocalAI` and displays the results.
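Note that the README assumes the Weaviate index already contains embedded data. A minimal seeding sketch using the same `llama_index` 0.8.x APIs that `main.py` relies on might look like the following; the `seed.py` name, the `./data` directory, and the hostnames are assumptions to adjust to your setup:

```python
# seed.py -- hypothetical helper to embed local documents into Weaviate.
# Assumes a ./data directory of source documents and the services from config.yaml.
import weaviate
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.vector_stores import WeaviateVectorStore

client = weaviate.Client("http://weaviate.local")  # assumed Weaviate endpoint
vector_store = WeaviateVectorStore(weaviate_client=client, index_name="AIChroma")
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Use the same embedding model as main.py so queries and data share a vector space
service_context = ServiceContext.from_defaults(
    embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
)

documents = SimpleDirectoryReader("./data").load_data()
VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, service_context=service_context
)
```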
diff --git a/examples/chainlit/config.yaml b/examples/chainlit/config.yaml
new file mode 100644
index 000000000000..1590f7b8e014
--- /dev/null
+++ b/examples/chainlit/config.yaml
@@ -0,0 +1,16 @@
+localAI:
+  temperature: 0
+  modelName: gpt-3.5-turbo
+  apiBase: http://local-ai.default
+  apiKey: stub
+  streaming: True
+weaviate:
+  url: http://weaviate.local
+  index: AIChroma
+query:
+  mode: hybrid
+  topK: 1
+  alpha: 0.0
+  chunkSize: 1024
+embedding:
+  model: BAAI/bge-small-en-v1.5
diff --git a/examples/chainlit/main.py b/examples/chainlit/main.py
new file mode 100644
index 000000000000..b57c72287792
--- /dev/null
+++ b/examples/chainlit/main.py
@@ -0,0 +1,91 @@
+import weaviate
+import yaml
+
+from llama_index import (
+    LLMPredictor,
+    ServiceContext,
+    StorageContext,
+    VectorStoreIndex,
+)
+from llama_index.callbacks.base import CallbackManager
+from llama_index.embeddings import HuggingFaceEmbedding
+from llama_index.llms import LocalAI
+from llama_index.vector_stores import WeaviateVectorStore
+
+import chainlit as cl
+
+# Load the configuration file
+with open("config.yaml", "r") as ymlfile:
+    cfg = yaml.safe_load(ymlfile)
+
+# Read values from the configuration file, falling back to defaults
+temperature = cfg['localAI'].get('temperature', 0)
+model_name = cfg['localAI'].get('modelName', "gpt-3.5-turbo")
+api_base = cfg['localAI'].get('apiBase', "http://local-ai.default")
+api_key = cfg['localAI'].get('apiKey', "stub")
+streaming = cfg['localAI'].get('streaming', True)
+weaviate_url = cfg['weaviate'].get('url', "http://weaviate.default")
+index_name = cfg['weaviate'].get('index', "AIChroma")
+query_mode = cfg['query'].get('mode', "hybrid")
+topK = cfg['query'].get('topK', 1)
+alpha = cfg['query'].get('alpha', 0.0)
+chunk_size = cfg['query'].get('chunkSize', 1024)
+embed_model_name = cfg['embedding'].get('model', "BAAI/bge-small-en-v1.5")
+
+# Embedding model; queries must be embedded the same way as the stored data
+embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
+
+# LLM served by LocalAI through its OpenAI-compatible API
+llm = LocalAI(temperature=temperature, model_name=model_name, api_base=api_base, api_key=api_key, streaming=streaming)
+llm.globally_use_chat_completions = True
+
+# Weaviate holds the previously embedded data
+client = weaviate.Client(weaviate_url)
+vector_store = WeaviateVectorStore(weaviate_client=client, index_name=index_name)
+storage_context = StorageContext.from_defaults(vector_store=vector_store)
+
+
+@cl.on_chat_start
+async def factory():
+    llm_predictor = LLMPredictor(llm=llm)
+
+    service_context = ServiceContext.from_defaults(
+        embed_model=embed_model,
+        callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]),
+        llm_predictor=llm_predictor,
+        chunk_size=chunk_size,
+    )
+
+    index = VectorStoreIndex.from_vector_store(
+        vector_store,
+        storage_context=storage_context,
+        service_context=service_context,
+    )
+
+    query_engine = index.as_query_engine(
+        vector_store_query_mode=query_mode,
+        similarity_top_k=topK,
+        alpha=alpha,
+        streaming=True,
+    )
+
+    # Keep the query engine in the user session so each message can reuse it
+    cl.user_session.set("query_engine", query_engine)
+
+
+@cl.on_message
+async def main(message: cl.Message):
+    query_engine = cl.user_session.get("query_engine")
+    # Run the blocking query in a worker thread to keep the event loop responsive
+    response = await cl.make_async(query_engine.query)(message.content)
+
+    response_message = cl.Message(content="")
+
+    # Stream tokens to the UI as they arrive
+    for token in response.response_gen:
+        await response_message.stream_token(token=token)
+
+    if response.response_txt:
+        response_message.content = response.response_txt
+
+    await response_message.send()
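To debug the retrieval pipeline without the Chainlit UI, a small synchronous script can exercise the same query path. This is a sketch under the same assumptions as `config.yaml`; the `query_cli.py` name and the question text are illustrative:

```python
# query_cli.py -- hypothetical one-shot query against the same index, no UI.
import weaviate
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import LocalAI
from llama_index.vector_stores import WeaviateVectorStore

llm = LocalAI(temperature=0, model_name="gpt-3.5-turbo",
              api_base="http://local-ai.default", api_key="stub")
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

client = weaviate.Client("http://weaviate.local")
vector_store = WeaviateVectorStore(weaviate_client=client, index_name="AIChroma")
index = VectorStoreIndex.from_vector_store(vector_store, service_context=service_context)

# Same hybrid-search parameters as config.yaml: topK=1, alpha=0.0
query_engine = index.as_query_engine(vector_store_query_mode="hybrid",
                                     similarity_top_k=1, alpha=0.0)
print(query_engine.query("What does the indexed data say about LocalAI?"))
```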
diff --git a/examples/chainlit/requirements.txt b/examples/chainlit/requirements.txt
new file mode 100644
index 000000000000..8519033efe8a
--- /dev/null
+++ b/examples/chainlit/requirements.txt
@@ -0,0 +1,7 @@
+llama_hub==0.0.41
+llama_index==0.8.55
+Requests==2.31.0
+weaviate_client==3.25.1
+transformers
+torch
+chainlit
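Since `Requests` is already pinned in `requirements.txt`, a quick way to confirm the LocalAI endpoint is reachable before launching the app is to call its OpenAI-compatible chat completions endpoint directly. A sketch, assuming the `apiBase` host from `config.yaml`:

```python
# check_localai.py -- hypothetical smoke test for the OpenAI-compatible endpoint.
import requests

resp = requests.post(
    "http://local-ai.default/v1/chat/completions",  # assumed apiBase from config.yaml
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Say hello"}],
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```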