
Commit

Refactored code to support different LLM hosts and added installation process for llama-cpp-python package.
fynnfluegge committed Sep 24, 2023
1 parent 4125e3d commit 7cc5034
Showing 7 changed files with 302 additions and 40 deletions.
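In broad strokes, the refactor replaces the hard-coded ChatOpenAI instance in app.py with a host-agnostic LLM wrapper configured from the saved settings. A minimal usage sketch based on the new constructor (the model name and file path below are illustrative, not taken from the commit):

from codeqai.constants import LllmHost
from codeqai.llm import LLM

# Remote OpenAI-hosted chat model (assumes OPENAI_API_KEY is set in the environment).
remote_llm = LLM(llm_host=LllmHost.OPENAI, chat_model="gpt-3.5-turbo")

# Local llama.cpp model; chat_model is interpreted as a path to the model file.
local_llm = LLM(llm_host=LllmHost.LLAMACPP, chat_model="/models/codellama-7b.gguf")

# The underlying LangChain chat model is exposed as .chat_model for chains and memory.
chat_model = remote_llm.chat_model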
22 changes: 16 additions & 6 deletions codeqai/app.py
@@ -2,15 +2,17 @@
import os

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from rich.console import Console
from rich.syntax import Syntax
from yaspin import yaspin

from codeqai import codeparser, repo
from codeqai.config import (create_cache_dir, create_config, get_cache_path,
load_config)
from codeqai.constants import EmbeddingsModel
from codeqai.constants import EmbeddingsModel, LllmHost
from codeqai.embeddings import Embeddings
from codeqai.llm import LLM
from codeqai.vector_store import VectorStore


@@ -25,6 +27,7 @@ def run():

if args.action == "configure":
create_config()
exit()

# load config
config = {}
@@ -56,12 +59,15 @@
else:
vector_store = VectorStore(repo_name, embeddings=embeddings_model.embeddings)

llm = ChatOpenAI(temperature=0.9, max_tokens=2048, model="gpt-3.5-turbo")
llm = LLM(
llm_host=LllmHost[config["llm-host"].upper().replace("-", "_")],
chat_model=config["chat-model"],
)
memory = ConversationSummaryMemory(
llm=llm, memory_key="chat_history", return_messages=True
llm=llm.chat_model, memory_key="chat_history", return_messages=True
)
qa = ConversationalRetrievalChain.from_llm(
llm, retriever=vector_store.retriever, memory=memory
llm.chat_model, retriever=vector_store.retriever, memory=memory
)

while True:
@@ -73,7 +79,11 @@ def run():
similarity_result = vector_store.similarity_search(search_pattern)
spinner.stop()
for doc in similarity_result:
print(doc.page_content)
syntax = Syntax(
doc.page_content, "python", theme="monokai", line_numbers=True
)
console = Console()
console.print(syntax)

choice = input("[?] (C)ontinue search or (E)xit [C]:").strip().lower()

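For reference, the new rendering path prints matches with rich's syntax highlighting instead of plain text; a standalone sketch of the same call (the snippet string is made up, and the lexer is hard-coded to "python" as in the change above):

from rich.console import Console
from rich.syntax import Syntax

snippet = "def hello():\n    return 'world'"  # stand-in for doc.page_content
console = Console()
console.print(Syntax(snippet, "python", theme="monokai", line_numbers=True))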
76 changes: 64 additions & 12 deletions codeqai/config.py
@@ -72,32 +72,37 @@ def create_config():
"embeddings",
message="Which local embeddings model do you want to use?",
choices=[
"SentenceTransformers-all-mpnet-base-v2",
"Instructor-Large",
"Ollama",
"SentenceTransformers-all-mpnet-base-v2",
"SentenceTransformers-all-MiniLM-L6-v2",
],
default="SentenceTransformers-all-mpnet-base-v2",
default="Instructor-Large",
),
inquirer.List(
"llm",
message="Which local LLM do you want to use?",
choices=["Llamacpp", "Ollama", "Huggingface"],
"llm-host",
message="Which local LLM host do you want to use?",
choices=[
"Llamacpp",
"Ollama",
],
default="Llamacpp",
),
]
else:
questions = [
inquirer.List(
"embeddings",
message="Which embeddings do you want to use?",
message="Which remote embeddings do you want to use?",
choices=["OpenAI-text-embedding-ada-002", "Azure-OpenAI"],
default="OpenAI-text-embedding-ada-002",
),
inquirer.List(
"llm",
message="Which LLM do you want to use?",
choices=["GPT-3.5-Turbo", "GPT-4"],
default="GPT-3.5-Turbo",
"llm-host",
message="Which remote LLM do you want to use?",
choices=[
"OpenAI",
"Azure-OpenAI",
],
default="OpenAI",
),
]

@@ -107,8 +112,55 @@ def create_config():
config = {
"local": confirm["confirm"],
"embeddings": answers["embeddings"],
"llm": answers["llm"],
"llm-host": answers["llm-host"],
}
if answers["embeddings"] == "Azure-OpenAI":
# TODO add azure config
exit("Azure-OpenAI not implemented yet.")

if answers["llm-host"] == "Llamacpp":
questions = [
inquirer.Text(
"chat-model",
message="Please enter the path to the LLM model.",
default="",
),
]
elif answers["llm-host"] == "Ollama":
questions = [
inquirer.List(
"chat-model",
message="Which Ollama chat model do you want to use?",
choices=[
"llama2",
"llama2:13b",
"llama2:70b",
"codellama",
],
default="gpt-3.5-turbo",
),
]
elif answers["llm-host"] == "Azure-OpenAI":
# TODO add azure config
exit("Azure-OpenAI not implemented yet.")
elif answers["llm-host"] == "OpenAI":
questions = [
inquirer.List(
"chat-model",
message="Which OpenAI chat model do you want to use?",
choices=[
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4",
],
default="gpt-3.5-turbo",
),
]

answers = inquirer.prompt(questions)
if answers and answers["chat-model"]:
config["chat-model"] = answers["chat-model"]

save_config(config)

return config
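For illustration, a config produced by create_config for a local llama.cpp setup would look roughly like this (a sketch; the model path is hypothetical and the on-disk format depends on save_config, which is not part of this diff):

config = {
    "local": True,
    "embeddings": "Instructor-Large",
    "llm-host": "Llamacpp",
    "chat-model": "/models/codellama-7b.Q4_K_M.gguf",  # hypothetical path entered at the prompt
}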
9 changes: 6 additions & 3 deletions codeqai/constants.py
@@ -20,11 +20,14 @@ class Language(Enum):

class EmbeddingsModel(Enum):
SENTENCETRANSFORMERS_ALL_MPNET_BASE_V2 = "SentenceTransformers-all-mpnet-base-v2"
SENTENCETRANSFORMERS_ALL_MINILM_L6_V2 = "SentenceTransformers-all-MiniLM-L6-v2"
INSTRUCTOR_LARGE = "Instructor-Large"
OLLAMA = "Ollama"
OPENAI_TEXT_EMBEDDING_ADA_002 = "OpenAI-text-embedding-ada-002"
AZURE_OPENAI = "Azure-OpenAI"


class LocalLLMModel(Enum):
GPT_3_5_TURBO = "gpt-3.5-turbo"
class LllmHost(Enum):
LLAMACPP = "Llamacpp"
OLLAMA = "Ollama"
OPENAI = "OpenAI"
AZURE_OPENAI = "Azure-OpenAI"
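As shown in app.py above, config values are mapped onto the new enum by upper-casing and replacing dashes with underscores; a quick worked example:

from codeqai.constants import LllmHost

value = "Azure-OpenAI"                  # example "llm-host" config value
key = value.upper().replace("-", "_")   # -> "AZURE_OPENAI"
assert LllmHost[key] is LllmHost.AZURE_OPENAI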
31 changes: 15 additions & 16 deletions codeqai/embeddings.py
@@ -11,31 +11,30 @@ class Embeddings:
def __init__(
self, local=False, model=EmbeddingsModel.OPENAI_TEXT_EMBEDDING_ADA_002
):
self.model = model

if not local:
if model == EmbeddingsModel.OPENAI_TEXT_EMBEDDING_ADA_002:
self.embeddings = OpenAIEmbeddings(
client=None, model="text-embedding-ada-002"
)
else:
if model == EmbeddingsModel.OLLAMA:
pass
else:
try:
import sentence_transformers # noqa: F401
except ImportError:
self._install_sentence_transformers()

if model == EmbeddingsModel.SENTENCETRANSFORMERS_ALL_MPNET_BASE_V2:
self.embeddings = HuggingFaceEmbeddings()
elif model == EmbeddingsModel.SENTENCETRANSFORMERS_ALL_MINILM_L6_V2:
self.embeddings = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-MiniLM-L6-v2",
)
elif model == EmbeddingsModel.INSTRUCTOR_LARGE:
try:
import sentence_transformers # noqa: F401
from InstructorEmbedding import INSTRUCTOR # noqa: F401
except ImportError:
self._install_sentence_transformers()
self._install_instructor_embedding()

if model == EmbeddingsModel.SENTENCETRANSFORMERS_ALL_MPNET_BASE_V2:
self.embeddings = HuggingFaceEmbeddings()
elif model == EmbeddingsModel.INSTRUCTOR_LARGE:
try:
from InstructorEmbedding import \
INSTRUCTOR # noqa: F401
except ImportError:
self._install_instructor_embedding()
self.embeddings = HuggingFaceInstructEmbeddings()
self.embeddings = HuggingFaceInstructEmbeddings()

def _install_sentence_transformers(self):
question = [
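A minimal sketch of selecting a local embeddings model through the restructured branches above (assumes sentence-transformers is already installed, so no install prompt is triggered):

from codeqai.constants import EmbeddingsModel
from codeqai.embeddings import Embeddings

# Resolves to HuggingFaceEmbeddings with the
# sentence-transformers/all-MiniLM-L6-v2 model, per the branch above.
embeddings = Embeddings(
    local=True,
    model=EmbeddingsModel.SENTENCETRANSFORMERS_ALL_MINILM_L6_V2,
)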
134 changes: 132 additions & 2 deletions codeqai/llm.py
@@ -1,3 +1,133 @@
from langchain.chains import ConversationalRetrievalChain
import os
import subprocess
import sys

import inquirer
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.llms import LlamaCpp, Ollama

from codeqai import utils
from codeqai.constants import LllmHost


class LLM:
def __init__(self, llm_host: LllmHost, chat_model: str):
if llm_host == LllmHost.OPENAI:
self.chat_model = ChatOpenAI(
temperature=0.9, max_tokens=2048, model=chat_model
)
elif llm_host == LllmHost.LLAMACPP:
self.install_llama_cpp()
self.chat_model = LlamaCpp(
model_path=chat_model,
temperature=0.9,
max_tokens=2048,
verbose=False,
)
elif llm_host == LllmHost.OLLAMA:
self.chat_model = Ollama(
base_url="http://localhost:11434",
model=chat_model,
callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)

def install_llama_cpp(self):
try:
from llama_cpp import Llama # noqa: F401
except ImportError:
question = [
inquirer.Confirm(
"confirm",
message=f"Local LLM interface package not found. Install {utils.get_bold_text('llama-cpp-python')}?",
default=True,
),
]

answers = inquirer.prompt(question)
if answers and answers["confirm"]:
import platform

def check_command(command):
try:
subprocess.run(
command,
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
return True
except subprocess.CalledProcessError:
return False
except FileNotFoundError:
return False

def install_llama(backend):
env_vars = {"FORCE_CMAKE": "1"}

if backend == "cuBLAS":
env_vars["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
elif backend == "hipBLAS":
env_vars["CMAKE_ARGS"] = "-DLLAMA_HIPBLAS=on"
elif backend == "Metal":
env_vars["CMAKE_ARGS"] = "-DLLAMA_METAL=on"
else: # Default to OpenBLAS
env_vars[
"CMAKE_ARGS"
] = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"

try:
subprocess.run(
[
sys.executable,
"-m",
"pip",
"install",
"llama-cpp-python",
],
env={**os.environ, **env_vars},
check=True,
)
except subprocess.CalledProcessError as e:
print(f"Error during installation with {backend}: {e}")

def supports_metal():
# Check for macOS version
if platform.system() == "Darwin":
mac_version = tuple(map(int, platform.mac_ver()[0].split(".")))
# Metal requires macOS 10.11 or later
if mac_version >= (10, 11):
return True
return False

# Check system capabilities
if check_command(["nvidia-smi"]):
install_llama("cuBLAS")
elif check_command(["rocminfo"]):
install_llama("hipBLAS")
elif supports_metal():
install_llama("Metal")
else:
install_llama("OpenBLAS")

print("Finished downloading `Code-Llama` interface.")

# Check if on macOS
if platform.system() == "Darwin":
# Check if it's Apple Silicon
if platform.machine() != "arm64":
print(
"Warning: You are using Apple Silicon (M1/M2) Mac but your Python is not of 'arm64' architecture."
)
print(
"The llama.ccp x86 version will be 10x slower on Apple Silicon (M1/M2) Mac."
)
print(
"\nTo install the correct version of Python that supports 'arm64' architecture visit:"
"https://github.com/conda-forge/miniforge"
)

else:
print("", "Installation cancelled. Exiting.", "")
return None
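For reference, install_llama("Metal") boils down to a pip build of llama-cpp-python with CMake flags passed through the environment; a standalone sketch of the equivalent call:

import os
import subprocess
import sys

# Force a CMake build of llama-cpp-python with the Metal backend enabled,
# mirroring the env_vars set in install_llama above.
env = {**os.environ, "FORCE_CMAKE": "1", "CMAKE_ARGS": "-DLLAMA_METAL=on"}
subprocess.run(
    [sys.executable, "-m", "pip", "install", "llama-cpp-python"],
    env=env,
    check=True,
)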
