From 7cc503422321e4f6c04608e2e4e3f03c790edd50 Mon Sep 17 00:00:00 2001 From: fynnfluegge Date: Sun, 24 Sep 2023 22:53:31 +0200 Subject: [PATCH] Refactored code to support different LLM hosts and added installation process for llama-cpp-python package. --- codeqai/app.py | 22 +++++-- codeqai/config.py | 76 ++++++++++++++++++++---- codeqai/constants.py | 9 ++- codeqai/embeddings.py | 31 +++++----- codeqai/llm.py | 134 +++++++++++++++++++++++++++++++++++++++++- poetry.lock | 69 +++++++++++++++++++++- pyproject.toml | 1 + 7 files changed, 302 insertions(+), 40 deletions(-) diff --git a/codeqai/app.py b/codeqai/app.py index b24a0df..0786cf3 100644 --- a/codeqai/app.py +++ b/codeqai/app.py @@ -2,15 +2,17 @@ import os from langchain.chains import ConversationalRetrievalChain -from langchain.chat_models import ChatOpenAI from langchain.memory import ConversationSummaryMemory +from rich.console import Console +from rich.syntax import Syntax from yaspin import yaspin from codeqai import codeparser, repo from codeqai.config import (create_cache_dir, create_config, get_cache_path, load_config) -from codeqai.constants import EmbeddingsModel +from codeqai.constants import EmbeddingsModel, LllmHost from codeqai.embeddings import Embeddings +from codeqai.llm import LLM from codeqai.vector_store import VectorStore @@ -25,6 +27,7 @@ def run(): if args.action == "configure": create_config() + exit() # load config config = {} @@ -56,12 +59,15 @@ def run(): else: vector_store = VectorStore(repo_name, embeddings=embeddings_model.embeddings) - llm = ChatOpenAI(temperature=0.9, max_tokens=2048, model="gpt-3.5-turbo") + llm = LLM( + llm_host=LllmHost[config["llm-host"].upper().replace("-", "_")], + chat_model=config["chat-model"], + ) memory = ConversationSummaryMemory( - llm=llm, memory_key="chat_history", return_messages=True + llm=llm.chat_model, memory_key="chat_history", return_messages=True ) qa = ConversationalRetrievalChain.from_llm( - llm, retriever=vector_store.retriever, memory=memory + llm.chat_model, retriever=vector_store.retriever, memory=memory ) while True: @@ -73,7 +79,11 @@ def run(): similarity_result = vector_store.similarity_search(search_pattern) spinner.stop() for doc in similarity_result: - print(doc.page_content) + syntax = Syntax( + doc.page_content, "python", theme="monokai", line_numbers=True + ) + console = Console() + console.print(syntax) choice = input("[?] 
(C)ontinue search or (E)xit [C]:").strip().lower()
diff --git a/codeqai/config.py b/codeqai/config.py
index 764d27b..3329138 100644
--- a/codeqai/config.py
+++ b/codeqai/config.py
@@ -72,16 +72,19 @@ def create_config():
             "embeddings",
             message="Which local embeddings model do you want to use?",
             choices=[
-                "SentenceTransformers-all-mpnet-base-v2",
                 "Instructor-Large",
-                "Ollama",
+                "SentenceTransformers-all-mpnet-base-v2",
+                "SentenceTransformers-all-MiniLM-L6-v2",
             ],
-            default="SentenceTransformers-all-mpnet-base-v2",
+            default="Instructor-Large",
         ),
         inquirer.List(
-            "llm",
-            message="Which local LLM do you want to use?",
-            choices=["Llamacpp", "Ollama", "Huggingface"],
+            "llm-host",
+            message="Which local LLM host do you want to use?",
+            choices=[
+                "Llamacpp",
+                "Ollama",
+            ],
             default="Llamacpp",
         ),
     ]
@@ -89,15 +92,17 @@
     questions = [
         inquirer.List(
             "embeddings",
-            message="Which embeddings do you want to use?",
+            message="Which remote embeddings do you want to use?",
             choices=["OpenAI-text-embedding-ada-002", "Azure-OpenAI"],
             default="OpenAI-text-embedding-ada-002",
         ),
         inquirer.List(
-            "llm",
-            message="Which LLM do you want to use?",
-            choices=["GPT-3.5-Turbo", "GPT-4"],
-            default="GPT-3.5-Turbo",
+            "llm-host",
+            message="Which remote LLM host do you want to use?",
+            choices=[
+                "OpenAI", "Azure-OpenAI",
+            ],
+            default="OpenAI",
         ),
     ]
 
@@ -107,8 +112,55 @@
     config = {
         "local": confirm["confirm"],
         "embeddings": answers["embeddings"],
-        "llm": answers["llm"],
+        "llm-host": answers["llm-host"],
     }
 
+    if answers["embeddings"] == "Azure-OpenAI":
+        # TODO add azure config
+        exit("Azure-OpenAI not implemented yet.")
+
+    if answers["llm-host"] == "Llamacpp":
+        questions = [
+            inquirer.Text(
+                "chat-model",
+                message="Please enter the path to the LLM model.",
+                default="",
+            ),
+        ]
+    elif answers["llm-host"] == "Ollama":
+        questions = [
+            inquirer.List(
+                "chat-model",
+                message="Which Ollama chat model do you want to use?",
+                choices=[
+                    "llama2",
+                    "llama2:13b",
+                    "llama2:70b",
+                    "codellama",
+                ],
+                default="llama2",
+            ),
+        ]
+    elif answers["llm-host"] == "Azure-OpenAI":
+        # TODO add azure config
+        exit("Azure-OpenAI not implemented yet.")
+    elif answers["llm-host"] == "OpenAI":
+        questions = [
+            inquirer.List(
+                "chat-model",
+                message="Which OpenAI chat model do you want to use?",
+                choices=[
+                    "gpt-3.5-turbo",
+                    "gpt-3.5-turbo-16k",
+                    "gpt-4",
+                ],
+                default="gpt-3.5-turbo",
+            ),
+        ]
+
+    answers = inquirer.prompt(questions)
+    if answers and answers["chat-model"]:
+        config["chat-model"] = answers["chat-model"]
+
     save_config(config)
 
     return config
diff --git a/codeqai/constants.py b/codeqai/constants.py
index 87566a1..cf87867 100644
--- a/codeqai/constants.py
+++ b/codeqai/constants.py
@@ -20,11 +20,14 @@ class Language(Enum):
 
 class EmbeddingsModel(Enum):
     SENTENCETRANSFORMERS_ALL_MPNET_BASE_V2 = "SentenceTransformers-all-mpnet-base-v2"
+    SENTENCETRANSFORMERS_ALL_MINILM_L6_V2 = "SentenceTransformers-all-MiniLM-L6-v2"
     INSTRUCTOR_LARGE = "Instructor-Large"
-    OLLAMA = "Ollama"
     OPENAI_TEXT_EMBEDDING_ADA_002 = "OpenAI-text-embedding-ada-002"
     AZURE_OPENAI = "Azure-OpenAI"
 
 
-class LocalLLMModel(Enum):
-    GPT_3_5_TURBO = "gpt-3.5-turbo"
+class LllmHost(Enum):
+    LLAMACPP = "Llamacpp"
+    OLLAMA = "Ollama"
+    OPENAI = "OpenAI"
+    AZURE_OPENAI = "Azure-OpenAI"
diff --git a/codeqai/embeddings.py b/codeqai/embeddings.py
index b6778bc..7e7a61c 100644
--- a/codeqai/embeddings.py
+++ b/codeqai/embeddings.py
@@ -11,31 +11,30 @@ class Embeddings:
     def __init__(
         self, local=False, 
model=EmbeddingsModel.OPENAI_TEXT_EMBEDDING_ADA_002 ): - self.model = model - if not local: if model == EmbeddingsModel.OPENAI_TEXT_EMBEDDING_ADA_002: self.embeddings = OpenAIEmbeddings( client=None, model="text_embedding_ada_002" ) else: - if model == EmbeddingsModel.OLLAMA: - pass - else: + try: + import sentence_transformers # noqa: F401 + except ImportError: + self._install_sentence_transformers() + + if model == EmbeddingsModel.SENTENCETRANSFORMERS_ALL_MPNET_BASE_V2: + self.embeddings = HuggingFaceEmbeddings() + elif model == EmbeddingsModel.SENTENCETRANSFORMERS_ALL_MINILM_L6_V2: + self.embeddings = HuggingFaceEmbeddings( + model_name="sentence-transformers/all-MiniLM-L6-v2", + ) + elif model == EmbeddingsModel.INSTRUCTOR_LARGE: try: - import sentence_transformers # noqa: F401 + from InstructorEmbedding import INSTRUCTOR # noqa: F401 except ImportError: - self._install_sentence_transformers() + self._install_instructor_embedding() - if model == EmbeddingsModel.SENTENCETRANSFORMERS_ALL_MPNET_BASE_V2: - self.embeddings = HuggingFaceEmbeddings() - elif model == EmbeddingsModel.INSTRUCTOR_LARGE: - try: - from InstructorEmbedding import \ - INSTRUCTOR # noqa: F401 - except ImportError: - self._install_instructor_embedding() - self.embeddings = HuggingFaceInstructEmbeddings() + self.embeddings = HuggingFaceInstructEmbeddings() def _install_sentence_transformers(self): question = [ diff --git a/codeqai/llm.py b/codeqai/llm.py index c3ae207..590c109 100644 --- a/codeqai/llm.py +++ b/codeqai/llm.py @@ -1,3 +1,133 @@ -from langchain.chains import ConversationalRetrievalChain +import os +import subprocess +import sys + +import inquirer +from langchain.callbacks.manager import CallbackManager +from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.chat_models import ChatOpenAI -from langchain.memory import ConversationSummaryMemory +from langchain.llms import LlamaCpp, Ollama + +from codeqai import utils +from codeqai.constants import LllmHost + + +class LLM: + def __init__(self, llm_host: LllmHost, chat_model: str): + if llm_host == LllmHost.OPENAI: + self.chat_model = ChatOpenAI( + temperature=0.9, max_tokens=2048, model=chat_model + ) + elif llm_host == LllmHost.LLAMACPP: + self.install_llama_cpp() + self.chat_model = LlamaCpp( + model_path=chat_model, + temperature=0.9, + max_tokens=2048, + verbose=False, + ) + elif llm_host == LllmHost.OLLAMA: + self.chat_model = Ollama( + base_url="http://localhost:11434", + model=chat_model, + callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), + ) + + def install_llama_cpp(self): + try: + from llama_cpp import Llama # noqa: F401 + except ImportError: + question = [ + inquirer.Confirm( + "confirm", + message=f"Local LLM interface package not found. 
Install {utils.get_bold_text('llama-cpp-python')}?",
+                    default=True,
+                ),
+            ]
+
+            answers = inquirer.prompt(question)
+            if answers and answers["confirm"]:
+                import platform
+
+                def check_command(command):
+                    try:
+                        subprocess.run(
+                            command,
+                            check=True,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE,
+                        )
+                        return True
+                    except subprocess.CalledProcessError:
+                        return False
+                    except FileNotFoundError:
+                        return False
+
+                def install_llama(backend):
+                    env_vars = {"FORCE_CMAKE": "1"}
+
+                    if backend == "cuBLAS":
+                        env_vars["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
+                    elif backend == "hipBLAS":
+                        env_vars["CMAKE_ARGS"] = "-DLLAMA_HIPBLAS=on"
+                    elif backend == "Metal":
+                        env_vars["CMAKE_ARGS"] = "-DLLAMA_METAL=on"
+                    else:  # Default to OpenBLAS
+                        env_vars[
+                            "CMAKE_ARGS"
+                        ] = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
+
+                    try:
+                        subprocess.run(
+                            [
+                                sys.executable,
+                                "-m",
+                                "pip",
+                                "install",
+                                "llama-cpp-python",
+                            ],
+                            env={**os.environ, **env_vars},
+                            check=True,
+                        )
+                    except subprocess.CalledProcessError as e:
+                        print(f"Error during installation with {backend}: {e}")
+
+                def supports_metal():
+                    # Check for macOS version
+                    if platform.system() == "Darwin":
+                        mac_version = tuple(map(int, platform.mac_ver()[0].split(".")))
+                        # Metal requires macOS 10.11 or later
+                        if mac_version >= (10, 11):
+                            return True
+                    return False
+
+                # Check system capabilities
+                if check_command(["nvidia-smi"]):
+                    install_llama("cuBLAS")
+                elif check_command(["rocminfo"]):
+                    install_llama("hipBLAS")
+                elif supports_metal():
+                    install_llama("Metal")
+                else:
+                    install_llama("OpenBLAS")
+
+                print("Finished installing `llama-cpp-python`.")
+
+                # Check if on macOS
+                if platform.system() == "Darwin":
+                    # Warn if Python is not running natively on arm64
+                    if platform.machine() != "arm64":
+                        print(
+                            "Warning: You are using Apple Silicon (M1/M2) Mac but your Python is not of 'arm64' architecture."
+                        )
+                        print(
+                            "The llama.cpp x86 version will be 10x slower on Apple Silicon (M1/M2) Mac."
+                        )
+                        print(
+                            "\nTo install the correct version of Python that supports 'arm64' architecture visit: "
+                            "https://github.com/conda-forge/miniforge"
+                        )
+
+            else:
+                print("", "Installation cancelled. Exiting.", "")
+                return None
diff --git a/poetry.lock b/poetry.lock
index 377ae91..3b9198e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -603,6 +603,30 @@ files = [
 pydantic = ">=1,<3"
 requests = ">=2,<3"
 
+[[package]]
+name = "markdown-it-py"
+version = "3.0.0"
+description = "Python port of markdown-it. Markdown parsing, done right!"
+optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "marshmallow" version = "3.20.1" @@ -623,6 +647,17 @@ docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "s lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"] tests = ["pytest", "pytz", "simplejson"] +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "multidict" version = "6.0.4" @@ -978,6 +1013,20 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pygments" +version = "2.16.1" +description = "Pygments is a syntax highlighting package written in Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, + {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, +] + +[package.extras] +plugins = ["importlib-metadata"] + [[package]] name = "pytest" version = "7.4.2" @@ -1193,6 +1242,24 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rich" +version = "13.5.3" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.5.3-py3-none-any.whl", hash = "sha256:9257b468badc3d347e146a4faa268ff229039d4c2d176ab0cffb4c4fbc73d5d9"}, + {file = "rich-13.5.3.tar.gz", hash = "sha256:87b43e0543149efa1253f485cd845bb7ee54df16c9617b8a893650ab84b4acb6"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "setuptools" version = "68.2.2" @@ -1697,4 +1764,4 @@ termcolor = ">=2.3,<3.0" [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "5329bb05a89056464d4484dd65184f8b4a4c6e641d1e09fb4e55a1b77e334a16" +content-hash = "be035d36e7707788221fd674228dd55c10c0a2313b1b164b30a30245aa39b49b" diff --git a/pyproject.toml b/pyproject.toml index faf7b19..f154565 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ openai = "^0.28.0" pyyaml = "^6.0.1" tree-sitter = "^0.20.2" tree-sitter-languages = "^1.7.0" +rich = "^13.5.3" [tool.poetry.scripts] codeqai = "codeqai.__main__:main"