diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index 5968a8c7..76381c16 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -49,7 +49,7 @@ jobs:
 
       - name: Download model
         working-directory: ./model_servers/llamacpp_python/
-        run: make llama-2-7b-chat.Q5_K_S.gguf
+        run: make mistral-7b-instruct-v0.1.Q4_K_M.gguf
 
       - name: Set up Python
         uses: actions/setup-python@v5.0.0
diff --git a/ai-lab-recipes-images.md b/ai-lab-recipes-images.md
index 85ec8f4d..25e97e4b 100644
--- a/ai-lab-recipes-images.md
+++ b/ai-lab-recipes-images.md
@@ -1,8 +1,8 @@
 ## Images (x86_64, aarch64) currently built from GH Actions in this repository
 
-- quay.io/redhat-et/locallm-model-service:latest
+- quay.io/ai-lab/llamacpp-python:latest
 - quay.io/redhat-et/locallm-text-summarizer:latest
-- quay.io/redhat-et/locallm-chatbot:latest
+- quay.io/ai-lab/chatbot:latest
 - quay.io/redhat-et/locallm-rag:latest
 - quay.io/redhat-et/locallm-codegen:latest
 - quay.io/redhat-et/locallm-chromadb:latest
@@ -11,9 +11,7 @@
 
 ## Model Images (x86_64, aarch64) currently in `quay.io/redhat-et/locallm-*`
 
-- quay.io/redhat-et/locallm-llama-2-7b:latest
-  - [model download link](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf)
-- quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-  - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf)
+- quay.io/ai-lab/mistral-7b-instruct:latest
+  - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)
 - quay.io/redhat-et/locallm-codellama-7b-gguf:latest
   - [model download link](https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf)
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 0e51b103..ec152e0f 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -5,13 +5,16 @@ build:
 llama-2-7b-chat.Q5_K_S.gguf:
 	curl -s -S -L -f https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
 
+mistral-7b-instruct-v0.1.Q4_K_M.gguf:
+	curl -s -S -L -f https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -z $@ -o $@.tmp && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@
+
 .PHONY: install
 install:
 	pip install -r tests/requirements-test.txt
 
 .PHONY: run
 run:
-	podman run -it -d -p 8001:8001 -v ./models:/locallm/models:ro,Z -e MODEL_PATH=models/llama-2-7b-chat.Q5_K_S.gguf -e HOST=0.0.0.0 -e PORT=8001 --net=host ghcr.io/redhat-et/model_servers
+	podman run -it -d -p 8001:8001 -v ./models:/locallm/models:ro,Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf -e HOST=0.0.0.0 -e PORT=8001 --net=host ghcr.io/redhat-et/model_servers
 
 .PHONY: test
 test:
diff --git a/model_servers/llamacpp_python/README.md b/model_servers/llamacpp_python/README.md
index 90541ecf..98b7a5da 100644
--- a/model_servers/llamacpp_python/README.md
+++ b/model_servers/llamacpp_python/README.md
@@ -20,7 +20,7 @@ At the time of this writing, 2 models are known to work with this service
 - **Llama2-7b**
   - Download URL: [https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf)
 - **Mistral-7b**
-  - Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf)
+  - Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)
 
 ```bash
 cd ../models
@@ -29,7 +29,7 @@ cd ../
 ```
 or
 ```bash
-make -f Makefile models/llama-2-7b-chat.Q5_K_S.gguf
+make -f Makefile models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
 ```
 
 ### Deploy Model Service
diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py
index a9af975b..26fa4014 100644
--- a/model_servers/llamacpp_python/tests/conftest.py
+++ b/model_servers/llamacpp_python/tests/conftest.py
@@ -12,7 +12,7 @@
         )
     ],
     extra_environment_variables={
-        "MODEL_PATH": "models/llama-2-7b-chat.Q5_K_S.gguf",
+        "MODEL_PATH": "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
         "HOST": "0.0.0.0",
         "PORT": "8001"
     },
diff --git a/model_servers/llamacpp_python/tooling_options.ipynb b/model_servers/llamacpp_python/tooling_options.ipynb
index 5f830aa4..ebad2174 100644
--- a/model_servers/llamacpp_python/tooling_options.ipynb
+++ b/model_servers/llamacpp_python/tooling_options.ipynb
@@ -23,7 +23,7 @@
     "This notebook assumes that the playground image is running locally. Once built, you can use the below to start the model service image. \n",
     "\n",
     "```bash\n",
-    "podman run -it -p 8000:8000 -v /locallm/models:/locallm/models:Z -e MODEL_PATH=models/llama-2-7b-chat.Q5_K_S.gguf playground\n",
+    "podman run -it -p 8000:8000 -v /locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf playground\n",
     "```"
    ]
   },
diff --git a/models/Containerfile b/models/Containerfile
index e359bf7c..50f0abec 100644
--- a/models/Containerfile
+++ b/models/Containerfile
@@ -1,9 +1,9 @@
 #https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf
-#https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf
+#https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
 #https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf
 #https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin
 # podman build --build-arg MODEL_URL=https://... -t quay.io/yourimage .
 FROM registry.access.redhat.com/ubi9/ubi-micro:9.3-13
-ARG MODEL_URL
+ARG MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
 WORKDIR /model
 ADD $MODEL_URL .
diff --git a/recipes/natural_language_processing/chatbot/ai-lab.yaml b/recipes/natural_language_processing/chatbot/ai-lab.yaml
index 812c9d28..ddac72f4 100644
--- a/recipes/natural_language_processing/chatbot/ai-lab.yaml
+++ b/recipes/natural_language_processing/chatbot/ai-lab.yaml
@@ -15,7 +15,7 @@ application:
         - amd64
       ports:
         - 8001
-      image: quay.io/redhat-et/locallm-model-service:latest
+      image: quay.io/ai-lab/llamacpp-python:latest
     - name: streamlit-chat-app
       contextdir: .
      containerfile: builds/Containerfile
@@ -24,4 +24,4 @@ application:
         - amd64
       ports:
         - 8501
-      image: quay.io/redhat-et/locallm-chatbot:latest
\ No newline at end of file
+      image: quay.io/ai-lab/chatbot:latest
diff --git a/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml b/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
index 540d8d0d..c7b47906 100644
--- a/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
+++ b/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
@@ -8,7 +8,7 @@ spec:
   initContainers:
   - name: model-file
     image: quay.io/ai-lab/mistral-7b-instruct:latest
-    command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_S.gguf", "/shared/"]
+    command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_M.gguf", "/shared/"]
     volumeMounts:
     - name: model-file
       mountPath: /shared
@@ -29,7 +29,7 @@
     - name: PORT
       value: 8001
     - name: MODEL_PATH
-      value: /model/mistral-7b-instruct-v0.1.Q4_K_S.gguf
+      value: /model/mistral-7b-instruct-v0.1.Q4_K_M.gguf
     image: quay.io/ai-lab/llamacpp-python:latest
     name: chatbot-model-service
     ports:
diff --git a/recipes/natural_language_processing/code-generation/ai-lab.yaml b/recipes/natural_language_processing/code-generation/ai-lab.yaml
index 56331ecf..fa5e7eb9 100644
--- a/recipes/natural_language_processing/code-generation/ai-lab.yaml
+++ b/recipes/natural_language_processing/code-generation/ai-lab.yaml
@@ -15,7 +15,7 @@ application:
         - amd64
       ports:
         - 8001
-      image: quay.io/redhat-et/locallm-model-service:latest
+      image: quay.io/ai-lab/llamacpp-python:latest
     - name: codegen-app
       contextdir: .
       containerfile: builds/Containerfile
diff --git a/recipes/natural_language_processing/code-generation/quadlet/codegen.image b/recipes/natural_language_processing/code-generation/quadlet/codegen.image
index f64815c8..8e733138 100644
--- a/recipes/natural_language_processing/code-generation/quadlet/codegen.image
+++ b/recipes/natural_language_processing/code-generation/quadlet/codegen.image
@@ -3,5 +3,5 @@ WantedBy=codegen.service
 
 [Image]
 Image=quay.io/redhat-et/locallm-codellama-7b-gguf:latest
-Image=quay.io/redhat-et/locallm-model-service:latest
+Image=quay.io/ai-lab/llamacpp-python:latest
 Image=quay.io/redhat-et/locallm-codegen:latest
diff --git a/recipes/natural_language_processing/rag/ai-lab.yaml b/recipes/natural_language_processing/rag/ai-lab.yaml
index f22a7195..cc133910 100644
--- a/recipes/natural_language_processing/rag/ai-lab.yaml
+++ b/recipes/natural_language_processing/rag/ai-lab.yaml
@@ -15,7 +15,7 @@ application:
         - amd64
       ports:
         - 8001
-      image: quay.io/redhat-et/locallm-model-service:latest
+      image: quay.io/ai-lab/llamacpp-python:latest
     - name: chromadb-server
       contextdir: ../../../vector_dbs/chromadb
       containerfile: Containerfile
@@ -34,4 +34,4 @@ application:
         - amd64
       ports:
         - 8501
-      image: quay.io/redhat-et/locallm-rag:latest
\ No newline at end of file
+      image: quay.io/redhat-et/locallm-rag:latest
diff --git a/recipes/natural_language_processing/summarizer/ai-lab.yaml b/recipes/natural_language_processing/summarizer/ai-lab.yaml
index 22e72778..f3bbced4 100644
--- a/recipes/natural_language_processing/summarizer/ai-lab.yaml
+++ b/recipes/natural_language_processing/summarizer/ai-lab.yaml
@@ -15,7 +15,7 @@ application:
         - amd64
       ports:
         - 8001
-      image: quay.io/redhat-et/locallm-model-service:latest
+      image: quay.io/ai-lab/llamacpp-python:latest
     - name: streamlit-summary-app
      contextdir: .
      containerfile: builds/Containerfile
@@ -24,4 +24,4 @@ application:
         - amd64
       ports:
         - 8501
-      image: quay.io/redhat-et/locallm-text-summarizer:latest
\ No newline at end of file
+      image: quay.io/redhat-et/locallm-text-summarizer:latest
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.image b/recipes/natural_language_processing/summarizer/quadlet/summarizer.image
index 9511d5df..c4336439 100644
--- a/recipes/natural_language_processing/summarizer/quadlet/summarizer.image
+++ b/recipes/natural_language_processing/summarizer/quadlet/summarizer.image
@@ -2,6 +2,6 @@
 WantedBy=summarizer.service
 
 [Image]
-Image=quay.io/redhat-et/locallm-mistral-7b-gguf:latest
-Image=quay.io/redhat-et/locallm-model-service:latest
+Image=quay.io/ai-lab/mistral-7b-instruct:latest
+Image=quay.io/ai-lab/llamacpp-python:latest
 Image=quay.io/redhat-et/locallm-text-summarizer:latest
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
index a1d8c708..94546b92 100644
--- a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
+++ b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
@@ -7,8 +7,8 @@ metadata:
 spec:
   initContainers:
   - name: model-file
-    image: quay.io/redhat-et/locallm-mistral-7b-gguf:latest
+    image: quay.io/ai-lab/mistral-7b-instruct:latest
-    command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_S.gguf", "/shared/"]
+    command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_M.gguf", "/shared/"]
     volumeMounts:
     - name: model-file
       mountPath: /shared
@@ -29,8 +29,8 @@
     - name: PORT
       value: 8001
     - name: MODEL_PATH
-      value: /model/mistral-7b-instruct-v0.1.Q4_K_S.gguf
-    image: quay.io/redhat-et/locallm-model-service:latest
+      value: /model/mistral-7b-instruct-v0.1.Q4_K_M.gguf
+    image: quay.io/ai-lab/llamacpp-python:latest
     name: summarizer-model-service
     ports:
     - containerPort: 8001
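
After applying the patch, the model swap can be verified end to end against the llamacpp_python service, since llama-cpp-python exposes an OpenAI-compatible API. Below is a minimal smoke-test sketch, not part of the patch itself: it assumes the container started by `make run` is listening on localhost:8001 with MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf, and the prompt text is purely illustrative.

```python
# Hypothetical smoke test for the model service started via `make run`.
# Assumes the llama-cpp-python server is reachable on localhost:8001 and
# has loaded mistral-7b-instruct-v0.1.Q4_K_M.gguf (port per the Makefile
# and quadlet files in this patch).
import json
import urllib.request

payload = {
    "messages": [{"role": "user", "content": "Reply with one short sentence."}],
    "max_tokens": 32,
}
req = urllib.request.Request(
    "http://localhost:8001/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req, timeout=60) as resp:
    body = json.load(resp)

# A well-formed completion confirms the Q4_K_M model file was found and loaded.
print(body["choices"][0]["message"]["content"])
```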