Skip to content

Commit

Permalink
Merge branch 'main' into renovate/actions-upload-artifact-4.x
Browse files Browse the repository at this point in the history
  • Loading branch information
justinthelaw authored Oct 4, 2024
2 parents 82c7271 + fd3cbc4 commit b9291a4
Show file tree
Hide file tree
Showing 31 changed files with 661 additions and 225 deletions.
2 changes: 1 addition & 1 deletion .github/actions/release/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ runs:
run: |
docker buildx build --build-arg LOCAL_VERSION=${{ inputs.releaseTag }} -t ghcr.io/defenseunicorns/leapfrogai/vllm:${{ inputs.releaseTag }} --push -f packages/vllm/Dockerfile .
zarf package create packages/vllm --set=IMAGE_VERSION=${{ inputs.releaseTag }} --flavor upstream --confirm
ZARF_CONFIG=packages/vllm/zarf-config.yaml zarf package create packages/vllm --set=IMAGE_VERSION=${{ inputs.releaseTag }} --flavor upstream --confirm
zarf package publish zarf-package-vllm-amd64-${{ inputs.releaseTag }}.tar.zst oci://ghcr.io/defenseunicorns/packages${{ inputs.subRepository }}leapfrogai
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-vllm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,4 @@ jobs:
##########
- name: Build vLLM
run: |
make build-vllm LOCAL_VERSION=e2e-test
make build-vllm LOCAL_VERSION=e2e-test ZARF_CONFIG=packages/vllm/zarf-config.yaml
3 changes: 2 additions & 1 deletion .github/workflows/nightly-snapshot-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ defaults:

env:
SNAPSHOT_VERSION: snapshot-latest
SNAPSHOT_SUB_REPOSITORY: /uds/snapshots
SNAPSHOT_SUB_REPOSITORY: /uds/snapshots/

permissions:
contents: read
Expand Down Expand Up @@ -170,6 +170,7 @@ jobs:
env:
ANON_KEY: ${{ steps.generate_secrets.outputs.ANON_KEY }}
SERVICE_KEY: ${{ steps.generate_secrets.outputs.SERVICE_KEY }}
LEAPFROGAI_MODEL: llama-cpp-python
run: |
python -m pytest -vvv -s ./tests/e2e
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ build-vllm: local-registry docker-vllm ## Build the vllm container and Zarf pack
docker push ${DOCKER_FLAGS} localhost:${REG_PORT}/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION}

## Build the Zarf package
uds zarf package create packages/vllm --flavor ${FLAVOR} -a ${ARCH} -o packages/vllm --registry-override=ghcr.io=localhost:${REG_PORT} --insecure --set IMAGE_VERSION=${LOCAL_VERSION} ${ZARF_FLAGS} --confirm
ZARF_CONFIG=packages/vllm/zarf-config.yaml uds zarf package create packages/vllm --flavor ${FLAVOR} -a ${ARCH} -o packages/vllm --registry-override=ghcr.io=localhost:${REG_PORT} --insecure --set IMAGE_VERSION=${LOCAL_VERSION} ${ZARF_FLAGS} --confirm

docker-text-embeddings: sdk-wheel
## Build the image (and tag it for the local registry)
Expand Down Expand Up @@ -263,7 +263,7 @@ silent-deploy-llama-cpp-python-package:
# Deploy the locally built vLLM Zarf package without console noise.
# All deploy output (stdout and stderr) is written to .logs/deploy-vllm.log.
# ZARF_CONFIG is set for the deploy command only, so the vLLM package's own
# zarf-config.yaml is used; the package is deployed exactly once.
silent-deploy-vllm-package:
	@echo "Starting VLLM deployment..."
	@mkdir -p .logs
	@ZARF_CONFIG=packages/vllm/zarf-config.yaml uds zarf package deploy packages/vllm/zarf-package-vllm-${ARCH}-${LOCAL_VERSION}.tar.zst ${ZARF_FLAGS} --confirm > .logs/deploy-vllm.log 2>&1
	@echo "VLLM deployment completed"

silent-deploy-text-embeddings-package:
Expand Down
27 changes: 25 additions & 2 deletions bundles/dev/gpu/uds-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,31 @@ variables:
gpu_limit: 0 # runs on CPU until GPU limit is increased

vllm:
gpu_limit: 1 # if <1, vllm won't work, VLLM is GPU only
#tensor_parallel_size: 1 # TODO: reintroduce when vllm changes get pulled in
trust_remote_code: "True"
tensor_parallel_size: "1"
enforce_eager: "False"
gpu_memory_utilization: "0.90"
worker_use_ray: "True"
engine_use_ray: "True"
quantization: "None"
load_format: "auto"
# LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development)
max_context_length: "32768"
stop_tokens: "</s>, <|im_end|>, <|endoftext|>"
prompt_format_chat_system: "SYSTEM: {}\n"
prompt_format_chat_user: "USER: {}\n"
prompt_format_chat_assistant: "ASSISTANT: {}\n"
temperature: "0.1"
top_p: "1.0"
top_k: "0"
repetition_penalty: "1.0"
max_new_tokens: "8192"
# Pod deployment configuration
gpu_limit: "1"
gpu_runtime: "nvidia"
pvc_size: "15Gi"
pvc_access_mode: "ReadWriteOnce"
pvc_storage_class: "local-path"

supabase:
domain: "uds.dev"
Expand Down
27 changes: 25 additions & 2 deletions bundles/latest/gpu/uds-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,31 @@ variables:
gpu_limit: 0 # runs on CPU until GPU limit is increased

vllm:
gpu_limit: 1 # if <1, vllm won't work, VLLM is GPU only
#tensor_parallel_size: 1 # TODO: reintroduce when vllm changes get pulled in
trust_remote_code: "True"
tensor_parallel_size: "1"
enforce_eager: "False"
gpu_memory_utilization: "0.90"
worker_use_ray: "True"
engine_use_ray: "True"
quantization: "None"
load_format: "auto"
# LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development)
max_context_length: "32768"
stop_tokens: "</s>, <|im_end|>, <|endoftext|>"
prompt_format_chat_system: "SYSTEM: {}\n"
prompt_format_chat_user: "USER: {}\n"
prompt_format_chat_assistant: "ASSISTANT: {}\n"
temperature: "0.1"
top_p: "1.0"
top_k: "0"
repetition_penalty: "1.0"
max_new_tokens: "8192"
# Pod deployment configuration
gpu_limit: "1"
gpu_runtime: "nvidia"
pvc_size: "15Gi"
pvc_access_mode: "ReadWriteOnce"
pvc_storage_class: "local-path"

supabase:
domain: "uds.dev"
Expand Down
8 changes: 4 additions & 4 deletions docs/DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,20 @@ Please first see the pre-requisites listed on the LeapfrogAI documentation websi

It is **_HIGHLY RECOMMENDED_** that PyEnv be installed on your machine and that a new virtual environment be created for every new development branch.

Follow the installation instructions outlined in the [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) repository to install Python 3.11.6:
Follow the installation instructions outlined in the [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) repository to install Python 3.11.9:

```bash
# install the correct python version
pyenv install 3.11.6
pyenv install 3.11.9

# create a new virtual environment named "leapfrogai"
pyenv virtualenv 3.11.6 leapfrogai
pyenv virtualenv 3.11.9 leapfrogai

# activate the virtual environment
pyenv activate leapfrogai
```

If your installation process completes successfully but indicates missing packages such as `sqlite3`, execute the following command to install the required packages then proceed with the reinstallation of Python 3.11.6:
If your installation process completes successfully but indicates missing packages such as `sqlite3`, execute the following command to install the required packages then proceed with the reinstallation of Python 3.11.9:

```bash
sudo apt-get install build-essential zlib1g-dev libffi-dev \
Expand Down
24 changes: 24 additions & 0 deletions packages/api/chart/templates/istio-admin.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{{- if .Capabilities.APIVersions.Has "security.istio.io/v1beta1" }}
# Rendered only when the cluster advertises the security.istio.io/v1beta1 API.
# Denies requests to /metrics* on port 8080 of the pods selected below unless
# the request originates from the istio-admin-gateway or monitoring namespace.
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
  name: api-block-metrics-access-from-public-gateway
  namespace: {{ .Release.Namespace }}
spec:
  selector:
    matchLabels:
      {{- include "chart.selectorLabels" . | nindent 6 }}
  action: DENY
  rules:
    - to:
        - operation:
            ports:
              - "8080"
            paths:
              - /metrics*
      from:
        - source:
            # Sources outside these namespaces match the rule and are denied.
            notNamespaces:
              - istio-admin-gateway
              - monitoring
{{- end }}
5 changes: 5 additions & 0 deletions packages/api/chart/templates/uds-package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ metadata:
labels:
{{- include "chart.labels" . | nindent 4 }}
spec:
monitor:
- portName: http
targetPort: {{ .Values.api.service.port }}
selector:
{{- include "chart.selectorLabels" . | nindent 8 }}
network:
expose:
- service: {{ include "chart.fullname" . }}
Expand Down
25 changes: 12 additions & 13 deletions packages/vllm/.env.example
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
export LAI_HF_HUB_ENABLE_HF_TRANSFER="1"
export LAI_REPO_ID="TheBloke/Synthia-7B-v2.0-GPTQ"
export LAI_REVISION="gptq-4bit-32g-actorder_True"
export LAI_QUANTIZATION="gptq"
export LAI_TENSOR_PARALLEL_SIZE=1
export LAI_MODEL_SOURCE=".model/"
export LAI_MAX_CONTEXT_LENGTH=32768
export LAI_STOP_TOKENS='["</s>","<|endoftext|>","<|im_end|>"]'
export LAI_PROMPT_FORMAT_CHAT_SYSTEM="SYSTEM: {}\n"
export LAI_PROMPT_FORMAT_CHAT_ASSISTANT="ASSISTANT: {}\n"
export LAI_PROMPT_FORMAT_CHAT_USER="USER: {}\n"
export LAI_PROMPT_FORMAT_DEFAULTS_TOP_P=1.0
export LAI_PROMPT_FORMAT_DEFAULTS_TOP_K=0
LFAI_REPO_ID="TheBloke/SynthIA-7B-v2.0-GPTQ"
LFAI_REVISION="gptq-4bit-32g-actorder_True"

VLLM_TENSOR_PARALLEL_SIZE=1
VLLM_TRUST_REMOTE_CODE=True
VLLM_MAX_CONTEXT_LENGTH=32768
VLLM_ENFORCE_EAGER=False
VLLM_GPU_MEMORY_UTILIZATION=0.90
VLLM_WORKER_USE_RAY=True
VLLM_ENGINE_USE_RAY=True
VLLM_QUANTIZATION=None
VLLM_LOAD_FORMAT=auto
54 changes: 15 additions & 39 deletions packages/vllm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 AS builder
# set SDK location
# set the pyenv and Python versions
ARG SDK_DEST=src/leapfrogai_sdk/build \
PYTHON_VERSION=3.11.6 \
PYENV_GIT_TAG=v2.4.8
PYTHON_VERSION=3.11.9 \
PYENV_GIT_TAG=v2.4.8\
COMPONENT_DIRECTORY="packages/vllm"

# use root user for deps installation and nonroot user creation
USER root
Expand Down Expand Up @@ -41,7 +42,7 @@ USER nonroot
# copy-in SDK from sdk stage and vllm source code from host
WORKDIR /home/leapfrogai
COPY --from=sdk --chown=nonroot:nonroot /leapfrogai/${SDK_DEST} ./${SDK_DEST}
COPY --chown=nonroot:nonroot packages/vllm packages/vllm
COPY --chown=nonroot:nonroot ${COMPONENT_DIRECTORY} packages/vllm

# create virtual environment for light-weight portability and minimal libraries
RUN curl https://pyenv.run | bash && \
Expand All @@ -54,10 +55,10 @@ RUN curl https://pyenv.run | bash && \
ENV PYENV_ROOT="/home/nonroot/.pyenv" \
PATH="/home/nonroot/.pyenv/bin:$PATH"

# Install Python 3.11.6, set it as global, and create a venv
# Install Python, set it as global, and create a venv
RUN . ~/.bashrc && \
PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install 3.11.6 && \
pyenv global 3.11.6 && \
PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install 3.11.9 && \
pyenv global ${PYTHON_VERSION} && \
pyenv exec python -m venv .venv

# set path to venv python
Expand All @@ -67,26 +68,15 @@ RUN rm -f packages/vllm/build/*.whl && \
python -m pip wheel packages/vllm -w packages/vllm/build --find-links=${SDK_DEST} && \
pip install packages/vllm/build/lfai_vllm*.whl --no-index --find-links=packages/vllm/build/

#################
# FINAL CONTAINER
#################

FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04

# set SDK location
ARG SDK_DEST=src/leapfrogai_sdk/build

# model-specific arguments
ARG ARG HF_HUB_ENABLE_HF_TRANSFER="1" \
REPO_ID="TheBloke/Synthia-7B-v2.0-GPTQ" \
REVISION="gptq-4bit-32g-actorder_True" \
MODEL_SOURCE="/data/.model/" \
MAX_CONTEXT_LENGTH=32768 \
STOP_TOKENS='["</s>"]' \
PROMPT_FORMAT_CHAT_SYSTEM="SYSTEM: {}\n" \
PROMPT_FORMAT_CHAT_USER="USER: {}\n" \
PROMPT_FORMAT_CHAT_ASSISTANT="ASSISTANT: {}\n" \
PROMPT_FORMAT_DEFAULTS_TOP_P=1.0 \
PROMPT_FORMAT_DEFAULTS_TOP_K=0 \
TENSOR_PARALLEL_SIZE=1 \
QUANTIZATION="gptq"

# setup nonroot user and permissions
USER root
RUN groupadd -g 65532 vglusers && \
Expand All @@ -101,24 +91,10 @@ COPY --from=sdk --chown=nonroot:nonroot /leapfrogai/${SDK_DEST} ./${SDK_DEST}
COPY --from=builder --chown=nonroot:nonroot /home/leapfrogai/.venv /home/leapfrogai/.venv
COPY --from=builder --chown=nonroot:nonroot /home/leapfrogai/packages/vllm/src /home/leapfrogai/packages/vllm/src
# copy-in python binaries
COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.pyenv/versions/3.11.6/ /home/nonroot/.pyenv/versions/3.11.6/

# load ARG values into env variables for pickup by confz
ENV LAI_HF_HUB_ENABLE_HF_TRANSFER=${HF_HUB_ENABLE_HF_TRANSFER} \
LAI_REPO_ID=${REPO_ID} \
LAI_REVISION=${REVISION} \
LAI_MODEL_SOURCE=${MODEL_SOURCE} \
LAI_MAX_CONTEXT_LENGTH=${MAX_CONTEXT_LENGTH} \
LAI_STOP_TOKENS=${STOP_TOKENS} \
LAI_PROMPT_FORMAT_CHAT_SYSTEM=${PROMPT_FORMAT_CHAT_SYSTEM} \
LAI_PROMPT_FORMAT_CHAT_USER=${PROMPT_FORMAT_CHAT_USER} \
LAI_PROMPT_FORMAT_CHAT_ASSISTANT=${PROMPT_FORMAT_CHAT_ASSISTANT} \
LAI_PROMPT_FORMAT_DEFAULTS_TOP_P=${PROMPT_FORMAT_DEFAULTS_TOP_P} \
LAI_PROMPT_FORMAT_DEFAULTS_TOP_K=${PROMPT_FORMAT_DEFAULTS_TOP_K} \
LAI_TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE} \
LAI_QUANTIZATION=${QUANTIZATION} \
# remove vLLM callback to stats server
VLLM_NO_USAGE_STATS=1
COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.pyenv/versions/${PYTHON_VERSION}/ /home/nonroot/.pyenv/versions/${PYTHON_VERSION}/

# remove vLLM callback to stats server
ENV VLLM_NO_USAGE_STATS=1

ENV PATH="/home/leapfrogai/.venv/bin:$PATH"

Expand Down
25 changes: 23 additions & 2 deletions packages/vllm/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
# Target platform architecture for the image build (--platform=linux/<ARCH>).
ARCH ?= amd64
# Image tag and LOCAL_VERSION build argument; defaults to the short commit hash.
LOCAL_VERSION ?= $(shell git rev-parse --short HEAD)
# Extra flags passed through to `docker build`; empty unless overridden.
DOCKER_FLAGS :=

# These targets are commands, not files; always run them when requested.
.PHONY: install download dev docker

# Install the SDK from the repository source tree, then this package in
# editable mode with its "dev" extras.
install:
	python -m pip install ../../src/leapfrogai_sdk
	python -m pip install -e ".[dev]"

# Run src/model_download.py with the KEY=VALUE pairs from .env in its environment.
download:
	@env $$(cat .env | xargs) python src/model_download.py

# Run the download step first, then start leapfrogai_sdk.cli on main:Model
# (app dir src/) with the KEY=VALUE pairs from .env in its environment.
dev: download
	@env $$(cat .env | xargs) python -m leapfrogai_sdk.cli --app-dir=src/ main:Model

# Build the vLLM image from this directory, then run it interactively with
# .env, config.yaml and the .model directory supplied from this directory.
# $(CURDIR) is used for the bind mounts because Make sets it to the directory
# it is actually running in, whereas PWD is merely inherited from the caller's
# environment (and is stale under `make -C`).
docker: download
	docker build ${DOCKER_FLAGS} \
		--platform=linux/${ARCH} \
		--build-arg LOCAL_VERSION=${LOCAL_VERSION} \
		--build-arg COMPONENT_DIRECTORY="./" \
		-t ghcr.io/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} \
		-f ./Dockerfile .

	docker run -it --rm \
		--env-file ./.env \
		-v "$(CURDIR)/config.yaml:/home/leapfrogai/config.yaml" \
		-v "$(CURDIR)/.model:/home/leapfrogai/.model" \
		ghcr.io/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION}
Loading

0 comments on commit b9291a4

Please sign in to comment.