Commit 1514ead

Dockerfile changes, apply both tests

justinthelaw committed Sep 18, 2024
1 parent c9e7840 commit 1514ead

Showing 4 changed files with 43 additions and 38 deletions.
73 changes: 38 additions & 35 deletions .github/workflows/e2e-vllm.yaml
@@ -105,59 +105,62 @@ jobs:
       - name: Create UDS Cluster
         shell: bash
         run: |
-          UDS_CONFIG=.github/config/uds-config.yaml DOCKER_FLAGS="--build-arg CUDA_TAG=12.1.0-base-ubuntu22.04" make create-uds-gpu-cluster
+          UDS_CONFIG=.github/config/uds-config.yaml DOCKER_FLAGS="--build-arg CUDA_TAG=12.2.0-base-ubuntu22.04" make create-uds-gpu-cluster
       - name: Test UDS GPU Cluster
         run: |
+          # Apply the tests in-cluster to ensure GPUs can be scheduled
           uds zarf tools kubectl get nodes
           uds zarf tools kubectl describe node k3d-uds-server-0
           uds zarf tools kubectl get daemonset nvidia-device-plugin-daemonset -n kube-system
           uds zarf tools kubectl get pods -o wide --all-namespaces
           uds zarf tools kubectl exec -it daemonset/nvidia-device-plugin-daemonset -n kube-system -c nvidia-device-plugin-ctr -- nvidia-smi
+          # Apply the CUDA test pod
+          uds zarf tools kubectl apply -f packages/k3d-gpu/test/cuda-device-query.yaml
+          sleep 20
+          uds zarf tools kubectl logs -l app=gpu-pod --namespace=default
-      - name: Setup API and Supabase
-        uses: ./.github/actions/lfai-core
+          uds zarf tools kubectl apply -f packages/k3d-gpu/test/cuda-vector-add.yaml
+          sleep 20
+          uds zarf tools kubectl logs -l app=gpu-pod --namespace=default
-      - name: Setup Python
-        uses: ./.github/actions/python
-        with:
-          additionalOptionalDep: dev-vllm
+      # - name: Setup API and Supabase
+      #   uses: ./.github/actions/lfai-core

-      #######
-      # vllm
-      #######
-      - name: Deploy vLLM
-        run: |
-          make build-vllm LOCAL_VERSION=e2e-test DOCKER_FLAGS="--build-arg MAX_CONTEXT_LENGTH=500"
+      # - name: Setup Python
+      #   uses: ./.github/actions/python
+      #   with:
+      #     additionalOptionalDep: dev-vllm

-          make local-registry
-          make sdk-wheel LOCAL_VERSION=e2e-test
-          docker build --build-arg MAX_CONTEXT_LENGTH=500 --build-arg LOCAL_VERSION=e2e-test -t ghcr.io/defenseunicorns/leapfrogai/vllm:e2e-test -f packages/vllm/Dockerfile .
-          docker tag ghcr.io/defenseunicorns/leapfrogai/vllm:e2e-test localhost:5000/defenseunicorns/leapfrogai/vllm:e2e-test
-          docker push localhost:5000/defenseunicorns/leapfrogai/vllm:e2e-test
+      # #######
+      # # vllm
+      # #######
+      # - name: Deploy vLLM
+      #   run: |
+      #     make build-vllm LOCAL_VERSION=e2e-test DOCKER_FLAGS="--build-arg MAX_CONTEXT_LENGTH=500"

-          uds zarf package create packages/vllm --flavor upstream -o packages/vllm --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=e2e-test --confirm
+      #     make local-registry
+      #     make sdk-wheel LOCAL_VERSION=e2e-test
+      #     docker build --build-arg MAX_CONTEXT_LENGTH=500 --build-arg LOCAL_VERSION=e2e-test -t ghcr.io/defenseunicorns/leapfrogai/vllm:e2e-test -f packages/vllm/Dockerfile .
+      #     docker tag ghcr.io/defenseunicorns/leapfrogai/vllm:e2e-test localhost:5000/defenseunicorns/leapfrogai/vllm:e2e-test
+      #     docker push localhost:5000/defenseunicorns/leapfrogai/vllm:e2e-test

-          docker image prune -af
+      #     uds zarf package create packages/vllm --flavor upstream -o packages/vllm --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=e2e-test --confirm

-          uds zarf package deploy packages/vllm/zarf-package-vllm-amd64-e2e-test.tar.zst -l=trace --confirm
-          rm packages/vllm/zarf-package-vllm-amd64-e2e-test.tar.zst
+      #     docker image prune -af

-          # Check vLLM deployment logs for issues
-          while [[ $(uds zarf tools kubectl get pod -l app=lfai-vllm --namespace=leapfrogai -o jsonpath='{.items[*].status.phase}') != "Running" ]]; do
-            echo "Waiting for pod to be ready..."
-            sleep 5
-          done
+      #     uds zarf package deploy packages/vllm/zarf-package-vllm-amd64-e2e-test.tar.zst -l=trace --confirm
+      #     rm packages/vllm/zarf-package-vllm-amd64-e2e-test.tar.zst

-          uds zarf tools kubectl logs -n leapfrogai deployment/vllm-model
+      #     # Check vLLM deployment logs for issues
+      #     while [[ $(uds zarf tools kubectl get pod -l app=lfai-vllm --namespace=leapfrogai -o jsonpath='{.items[*].status.phase}') != "Running" ]]; do
+      #       echo "Waiting for pod to be ready..."
+      #       sleep 5
+      #     done

-      - name: Test vLLM
-        env:
-          MODEL_NAME: vllm
-        run: |
-          python -m pytest ./tests/e2e/test_llm_generation.py -vv
+      #     uds zarf tools kubectl logs -n leapfrogai deployment/vllm-model

+      # - name: Test vLLM
+      #   env:
+      #     MODEL_NAME: vllm
+      #   run: |
+      #     python -m pytest ./tests/e2e/test_llm_generation.py -vv
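
Note: the readiness check commented out above polls the pod phase in a shell loop. The same wait can be expressed with kubectl's built-in condition; a minimal sketch of such a step (not part of this commit), assuming the same pod label and namespace:

      # Hypothetical step for illustration only: waits on the pod's Ready
      # condition instead of polling .status.phase in a while loop.
      - name: Wait for vLLM
        run: |
          uds zarf tools kubectl wait pod -l app=lfai-vllm -n leapfrogai --for=condition=Ready --timeout=300s
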
3 changes: 2 additions & 1 deletion packages/k3d-gpu/Dockerfile
@@ -14,7 +14,8 @@ RUN apt-get update && \
     tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
     apt-get update && \
     apt-get install -y nvidia-container-toolkit-base nvidia-container-toolkit nvidia-container-runtime util-linux && \
-    nvidia-ctk runtime configure --runtime=containerd --set-as-default
+    nvidia-ctk runtime configure --runtime=containerd --set-as-default && \
+    systemctl restart containerd

 COPY --from=k3s / / --exclude=/bin/
 COPY --from=k3s /bin /bin
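
Since nvidia-ctk runtime configure rewrites containerd's configuration, the change can be sanity-checked against the built image. A hedged sketch of such a check (not part of this commit); the image reference is a placeholder, and /etc/containerd/config.toml is containerd's default path, which a k3s-based image may override:

      # Hypothetical verification step: confirm the NVIDIA runtime was
      # registered in the image's containerd config.
      - name: Check containerd runtime config
        run: |
          docker run --rm --entrypoint grep <k3d-gpu-image:tag> -i nvidia /etc/containerd/config.toml
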
4 changes: 2 additions & 2 deletions packages/k3d-gpu/test/cuda-device-query.yaml
@@ -9,10 +9,10 @@ spec:
   restartPolicy: Never
   containers:
     - name: cuda-container
-      image: nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda11.7.1-ubuntu20.04
+      image: nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.2.0-ubuntu20.04
       resources:
         limits:
-          nvidia.com/gpu: "1" # requesting 1 GPU
+          nvidia.com/gpu: "1"
           cpu: "1"
           memory: 0.5Gi
   tolerations:
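
The bumped deviceQuery image can also be smoke-tested directly on the runner before any in-cluster scheduling. A minimal sketch (not part of this commit), assuming the host exposes GPUs to Docker through the NVIDIA container runtime:

      # Hypothetical host-level smoke test: runs the same sample image the
      # in-cluster pod uses, failing fast if the driver/CUDA pairing is off.
      - name: Host GPU smoke test
        run: |
          docker run --rm --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.2.0-ubuntu20.04
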
1 change: 1 addition & 0 deletions packages/k3d-gpu/test/cuda-vector-add.yaml
@@ -15,6 +15,7 @@ spec:
           nvidia.com/gpu: "1" # requesting 1 GPU
           cpu: "1"
           memory: 0.5Gi
+
   tolerations:
     - key: nvidia.com/gpu
       operator: Exists
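
Only the resources and tolerations fragments of cuda-vector-add.yaml appear in this hunk. For orientation, a complete manifest consistent with those fragments might look like the sketch below; the pod name, image tag, and toleration effect are assumptions modeled on NVIDIA's published vectorAdd sample, not values confirmed by this commit:

  # Hypothetical reconstruction for illustration; only the limits block,
  # the added blank line, and the first two toleration fields are in the diff.
  apiVersion: v1
  kind: Pod
  metadata:
    name: cuda-vector-add            # assumed name
    labels:
      app: gpu-pod                   # the label the workflow tails logs from
  spec:
    restartPolicy: Never
    containers:
      - name: cuda-vector-add        # assumed name
        image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda11.7.1-ubuntu20.04  # assumed tag
        resources:
          limits:
            nvidia.com/gpu: "1" # requesting 1 GPU
            cpu: "1"
            memory: 0.5Gi

    tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule           # assumed; the hunk ends at "operator: Exists"
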
