ci: test GPU
Signed-off-by: Ettore Di Giacinto <[email protected]>
mudler committed Sep 28, 2023
1 parent a28ab18 commit 2626d4d
Showing 5 changed files with 218 additions and 0 deletions.
83 changes: 83 additions & 0 deletions .github/workflows/test-gpu.yml
@@ -0,0 +1,83 @@
---
name: 'GPU tests'

on:
  pull_request:
  push:
    branches:
      - master
    tags:
      - '*'

concurrency:
  group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  ubuntu-latest:
    runs-on: self-hosted
    strategy:
      matrix:
        go-version: ['1.21.x']
    steps:
      - name: Clone
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v4
        with:
          go-version: ${{ matrix.go-version }}
      # You can test your matrix by printing the current Go version
      - name: Display Go version
        run: go version
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
      # - name: Dependencies
      #   run: |
      #     # This fixes libc6-dev installation errors on containers...
      #     sudo rm -rfv /run/systemd/system

      #     sudo apt-get update
      #     sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential ffmpeg nvidia-cuda-toolkit cmake

      #     sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates cmake curl patch
      #     sudo DEBIAN_FRONTEND=noninteractive apt-get install -y libopencv-dev
      #     [ ! -e /usr/include/opencv2 ] && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
      #     sudo DEBIAN_FRONTEND=noninteractive apt-get install -y pip wget
      #     sudo pip install -r extra/requirements.txt
      #     if [ ! -d /build ]; then
      #       sudo mkdir /build && \
      #       sudo chmod -R 777 /build && cd /build && \
      #       curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
      #       tar -xzvf - && \
      #       mkdir -p "spdlog-1.11.0/build" && \
      #       cd "spdlog-1.11.0/build" && \
      #       cmake .. && \
      #       make -j8 && \
      #       sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
      #       cd /build && \
      #       mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
      #       curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
      #       tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
      #       sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
      #       sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
      #       sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
      #     fi
      - name: Build
        run: |
          if [ ! -e /run/systemd/system ]; then
            sudo mkdir /run/systemd/system
          fi
          make prepare-e2e run-e2e-image test-e2e
      - name: Release space from worker ♻
        if: always()
        run: |
          sudo rm -rf build || true
          sudo rm -rf bin || true
          sudo rm -rf dist || true
          make clean || true
          make teardown-e2e || true
          docker system prune -f -a --volumes || true
20 changes: 20 additions & 0 deletions Makefile
@@ -44,6 +44,8 @@ CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=git

RANDOM := $(shell bash -c 'echo $$RANDOM')

VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=
@@ -61,6 +63,9 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)

# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1

ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
@@ -313,6 +318,21 @@ test: prepare test-models/testmodel grpcs
	$(MAKE) test-tts
	$(MAKE) test-stablediffusion

prepare-e2e:
	wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(abspath ./tests/e2e-fixtures)/ggllm-test-model.bin
	docker build --build-arg BUILD_TYPE=cublas --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .

run-e2e-image:
	docker run -p 5390:8080 -d --rm -v $(abspath ./tests/e2e-fixtures):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests

test-e2e:
	@echo 'Running e2e tests'
	LOCALAI_API=http://$(E2E_BRIDGE_IP):5390 $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e

teardown-e2e:
	rm -rf ./tests/e2e-fixtures/ggllm-test-model.bin
	docker stop $$(docker ps -q --filter ancestor=localai-tests)

test-gpt4all: prepare-test
	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
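For reference, the four new targets chain together the same way the workflow's Build step invokes them. A rough local equivalent, assuming a CUDA-capable Docker host with the NVIDIA container toolkit and the default E2E_BRIDGE_IP of 172.17.0.1:

# download the test model and build the cublas-enabled image
make prepare-e2e
# start the container, publishing the API on host port 5390 with --gpus all
make run-e2e-image
# run the Ginkgo suite against the API through the Docker bridge IP
make test-e2e
# stop the container and remove the downloaded model
make teardown-e2e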
19 changes: 19 additions & 0 deletions tests/e2e-fixtures/gpu.yaml
@@ -0,0 +1,19 @@
context_size: 2048
mirostat: 2
mirostat_tau: 5.0
mirostat_eta: 0.1
f16: true
threads: 1
gpu_layers: 90
name: gpt-4
mmap: true
parameters:
  model: ggllm-test-model.bin
  #model: llama2-22b-daydreamer-v3.ggmlv3.q6_K.bin
  #model: wizardlm-30b-uncensored.ggmlv3.q4_K_M.bin
  #model: upstage-llama-2-70b-instruct-v2.ggmlv3.q2_K.bin
  rope_freq_base: 10000
  rope_freq_scale: 1
  temperature: 0.2
  top_k: 40
  top_p: 0.95
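Because this fixture registers the model under the name gpt-4 and run-e2e-image publishes the API on host port 5390, the container can also be smoke-tested by hand. A hypothetical check against LocalAI's OpenAI-compatible endpoint, not part of the suite, using the same prompt the e2e test sends:

curl http://localhost:5390/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "Can you count up to five?"}]}'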
18 changes: 18 additions & 0 deletions tests/e2e/e2e_suite_test.go
@@ -0,0 +1,18 @@
package e2e_test

import (
	"os"
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

var (
	localAIURL = os.Getenv("LOCALAI_API")
)

func TestLocalAI(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "LocalAI E2E test suite")
}
78 changes: 78 additions & 0 deletions tests/e2e/e2e_test.go
@@ -0,0 +1,78 @@
package e2e_test

import (
	"context"
	"errors"
	"io"
	"os/exec"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	openaigo "github.com/otiai10/openaigo"
	"github.com/sashabaranov/go-openai"
)

var _ = Describe("E2E test", func() {
	var client *openai.Client
	var client2 *openaigo.Client

	Context("API with ephemeral models", func() {
		BeforeEach(func() {
			defaultConfig := openai.DefaultConfig("")
			defaultConfig.BaseURL = localAIURL

			// A second client (otiai10/openaigo) is configured against the
			// same base URL; only `client` is exercised in this spec.
			client2 = openaigo.NewClient("")
			client2.BaseURL = defaultConfig.BaseURL

			// Wait for API to be ready
			client = openai.NewClientWithConfig(defaultConfig)
			Eventually(func() error {
				_, err := client.ListModels(context.TODO())
				return err
			}, "2m").ShouldNot(HaveOccurred())
		})

		// Check that the GPU was used
		AfterEach(func() {
			// Execute `docker logs $(docker ps -q --filter ancestor=localai-tests)`
			// and check the output (the string goes straight to bash, so the
			// command substitution is $(...), not the Makefile-escaped $$(...)).
			cmd := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)")
			out, err := cmd.CombinedOutput()
			Expect(err).ToNot(HaveOccurred())
			Expect(string(out)).To(ContainSubstring("found 1 CUDA devices"))
			Expect(string(out)).To(ContainSubstring("using CUDA for GPU acceleration"))
		})

		Context("Generates text", func() {
			It("streams chat tokens", func() {
				models, err := client.ListModels(context.TODO())
				Expect(err).ToNot(HaveOccurred())
				Expect(models.Models).ToNot(BeEmpty())

				model := models.Models[0].ID
				stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{
					Model:    model,
					Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
				Expect(err).ToNot(HaveOccurred())
				defer stream.Close()

				tokens := 0
				text := ""
				for {
					response, err := stream.Recv()
					if errors.Is(err, io.EOF) {
						break
					}

					Expect(err).ToNot(HaveOccurred())
					text += response.Choices[0].Delta.Content
					tokens++
				}
				Expect(text).ToNot(BeEmpty())
				Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))

				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
			})
		})
	})
})
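The AfterEach hook is what actually verifies GPU offload: it greps the container logs for the backend's CUDA initialization messages. Roughly the same check can be run by hand against the test container, using the same filter and log strings the spec asserts on:

docker logs $(docker ps -q --filter ancestor=localai-tests) 2>&1 | \
  grep -E 'found 1 CUDA devices|using CUDA for GPU acceleration'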
