From 89cf25e71c02e202954557714a48b4e9f0a307f5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 Sep 2023 15:50:36 +0200 Subject: [PATCH] ci: test GPU Signed-off-by: Ettore Di Giacinto --- .github/workflows/test-gpu.yml | 80 ++++++++++++++++++++++++++++++++++ Makefile | 16 +++++++ tests/e2e-fixtures/gpu.yaml | 19 ++++++++ tests/e2e/e2e_suite_test.go | 18 ++++++++ tests/e2e/e2e_test.go | 78 +++++++++++++++++++++++++++++++++ 5 files changed, 211 insertions(+) create mode 100644 .github/workflows/test-gpu.yml create mode 100644 tests/e2e-fixtures/gpu.yaml create mode 100644 tests/e2e/e2e_suite_test.go create mode 100644 tests/e2e/e2e_test.go diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml new file mode 100644 index 000000000000..b7d493275c9c --- /dev/null +++ b/.github/workflows/test-gpu.yml @@ -0,0 +1,80 @@ +--- +name: 'GPU tests' + +on: + pull_request: + push: + branches: + - master + tags: + - '*' + +concurrency: + group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }} + cancel-in-progress: true + +jobs: + ubuntu-latest: + runs-on: self-hosted + strategy: + matrix: + go-version: ['1.21.x'] + steps: + - name: Clone + uses: actions/checkout@v3 + with: + submodules: true + - name: Setup Go ${{ matrix.go-version }} + uses: actions/setup-go@v4 + with: + go-version: ${{ matrix.go-version }} + # You can test your matrix by printing the current Go version + - name: Display Go version + run: go version + - name: Dependencies + run: | + sudo apt-get update + sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget + # - name: Dependencies + # run: | + # # This fixes libc6-dev installations errors on containers... 
+ # sudo rm -rfv /run/systemd/system + + # sudo apt-get update + # sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential ffmpeg nvidia-cuda-toolkit cmake + + # sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates cmake curl patch + # sudo DEBIAN_FRONTEND=noninteractive apt-get install -y libopencv-dev + # [ ! -e /usr/include/opencv2 ] && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + # sudo DEBIAN_FRONTEND=noninteractive apt-get install -y pip wget + # sudo pip install -r extra/requirements.txt + # if [ ! -d /build ]; then + # sudo mkdir /build && \ + # sudo chmod -R 777 /build && cd /build && \ + # curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \ + # tar -xzvf - && \ + # mkdir -p "spdlog-1.11.0/build" && \ + # cd "spdlog-1.11.0/build" && \ + # cmake .. && \ + # make -j8 && \ + # sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \ + # cd /build && \ + # mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \ + # curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \ + # tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \ + # sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \ + # sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \ + # sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. 
/usr/include/ + # fi + - name: Build + run: | + make prepare-e2e test-e2e + - name: Release space from worker ♻ + if: always() + run: | + sudo rm -rf build || true + sudo rm -rf bin || true + sudo rm -rf dist || true + make clean || true + make teardown-e2e || true + docker system prune -f -a --volumes || true \ No newline at end of file diff --git a/Makefile b/Makefile index 764e6a373897..cc7417c201c4 100644 --- a/Makefile +++ b/Makefile @@ -61,6 +61,9 @@ WHITE := $(shell tput -Txterm setaf 7) CYAN := $(shell tput -Txterm setaf 6) RESET := $(shell tput -Txterm sgr0) +# Default Docker bridge IP +E2E_BRIDGE_IP?=172.17.0.1 + ifndef UNAME_S UNAME_S := $(shell uname -s) endif @@ -313,6 +316,19 @@ test: prepare test-models/testmodel grpcs $(MAKE) test-tts $(MAKE) test-stablediffusion +prepare-e2e: + wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(abspath ./tests/e2e-fixtures)/ggllm-test-model.bin + docker build --build-arg BUILD_TYPE=cublas --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests . 
+	docker run -p 5390:8080 -d --rm -v $(abspath ./tests/e2e-fixtures):/models --gpus all --name localai-tests localai-tests + +test-e2e: + @echo 'Running e2e tests' + LOCALAI_API=http://$(E2E_BRIDGE_IP):5390 $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e + +teardown-e2e: + rm -rf ./tests/e2e-fixtures/ggllm-test-model.bin + docker stop $$(docker ps -q --filter ancestor=localai-tests) + test-gpt4all: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg diff --git a/tests/e2e-fixtures/gpu.yaml b/tests/e2e-fixtures/gpu.yaml new file mode 100644 index 000000000000..d37c7e671ac8 --- /dev/null +++ b/tests/e2e-fixtures/gpu.yaml @@ -0,0 +1,19 @@ +context_size: 2048 +mirostat: 2 +mirostat_tau: 5.0 +mirostat_eta: 0.1 +f16: true +threads: 1 +gpu_layers: 90 +name: gpt-4 +mmap: true +parameters: + model: ggllm-test-model.bin + #model: llama2-22b-daydreamer-v3.ggmlv3.q6_K.bin + #model: wizardlm-30b-uncensored.ggmlv3.q4_K_M.bin + #model: upstage-llama-2-70b-instruct-v2.ggmlv3.q2_K.bin + rope_freq_base: 10000 + rope_freq_scale: 1 + temperature: 0.2 + top_k: 40 + top_p: 0.95 diff --git a/tests/e2e/e2e_suite_test.go b/tests/e2e/e2e_suite_test.go new file mode 100644 index 000000000000..f6ab238dfdac --- /dev/null +++ b/tests/e2e/e2e_suite_test.go @@ -0,0 +1,18 @@ +package e2e_test + +import ( + "os" + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var ( + localAIURL = os.Getenv("LOCALAI_API") +) + +func TestLocalAI(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "LocalAI E2E test suite") +} diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go new file mode 100644 index 000000000000..9ff80147fe89 --- /dev/null +++ b/tests/e2e/e2e_test.go @@ -0,0 +1,78 @@ +package e2e_test + +import ( + "context" + "errors" + "io" + "os/exec" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + openaigo "github.com/otiai10/openaigo" + "github.com/sashabaranov/go-openai" +) + +var _ = Describe("E2E test", func() { + var client *openai.Client + var client2 *openaigo.Client + + Context("API with ephemeral models", func() { + BeforeEach(func() { + defaultConfig := openai.DefaultConfig("") + defaultConfig.BaseURL = localAIURL + + client2 = openaigo.NewClient("") + client2.BaseURL = defaultConfig.BaseURL + + // Wait for API to be ready + client = openai.NewClientWithConfig(defaultConfig) + Eventually(func() error { + _, err := client.ListModels(context.TODO()) + return err + }, "2m").ShouldNot(HaveOccurred()) + }) + + // Check that the GPU was used + AfterEach(func() { + // Execute docker logs $(docker ps -q --filter ancestor=localai-tests) as a command and check the output + cmd := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)") + out, err := cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + Expect(string(out)).To(ContainSubstring("found 1 CUDA devices")) + Expect(string(out)).To(ContainSubstring("using CUDA for GPU acceleration")) + }) + + Context("Generates text", func() { + It("streams chat tokens", func() { + models, err := client.ListModels(context.TODO()) + Expect(err).ToNot(HaveOccurred()) + Expect(models.Models).ToNot(BeEmpty()) + + model := models.Models[0].ID + stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{ + Model: model, + Messages: 
[]openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) + Expect(err).ToNot(HaveOccurred()) + defer stream.Close() + + tokens := 0 + text := "" + for { + response, err := stream.Recv() + if errors.Is(err, io.EOF) { + break + } + + Expect(err).ToNot(HaveOccurred()) + text += response.Choices[0].Delta.Content + tokens++ + } + Expect(text).ToNot(BeEmpty()) + Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five"))) + + Expect(tokens).ToNot(Or(Equal(1), Equal(0))) + }) + }) + }) +})