diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml
new file mode 100644
index 000000000000..c6f19a92a012
--- /dev/null
+++ b/.github/workflows/test-gpu.yml
@@ -0,0 +1,84 @@
+---
+name: 'GPU tests'
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+    tags:
+      - '*'
+
+concurrency:
+  group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
+jobs:
+  ubuntu-latest:
+    # The e2e Makefile targets run the image with `docker run --gpus all`, so the runner needs Docker and a GPU
+    runs-on: self-hosted
+    strategy:
+      matrix:
+        go-version: ['1.21.x']
+    steps:
+      - name: Clone
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+      - name: Setup Go ${{ matrix.go-version }}
+        uses: actions/setup-go@v4
+        with:
+          go-version: ${{ matrix.go-version }}
+      # You can test your matrix by printing the current Go version
+      - name: Display Go version
+        run: go version
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
+      # - name: Dependencies
+      #   run: |
+      #     # This fixes libc6-dev installations errors on containers...
+      #     sudo rm -rfv /run/systemd/system
+
+      #     sudo apt-get update
+      #     sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential ffmpeg nvidia-cuda-toolkit cmake
+
+      #     sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates cmake curl patch
+      #     sudo DEBIAN_FRONTEND=noninteractive apt-get install -y libopencv-dev
+      #     [ ! -e /usr/include/opencv2 ] && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+      #     sudo DEBIAN_FRONTEND=noninteractive apt-get install -y pip wget
+      #     sudo pip install -r extra/requirements.txt
+      #     if [ ! -d /build ]; then
+      #       sudo mkdir /build && \
+      #       sudo chmod -R 777 /build && cd /build && \
+      #       curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
+      #       tar -xzvf - && \
+      #       mkdir -p "spdlog-1.11.0/build" && \
+      #       cd "spdlog-1.11.0/build" && \
+      #       cmake .. && \
+      #       make -j8 && \
+      #       sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
+      #       cd /build && \
+      #       mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
+      #       curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
+      #       tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
+      #       sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
+      #       sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
+      #       sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
+      #     fi
+      - name: Build
+        run: |
+          if [ ! -e /run/systemd/system ]; then
+            sudo mkdir /run/systemd/system
+          fi
+          make prepare-e2e run-e2e-image test-e2e
+      - name: Release space from worker ♻
+        if: always()
+        run: |
+          sudo rm -rf build || true
+          sudo rm -rf bin || true
+          sudo rm -rf dist || true
+          make clean || true
+          make teardown-e2e || true
+          docker system prune -f -a --volumes || true
diff --git a/Makefile b/Makefile
index 764e6a373897..9cfa0d23ce8c 100644
--- a/Makefile
+++ b/Makefile
@@ -44,6 +44,8 @@ CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
 BUILD_ID?=git
 
+RANDOM := $(shell bash -c 'echo $$RANDOM')
+
 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
 LD_FLAGS?=
@@ -61,6 +63,9 @@ WHITE := $(shell tput -Txterm setaf 7)
 CYAN := $(shell tput -Txterm setaf 6)
 RESET := $(shell tput -Txterm sgr0)
 
+# Default Docker bridge IP
+E2E_BRIDGE_IP?=172.17.0.1
+
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif
@@ -313,6 +318,25 @@ test: prepare test-models/testmodel grpcs
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion
 
+# Download the test model and build the cublas-enabled image used by the e2e tests
+prepare-e2e:
+	wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(abspath ./tests/e2e-fixtures)/ggllm-test-model.bin
+	docker build --build-arg BUILD_TYPE=cublas --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+
+# Start the e2e image detached with all GPUs, publishing container port 8080 on host port 5390
+run-e2e-image:
+	docker run -p 5390:8080 -d --rm -v $(abspath ./tests/e2e-fixtures):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
+
+# Run the Ginkgo e2e suite against the published port via E2E_BRIDGE_IP
+test-e2e:
+	@echo 'Running e2e tests'
+	LOCALAI_API=http://$(E2E_BRIDGE_IP):5390 $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
+
+# Remove the downloaded model and stop the containers started from the e2e image
+teardown-e2e:
+	rm -rf ./tests/e2e-fixtures/ggllm-test-model.bin
+	docker stop $$(docker ps -q --filter ancestor=localai-tests)
+
 test-gpt4all: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
diff --git a/tests/e2e-fixtures/gpu.yaml b/tests/e2e-fixtures/gpu.yaml
new file mode 100644
index 000000000000..d37c7e671ac8
--- /dev/null
+++ b/tests/e2e-fixtures/gpu.yaml
@@ -0,0 +1,19 @@
+context_size: 2048
+mirostat: 2
+mirostat_tau: 5.0
+mirostat_eta: 0.1
+f16: true
+threads: 1
+gpu_layers: 90
+name: gpt-4
+mmap: true
+parameters:
+  model: ggllm-test-model.bin
+  #model: llama2-22b-daydreamer-v3.ggmlv3.q6_K.bin
+  #model: wizardlm-30b-uncensored.ggmlv3.q4_K_M.bin
+  #model: upstage-llama-2-70b-instruct-v2.ggmlv3.q2_K.bin
+  rope_freq_base: 10000
+  rope_freq_scale: 1
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
diff --git a/tests/e2e/e2e_suite_test.go b/tests/e2e/e2e_suite_test.go
new file mode 100644
index 000000000000..f6ab238dfdac
--- /dev/null
+++ b/tests/e2e/e2e_suite_test.go
@@ -0,0 +1,20 @@
+package e2e_test
+
+import (
+	"os"
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var (
+	// localAIURL is the API base URL of the instance under test, read from LOCALAI_API.
+	localAIURL = os.Getenv("LOCALAI_API")
+)
+
+// TestLocalAI registers Ginkgo's Fail as the Gomega fail handler and runs the e2e specs.
+func TestLocalAI(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "LocalAI E2E test suite")
+}
diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go
new file mode 100644
index 000000000000..9ff80147fe89
--- /dev/null
+++ b/tests/e2e/e2e_test.go
@@ -0,0 +1,83 @@
+package e2e_test
+
+import (
+	"context"
+	"errors"
+	"io"
+	"os/exec"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	openaigo "github.com/otiai10/openaigo"
+	"github.com/sashabaranov/go-openai"
+)
+
+// End-to-end specs that talk to the API at localAIURL and inspect the localai-tests container logs.
+var _ = Describe("E2E test", func() {
+	var client *openai.Client
+	var client2 *openaigo.Client
+
+	Context("API with ephemeral models", func() {
+		BeforeEach(func() {
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = localAIURL
+
+			client2 = openaigo.NewClient("")
+			client2.BaseURL = defaultConfig.BaseURL
+
+			// Wait for API to be ready
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m").ShouldNot(HaveOccurred())
+		})
+
+		// Check that the GPU was used
+		AfterEach(func() {
+			// Execute docker logs $(docker ps -q --filter ancestor=localai-tests) as a command and check the output.
+			// The command substitution must be "$(...)": "$$" is only the Makefile escape for "$",
+			// and in bash it expands to the shell PID, which breaks the command.
+			cmd := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)")
+			out, err := cmd.CombinedOutput()
+			Expect(err).ToNot(HaveOccurred())
+			Expect(string(out)).To(ContainSubstring("found 1 CUDA devices"))
+			Expect(string(out)).To(ContainSubstring("using CUDA for GPU acceleration"))
+		})
+
+		Context("Generates text", func() {
+			It("streams chat tokens", func() {
+				models, err := client.ListModels(context.TODO())
+				Expect(err).ToNot(HaveOccurred())
+				Expect(models.Models).ToNot(BeEmpty())
+
+				model := models.Models[0].ID
+				stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{
+					Model:    model,
+					Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
+				Expect(err).ToNot(HaveOccurred())
+				defer stream.Close()
+
+				tokens := 0
+				text := ""
+				for {
+					response, err := stream.Recv()
+					if errors.Is(err, io.EOF) {
+						break
+					}
+
+					Expect(err).ToNot(HaveOccurred())
+					// Fail with a readable assertion instead of an index-out-of-range panic on a chunk without choices
+					Expect(response.Choices).ToNot(BeEmpty())
+					text += response.Choices[0].Delta.Content
+					tokens++
+				}
+				Expect(text).ToNot(BeEmpty())
+				Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
+
+				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
+			})
+		})
+	})
+})