From 6a7c92af224aa1c88742228745ca2e579ae52569 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 4 Jun 2024 13:02:22 +0200
Subject: [PATCH] feat(amdgpu): try to build in single binary

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/release.yaml | 26 ++++++++++++++++++++++++++
 Makefile                       |  8 ++++++++
 pkg/model/initializers.go      | 20 ++++++++++++++++++--
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 618c81a39af7..0d7da468c32d 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -38,6 +38,31 @@ jobs:
           sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
         env:
           CUDA_VERSION: 12-3
+      - name: "Install Hipblas"
+        env:
+          ROCM_VERSION: "5.3"
+          AMDGPU_VERSION: "5.3"
+        run: |
+          set -ex
+
+          sudo apt-get update
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
+
+          curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
+
+          printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
+
+          printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
+
+          sudo apt-get update
+
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
+            hipblas-dev rocm-dev \
+            rocblas-dev
+
+          sudo apt-get clean
+          sudo rm -rf /var/lib/apt/lists/*
+          sudo ldconfig
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
@@ -61,6 +86,7 @@ jobs:
           go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
           export PATH=$PATH:$GOPATH/bin
           export PATH=/usr/local/cuda/bin:$PATH
+          export PATH=/opt/rocm/bin:$PATH
           GO_TAGS=p2p make dist
       - uses: actions/upload-artifact@v4
         with:
diff --git a/Makefile b/Makefile
index f2c030866628..c0abfc2ae80c 100644
--- a/Makefile
+++ b/Makefile
@@ -327,6 +327,7 @@ ifeq ($(OS),Darwin)
 	$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
 else
 	$(MAKE) backend-assets/grpc/llama-cpp-cuda
+	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
 endif
 	$(MAKE) build
 	mkdir -p release
@@ -712,6 +713,13 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
 	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
 
+backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-hipblas
+	$(MAKE) -C backend/cpp/llama-hipblas purge
+	$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
+	BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
+
 backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-grpc
 	$(MAKE) -C backend/cpp/llama-grpc purge
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index d013740ce5d6..e9001f0a9684 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -37,6 +37,7 @@ const (
 	LLamaCPPAVX      = "llama-cpp-avx"
 	LLamaCPPFallback = "llama-cpp-fallback"
 	LLamaCPPCUDA     = "llama-cpp-cuda"
+	LLamaCPPHipblas  = "llama-cpp-hipblas"
 	LLamaCPPGRPC     = "llama-cpp-grpc"
 
 	Gpt4AllLlamaBackend = "gpt4all-llama"
@@ -93,7 +94,7 @@ ENTRY:
 	if autoDetect {
 		// if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up
 		// when starting the service
-		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
+		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas := false, false, false, false, false, false
 		if _, ok := backends[LLamaCPP]; !ok {
 			for _, e := range entry {
 				if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@@ -116,6 +117,10 @@ ENTRY:
 					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
 					foundLCPPCuda = true
 				}
+				if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
+					foundLCPPHipblas = true
+				}
 			}
 		}
 	}
@@ -169,6 +174,7 @@ ENTRY:
 // selectGRPCProcess selects the GRPC process to start based on system capabilities
 func selectGRPCProcess(backend, assetDir string) string {
 	foundCUDA := false
+	foundAMDGPU := false
 	var grpcProcess string
 
 	// Select backend now just for llama.cpp
@@ -195,10 +201,20 @@ func selectGRPCProcess(backend, assetDir string) string {
 				log.Info().Msgf("GPU device found but no CUDA backend present")
 			}
 		}
+		if strings.Contains(gpu.String(), "amd") {
+			p := backendPath(assetDir, LLamaCPPHipblas)
+			if _, err := os.Stat(p); err == nil {
+				log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
+				grpcProcess = p
+				foundAMDGPU = true
+			} else {
+				log.Info().Msgf("GPU device found but no HIPBLAS backend present")
+			}
+		}
 	}
 
-	if foundCUDA {
+	if foundCUDA || foundAMDGPU {
 		return grpcProcess
 	}
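
Note: the runtime dispatch this patch adds to selectGRPCProcess follows a simple pattern: scan the detected GPUs, and if a vendor match is found and the corresponding llama.cpp variant binary was shipped in the assets, prefer it over the CPU builds. The following is a minimal, self-contained sketch of that pattern for reference only; pickVariant, gpuInfo, and the asset layout are hypothetical stand-ins, not the actual xsysinfo.GPUs()/backendPath() helpers used in the tree:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// pickVariant sketches the selection logic: prefer a GPU-specific
// llama.cpp build when a matching GPU is present AND the corresponding
// binary exists under assetDir; otherwise return "" so the caller can
// fall back to the AVX/AVX2/fallback CPU variants.
func pickVariant(assetDir string, gpuInfo []string) string {
	variants := []struct{ vendor, name string }{
		{"nvidia", "llama-cpp-cuda"},
		{"amd", "llama-cpp-hipblas"},
	}
	for _, gpu := range gpuInfo {
		for _, v := range variants {
			if !strings.Contains(strings.ToLower(gpu), v.vendor) {
				continue
			}
			p := filepath.Join(assetDir, "backend-assets", "grpc", v.name)
			if _, err := os.Stat(p); err == nil {
				return p // GPU found and matching backend binary shipped
			}
			fmt.Printf("GPU device found but no %s backend present\n", v.name)
		}
	}
	return ""
}

func main() {
	// Example: an AMD GPU selects the hipblas build only if it was bundled.
	if p := pickVariant("/tmp/assets", []string{"AMD Radeon RX 7900"}); p != "" {
		fmt.Println("selected:", p)
	} else {
		fmt.Println("no GPU variant available, using CPU build")
	}
}

One design detail visible in the patched code: the "nvidia" and "amd" branches both write to the same grpcProcess variable inside one loop over the detected GPUs, so on a mixed-GPU system the last matching device determines which variant is started.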