feat(amdgpu): try to build in single binary
Signed-off-by: Ettore Di Giacinto <[email protected]>
mudler committed Jun 4, 2024
1 parent 34ab442 commit 6a7c92a
Showing 3 changed files with 52 additions and 2 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/release.yaml
@@ -38,6 +38,31 @@ jobs:
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
env:
CUDA_VERSION: 12-3
- name: "Install Hipblas"
env:
ROCM_VERSION: "5.3"
AMDGPU_VERSION: "5.3"
run: |
set -ex
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
hipblas-dev rocm-dev \
rocblas-dev
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo ldconfig
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -61,6 +86,7 @@ jobs:
go install google.golang.org/protobuf/cmd/[email protected]
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
GO_TAGS=p2p make dist
- uses: actions/upload-artifact@v4
with:
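The new step installs the ROCm 5.3 userspace from repo.radeon.com and exports /opt/rocm/bin so that `make dist` can find the HIP toolchain. A minimal sanity-check sketch of what the build now assumes about the runner (the paths mirror the workflow above; `hipcc` coming from the rocm-dev packages is an assumption, not something this commit verifies):

```go
// rocm_check.go — hypothetical pre-build sanity check, not part of this commit.
package main

import (
	"fmt"
	"os"
	"os/exec"
)

func main() {
	// The workflow above installs ROCm under /opt/rocm and prepends
	// /opt/rocm/bin to PATH before running `make dist`.
	if _, err := os.Stat("/opt/rocm/bin"); err != nil {
		fmt.Fprintln(os.Stderr, "ROCm not found under /opt/rocm; the hipblas variant cannot build")
		os.Exit(1)
	}
	// hipcc is expected to come with the rocm-dev packages (assumption).
	if path, err := exec.LookPath("hipcc"); err == nil {
		fmt.Println("hipcc found at", path)
	} else {
		fmt.Fprintln(os.Stderr, "hipcc not on PATH; check the apt install step")
	}
}
```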
8 changes: 8 additions & 0 deletions Makefile
@@ -327,6 +327,7 @@ ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
else
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
endif
$(MAKE) build
mkdir -p release
@@ -712,6 +713,13 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda

backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas

backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
20 changes: 18 additions & 2 deletions pkg/model/initializers.go
@@ -37,6 +37,7 @@ const (
LLamaCPPAVX = "llama-cpp-avx"
LLamaCPPFallback = "llama-cpp-fallback"
LLamaCPPCUDA = "llama-cpp-cuda"
LLamaCPPHipblas = "llama-cpp-hipblas"
LLamaCPPGRPC = "llama-cpp-grpc"

Gpt4AllLlamaBackend = "gpt4all-llama"
@@ -93,7 +94,7 @@ ENTRY:
if autoDetect {
// if we find the llama.cpp variants, show them off as a single backend (llama-cpp), as later we are going to pick that up
// when starting the service
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas := false, false, false, false, false, false
if _, ok := backends[LLamaCPP]; !ok {
for _, e := range entry {
if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@@ -116,6 +117,10 @@ ENTRY:
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
foundLCPPCuda = true
}
if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
foundLCPPHipblas = true
}
}
}
}
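The loop above folds every llama-cpp-* binary found in the asset directory into a single logical llama-cpp backend, now including the new hipblas variant. A standalone sketch of that grouping (`groupVariants` is illustrative, not the project's API; the variant names are the constants added in this diff):

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// Ordered longest-match-first so "llama-cpp-avx" does not also
// swallow "llama-cpp-avx2".
var variants = []string{
	"llama-cpp-avx2", "llama-cpp-avx", "llama-cpp-fallback",
	"llama-cpp-grpc", "llama-cpp-cuda", "llama-cpp-hipblas",
}

// groupVariants collects every llama-cpp-* binary found in assetDir,
// mirroring how the loader shows them as one llama-cpp backend.
func groupVariants(assetDir string) ([]string, error) {
	entries, err := os.ReadDir(assetDir)
	if err != nil {
		return nil, err
	}
	seen := map[string]bool{}
	var found []string
	for _, e := range entries {
		for _, v := range variants {
			if strings.Contains(e.Name(), v) && !seen[v] {
				seen[v] = true
				found = append(found, v)
				break
			}
		}
	}
	return found, nil
}

func main() {
	found, err := groupVariants("backend-assets/grpc")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("llama-cpp variants:", found)
}
```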
@@ -169,6 +174,7 @@ ENTRY:
// selectGRPCProcess selects the GRPC process to start based on system capabilities
func selectGRPCProcess(backend, assetDir string) string {
foundCUDA := false
foundAMDGPU := false
var grpcProcess string

// Select backend now just for llama.cpp
@@ -195,10 +201,20 @@ func selectGRPCProcess(backend, assetDir string) string {
log.Info().Msgf("GPU device found but no CUDA backend present")
}
}
if strings.Contains(gpu.String(), "amd") {
p := backendPath(assetDir, LLamaCPPHipblas)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
grpcProcess = p
foundAMDGPU = true
} else {
log.Info().Msgf("GPU device found but no HIPBLAS backend present")
}
}
}
}

if foundCUDA {
if foundCUDA || foundAMDGPU {
return grpcProcess
}

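With these changes, selectGRPCProcess prefers the CUDA binary on NVIDIA devices and the new HIPBLAS binary on AMD devices, falling back to the CPU variants only when neither matches. A condensed sketch of that decision (`pickLlamaBinary` and the `gpuVendors` parameter are illustrative stand-ins for the real GPU enumeration):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// pickLlamaBinary condenses the selection logic above: prefer a CUDA
// or HIPBLAS variant when a matching GPU vendor is present and the
// binary exists, otherwise report no match so the caller can fall
// back to the CPU variants.
func pickLlamaBinary(assetDir string, gpuVendors []string) (string, bool) {
	for _, vendor := range gpuVendors {
		v := strings.ToLower(vendor)
		switch {
		case strings.Contains(v, "nvidia"):
			p := filepath.Join(assetDir, "llama-cpp-cuda")
			if _, err := os.Stat(p); err == nil {
				return p, true
			}
			fmt.Println("GPU device found but no CUDA backend present")
		case strings.Contains(v, "amd"):
			p := filepath.Join(assetDir, "llama-cpp-hipblas")
			if _, err := os.Stat(p); err == nil {
				return p, true
			}
			fmt.Println("GPU device found but no HIPBLAS backend present")
		}
	}
	return "", false // caller falls back to avx2/avx/fallback variants
}

func main() {
	if p, ok := pickLlamaBinary("backend-assets/grpc", []string{"amd"}); ok {
		fmt.Println("selected:", p)
	} else {
		fmt.Println("no GPU variant selected; using CPU fallback")
	}
}
```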
