Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(amdgpu): try to build in single binary #2485

Merged
merged 2 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,46 @@ jobs:
build-linux:
runs-on: ubuntu-latest
steps:
- name: Release space from worker
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y firefox || true
sudo apt-get remove -y powershell || true
sudo apt-get remove -y r-base-core || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Clone
uses: actions/checkout@v4
with:
Expand All @@ -38,6 +78,31 @@ jobs:
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
env:
CUDA_VERSION: 12-3
- name: "Install Hipblas"
env:
ROCM_VERSION: "6.1"
AMDGPU_VERSION: "6.1"
run: |
set -ex

sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg

curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -

printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list

printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt-get update

sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
hipblas-dev rocm-dev \
rocblas-dev

sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo ldconfig
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
Expand All @@ -61,6 +126,7 @@ jobs:
go install google.golang.org/protobuf/cmd/[email protected]
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
GO_TAGS=p2p make dist
- uses: actions/upload-artifact@v4
with:
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
else
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
endif
$(MAKE) build
mkdir -p release
Expand Down Expand Up @@ -712,6 +713,13 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda

backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas

backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
Expand Down
20 changes: 18 additions & 2 deletions pkg/model/initializers.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const (
LLamaCPPAVX = "llama-cpp-avx"
LLamaCPPFallback = "llama-cpp-fallback"
LLamaCPPCUDA = "llama-cpp-cuda"
LLamaCPPHipblas = "llama-cpp-hipblas"
LLamaCPPGRPC = "llama-cpp-grpc"

Gpt4AllLlamaBackend = "gpt4all-llama"
Expand Down Expand Up @@ -93,7 +94,7 @@ ENTRY:
if autoDetect {
// if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up
// when starting the service
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas := false, false, false, false, false, false
if _, ok := backends[LLamaCPP]; !ok {
for _, e := range entry {
if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
Expand All @@ -116,6 +117,10 @@ ENTRY:
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
foundLCPPCuda = true
}
if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
foundLCPPHipblas = true
}
}
}
}
Expand Down Expand Up @@ -169,6 +174,7 @@ ENTRY:
// selectGRPCProcess selects the GRPC process to start based on system capabilities
func selectGRPCProcess(backend, assetDir string) string {
foundCUDA := false
foundAMDGPU := false
var grpcProcess string

// Select backend now just for llama.cpp
Expand All @@ -195,10 +201,20 @@ func selectGRPCProcess(backend, assetDir string) string {
log.Info().Msgf("GPU device found but no CUDA backend present")
}
}
if strings.Contains(gpu.String(), "amd") {
p := backendPath(assetDir, LLamaCPPHipblas)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
grpcProcess = p
foundAMDGPU = true
} else {
log.Info().Msgf("GPU device found but no HIPBLAS backend present")
}
}
}
}

if foundCUDA {
if foundCUDA || foundAMDGPU {
return grpcProcess
}

Expand Down