Skip to content

Commit

Permalink
feat: llama.cpp gRPC C++ backend (#1170)
Browse files Browse the repository at this point in the history
* wip: llama.cpp c++ gRPC server

Signed-off-by: Ettore Di Giacinto <[email protected]>

* make it work, attach it to the build process

Signed-off-by: Ettore Di Giacinto <[email protected]>

* update deps

Signed-off-by: Ettore Di Giacinto <[email protected]>

* fix: add protobuf dep

Signed-off-by: Ettore Di Giacinto <[email protected]>

* try fix protobuf on cmake

* cmake: workarounds

Signed-off-by: Ettore Di Giacinto <[email protected]>

* add packages

* cmake: use fixed version of grpc

Signed-off-by: Ettore Di Giacinto <[email protected]>

* cmake(grpc): install locally

* install grpc

Signed-off-by: Ettore Di Giacinto <[email protected]>

* install required deps for grpc on debian bullseye

Signed-off-by: Ettore Di Giacinto <[email protected]>

* debug

* debug

* Fixups

* no need to install cmake manually

Signed-off-by: Ettore Di Giacinto <[email protected]>

* ci: fixup macOS

* use brew whenever possible

Signed-off-by: Ettore Di Giacinto <[email protected]>

* macOS fixups

* debug

* fix container build

Signed-off-by: Ettore Di Giacinto <[email protected]>

* workaround

* try mac

https://stackoverflow.com/questions/23905661/on-mac-g-clang-fails-to-search-usr-local-include-and-usr-local-lib-by-def

* Disable temp. arm64 docker image builds

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler authored Oct 16, 2023
1 parent 8034ed3 commit 1286942
Show file tree
Hide file tree
Showing 10 changed files with 1,145 additions and 16 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/bump_deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ jobs:
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ jobs:
matrix:
include:
- build-type: ''
platforms: 'linux/amd64,linux/arm64'
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: ''
Expand All @@ -38,7 +39,7 @@ jobs:
tag-suffix: '-cublas-cuda12'
ffmpeg: ''
- build-type: ''
platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
Expand Down
14 changes: 14 additions & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make -j12 install
- name: Build
id: build
env:
Expand Down Expand Up @@ -66,12 +72,20 @@ jobs:
- uses: actions/setup-go@v4
with:
go-version: '>=1.21.0'
- name: Dependencies
run: |
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && make -j12 install && rm -rf grpc
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
make dist
- uses: actions/upload-artifact@v3
with:
Expand Down
20 changes: 19 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,15 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo pip install -r extra/requirements.txt
# Pre-build stable diffusion before we install a newever version of abseil (not compatible with stablediffusion-ncn)
GO_TAGS="tts stablediffusion" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
tar -xzvf - && \
Expand All @@ -87,6 +91,12 @@ jobs:
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make -j12 install
- name: Test
run: |
ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
Expand All @@ -108,6 +118,14 @@ jobs:
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && make -j12 install && rm -rf grpc
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
17 changes: 16 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/i
ARG GO_TAGS="stablediffusion tts"

RUN apt-get update && \
apt-get install -y ca-certificates cmake curl patch pip
apt-get install -y ca-certificates curl patch pip cmake


# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
Expand Down Expand Up @@ -104,6 +105,15 @@ RUN make prepare
COPY . .
COPY .git .

# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build

RUN git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && make -j12 install && rm -rf grpc

# Rebuild with defaults backends
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build

###################################
Expand Down Expand Up @@ -132,8 +142,13 @@ WORKDIR /build
# https://github.com/go-skynet/LocalAI/pull/434
COPY . .
RUN make prepare-sources

# Copy the binary
COPY --from=builder /build/local-ai ./

# do not let piper rebuild (requires an older version of absl)
COPY --from=builder /build/backend-assets/grpc/piper ./backend-assets/grpc/piper

# Copy VALLE-X as it's not a real "lib"
RUN cp -rfv /usr/lib/vall-e-x/* ./

Expand Down
26 changes: 21 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ GOLLAMA_VERSION?=1676dcd7a139b6cdfbaea5fd67f46dc25d9d8bcf

GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7

CPPLLAMA_VERSION?=24ba3d829e31a6eda3fa1723f692608c2fa3adda

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
Expand Down Expand Up @@ -120,7 +122,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
OPTIONAL_GRPC+=backend-assets/grpc/piper
endif

GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)

.PHONY: all test build vendor

Expand Down Expand Up @@ -223,7 +225,7 @@ go-llama/libbinding.a: go-llama
go-llama-stable/libbinding.a: go-llama-stable
$(MAKE) -C go-llama-stable BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a

go-piper/libpiper_binding.a:
go-piper/libpiper_binding.a: go-piper
$(MAKE) -C go-piper libpiper_binding.a example/main

get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
Expand Down Expand Up @@ -280,6 +282,7 @@ clean: ## Remove build related file
rm -rf ./go-ggllm
rm -rf $(BINARY_NAME)
rm -rf release/
$(MAKE) -C backend/cpp/llama clean

## Build:

Expand Down Expand Up @@ -395,6 +398,16 @@ ifeq ($(BUILD_TYPE),metal)
cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
endif

backend/cpp/llama/grpc-server:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server

backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
endif

backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama \
Expand Down Expand Up @@ -451,9 +464,12 @@ backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/

backend-assets/grpc/stablediffusion: backend-assets/grpc go-stable-diffusion/libstablediffusion.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/
backend-assets/grpc/stablediffusion: backend-assets/grpc
if [ ! -f backend-assets/grpc/stablediffusion ]; then \
$(MAKE) go-stable-diffusion/libstablediffusion.a; \
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/; \
fi

backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
Expand Down
57 changes: 57 additions & 0 deletions backend/cpp/llama/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
set(CMAKE_CXX_STANDARD 17)
cmake_minimum_required(VERSION 3.15)
set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)

find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)

find_program(_PROTOBUF_PROTOC protoc)
set(_GRPC_GRPCPP grpc++)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)

include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${Protobuf_INCLUDE_DIRS})

message(STATUS "Using protobuf ${Protobuf_VERSION} ${Protobuf_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}")


# Proto file
get_filename_component(hw_proto "../../../../../../pkg/grpc/proto/backend.proto" ABSOLUTE)
get_filename_component(hw_proto_path "${hw_proto}" PATH)

# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.h")

add_custom_command(
OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${hw_proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
"${hw_proto}"
DEPENDS "${hw_proto}")

# hw_grpc_proto
add_library(hw_grpc_proto
${hw_grpc_srcs}
${hw_grpc_hdrs}
${hw_proto_srcs}
${hw_proto_hdrs})

add_executable(${TARGET} grpc-server.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
protobuf::${_PROTOBUF_LIBPROTOBUF})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
add_dependencies(${TARGET} BUILD_INFO)
endif()
44 changes: 44 additions & 0 deletions backend/cpp/llama/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

LLAMA_VERSION?=24ba3d829e31a6eda3fa1723f692608c2fa3adda

CMAKE_ARGS?=
BUILD_TYPE?=

# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# If build type is clblast (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblast)
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
endif

llama.cpp:
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1

llama.cpp/examples/grpc-server:
mkdir -p llama.cpp/examples/grpc-server
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt

rebuild:
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
rm -rf grpc-server
$(MAKE) grpc-server

clean:
rm -rf llama.cpp
rm -rf grpc-server

grpc-server: llama.cpp llama.cpp/examples/grpc-server
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
cp llama.cpp/build/bin/grpc-server .
Loading

0 comments on commit 1286942

Please sign in to comment.