-
-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: llama.cpp gRPC C++ backend (#1170)
* wip: llama.cpp c++ gRPC server Signed-off-by: Ettore Di Giacinto <[email protected]> * make it work, attach it to the build process Signed-off-by: Ettore Di Giacinto <[email protected]> * update deps Signed-off-by: Ettore Di Giacinto <[email protected]> * fix: add protobuf dep Signed-off-by: Ettore Di Giacinto <[email protected]> * try fix protobuf on cmake * cmake: workarounds Signed-off-by: Ettore Di Giacinto <[email protected]> * add packages * cmake: use fixed version of grpc Signed-off-by: Ettore Di Giacinto <[email protected]> * cmake(grpc): install locally * install grpc Signed-off-by: Ettore Di Giacinto <[email protected]> * install required deps for grpc on debian bullseye Signed-off-by: Ettore Di Giacinto <[email protected]> * debug * debug * Fixups * no need to install cmake manually Signed-off-by: Ettore Di Giacinto <[email protected]> * ci: fixup macOS * use brew whenever possible Signed-off-by: Ettore Di Giacinto <[email protected]> * macOS fixups * debug * fix container build Signed-off-by: Ettore Di Giacinto <[email protected]> * workaround * try mac https://stackoverflow.com/questions/23905661/on-mac-g-clang-fails-to-search-usr-local-include-and-usr-local-lib-by-def * Disable temp. arm64 docker image builds --------- Signed-off-by: Ettore Di Giacinto <[email protected]>
- Loading branch information
Showing
10 changed files
with
1,145 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Build script for the llama.cpp gRPC backend server (`grpc-server`).
#
# This directory is added to the llama.cpp `examples/` tree, so the `common`
# and `llama` targets used below are expected to be defined by the enclosing
# llama.cpp build.
#
# Steps performed here:
#   1. Locate abseil, Protobuf, gRPC and the protoc / grpc_cpp_plugin tools.
#   2. Generate C++ sources from backend.proto into the binary directory.
#   3. Build the generated code as the `hw_grpc_proto` library.
#   4. Build and link the `grpc-server` executable.
cmake_minimum_required(VERSION 3.15)

# gRPC/abseil headers are compiled as C++17 for every target in this directory.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(TARGET grpc-server)

# Names of the imported targets (without namespace) that are linked below.
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
set(_GRPC_GRPCPP grpc++)

find_package(Threads REQUIRED)
find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)

# Code generators. find_program() leaves "<var>-NOTFOUND" on failure, which
# would otherwise only surface as an obscure error at build time.
find_program(_PROTOBUF_PROTOC protoc)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
if(NOT _PROTOBUF_PROTOC)
  message(FATAL_ERROR "protoc was not found; install the protobuf compiler or add it to PATH")
endif()
if(NOT _GRPC_CPP_PLUGIN_EXECUTABLE)
  message(FATAL_ERROR "grpc_cpp_plugin was not found; install gRPC or add it to PATH")
endif()

message(STATUS "Using protobuf ${Protobuf_VERSION} ${Protobuf_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}")

# Proto file (path is relative to this directory inside the llama.cpp checkout).
get_filename_component(hw_proto
  "../../../../../../pkg/grpc/proto/backend.proto"
  ABSOLUTE BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
get_filename_component(hw_proto_path "${hw_proto}" DIRECTORY)
if(NOT EXISTS "${hw_proto}")
  message(FATAL_ERROR "backend.proto not found at ${hw_proto}")
endif()

# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.h")

# Run protoc with the gRPC plugin. VERBATIM makes argument escaping identical
# on every platform, so the plugin option is passed as one quoted argument
# rather than relying on the shell to strip embedded quotes.
add_custom_command(
  OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
  COMMAND "${_PROTOBUF_PROTOC}"
          --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
          --cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
          -I "${hw_proto_path}"
          "--plugin=protoc-gen-grpc=${_GRPC_CPP_PLUGIN_EXECUTABLE}"
          "${hw_proto}"
  DEPENDS "${hw_proto}"
  COMMENT "Generating C++ protobuf/gRPC sources from backend.proto"
  VERBATIM)

# hw_grpc_proto: library holding the generated protobuf and gRPC stubs.
add_library(hw_grpc_proto
  ${hw_grpc_srcs}
  ${hw_grpc_hdrs}
  ${hw_proto_srcs}
  ${hw_proto_hdrs})

# The generated headers live in the binary dir; PUBLIC exposes them to the
# server executable as well.
target_include_directories(hw_grpc_proto PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
if(Protobuf_INCLUDE_DIRS)
  target_include_directories(hw_grpc_proto PUBLIC ${Protobuf_INCLUDE_DIRS})
endif()

# The generated code uses protobuf and gRPC directly, so it must link them.
target_link_libraries(hw_grpc_proto
  PUBLIC
    gRPC::${_REFLECTION}
    gRPC::${_GRPC_GRPCPP}
    protobuf::${_PROTOBUF_LIBPROTOBUF})

add_executable(${TARGET} grpc-server.cpp)
target_link_libraries(${TARGET}
  PRIVATE
    common
    llama
    Threads::Threads
    absl::flags
    hw_grpc_proto
    absl::flags_parse
    gRPC::${_REFLECTION}
    gRPC::${_GRPC_GRPCPP}
    protobuf::${_PROTOBUF_LIBPROTOBUF})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

# BUILD_INFO is only ordered before this target when the enclosing build
# defines it.
if(TARGET BUILD_INFO)
  add_dependencies(${TARGET} BUILD_INFO)
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
|
||
# Makefile for the llama.cpp gRPC backend.
#
# Clones llama.cpp at a pinned commit, copies this directory's CMakeLists.txt
# and grpc-server.cpp into llama.cpp/examples/grpc-server, builds everything
# with CMake and copies the resulting `grpc-server` binary next to this file.
#
# Variables (all overridable from the command line or environment):
#   LLAMA_VERSION  llama.cpp commit to check out
#   CMAKE_ARGS     extra arguments passed to the CMake configure step
#   BUILD_TYPE     one of: cublas, openblas, clblast, hipblas (empty = CPU only)

LLAMA_VERSION?=24ba3d829e31a6eda3fa1723f692608c2fa3adda

CMAKE_ARGS?=
BUILD_TYPE?=

# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
    CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
    CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# If build type is clblast (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# NOTE(review): /some/path looks like a placeholder; pass the real CLBlast
# location through CMAKE_ARGS if it is not found automatically.
else ifeq ($(BUILD_TYPE),clblast)
    CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas the caller also has to set
# CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ (not done here).
else ifeq ($(BUILD_TYPE),hipblas)
    CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
endif

# `rebuild` and `clean` never produce a file of that name; without .PHONY a
# stray file called "clean" or "rebuild" would make them no-ops.
.PHONY: rebuild clean

# Fetch llama.cpp and pin it to LLAMA_VERSION on a local `build` branch.
llama.cpp:
	git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
	cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1

# Install the server sources into the llama.cpp examples tree and register
# the new subdirectory with the examples' CMakeLists.txt.
llama.cpp/examples/grpc-server:
	mkdir -p llama.cpp/examples/grpc-server
	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt

# Refresh the copied sources in an existing checkout and rebuild the binary.
rebuild:
	cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
	cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
	rm -rf grpc-server
	$(MAKE) grpc-server

# Remove the llama.cpp checkout and the built binary.
clean:
	rm -rf llama.cpp
	rm -rf grpc-server

# Configure and build llama.cpp (including grpc-server), then copy the binary here.
grpc-server: llama.cpp llama.cpp/examples/grpc-server
	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
	cp llama.cpp/build/bin/grpc-server .
Oops, something went wrong.