Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/concedo'
Browse files Browse the repository at this point in the history
  • Loading branch information
YellowRoseCx committed Mar 6, 2024
2 parents 893a1c8 + f44df0e commit ba3f5e3
Show file tree
Hide file tree
Showing 106 changed files with 592,859 additions and 4,436 deletions.
98 changes: 31 additions & 67 deletions .github/workflows/server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,60 +3,42 @@ name: Server

on:
workflow_dispatch: # allows manual triggering
inputs:
slow_tests:
description: 'Run slow tests'
required: true
type: boolean
push:
branches:
- master
- test/server-add-ci-test # FIXME remove
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
pull_request:
types: [opened, synchronize, reopened]
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
schedule:
- cron: '0 0 * * *'

jobs:
server:
runs-on: ubuntu-latest

strategy:
matrix:
build: [noavx, avx2, avx, avx512, cublas, clblast, openblas, kompute, vulkan]
sanitizer: [ADDRESS, THREAD, UNDEFINED]
build_type: [Debug, Release]
include:
- build: 'noavx'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
image: ubuntu:latest
- build: 'avx2'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
image: ubuntu:latest
- build: 'avx'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF'
image: ubuntu:latest
- build: 'avx512'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON'
image: ubuntu:latest
experimental: true
- build: 'cublas'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON'
image: nvidia/cuda:12.3.1-devel-ubuntu22.04
arch_not_available: true # require nvidia docker engine
- build: 'clblast'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON'
image: ubuntu:latest
arch_not_available: true
- build: 'openblas'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS'
image: ubuntu:latest
- build: 'kompute'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
image: ubuntu:latest
arch_not_available: true
- build: 'vulkan'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON'
image: ubuntu:latest
arch_not_available: true
- build_type: Release
sanitizer: ""
exclude:
- build_type: Release
sanitizer: ADDRESS
- build_type: Release
sanitizer: THREAD
- build_type: Release
sanitizer: UNDEFINED

container:
image: ${{ matrix.image }}
image: ubuntu:latest
ports:
- 8888
options: --cpus 4
Expand All @@ -72,56 +54,38 @@ jobs:
apt-get update
apt-get -y install \
build-essential \
pkg-config \
git \
cmake \
python3-pip \
wget \
psmisc
- name: Download CLBlast
id: get_clblast
if: ${{ matrix.build == 'clblast' }}
run: |
apt install -y libclblast-dev
- name: Download OpenBLAS
id: get_openblas
if: ${{ matrix.build == 'openblas' }}
run: |
apt-get -y install libopenblas-dev
- name: Install Vulkan SDK
id: get_vulkan
if: ${{ matrix.build == 'kompute' || matrix.build == 'vulkan' }}
run: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | tee /etc/apt/trusted.gpg.d/lunarg.asc
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
apt-get update
apt-get -y install vulkan-sdk
- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ${{ matrix.defines }}
cmake .. \
-DLLAMA_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt
- name: Download models
id: download_models
- name: Tests
id: server_integration_tests
run: |
cd examples/server/tests
../../../scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf
PORT=8888 ./tests.sh
- name: Tests
id: server_integration_test
continue-on-error: ${{ matrix.experimental || matrix.arch_not_available }}
- name: Slow tests
id: server_integration_tests_slow
if: ${{ github.event.schedule != '' && matrix.build_type == 'Release' || github.event.inputs.slow_tests == 'true' }}
run: |
cd examples/server/tests
PORT=8888 ./tests.sh
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
24 changes: 15 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,8 @@ if (LLAMA_CUBLAS)
enable_language(CUDA)

add_compile_definitions(GGML_USE_CUBLAS)
#add_compile_definitions(GGML_CUDA_CUBLAS) #remove to not use cublas
add_compile_definitions(SD_USE_CUBLAS)
add_compile_definitions(GGML_CUDA_MMQ_Y=${LLAMA_CUDA_MMQ_Y})
#add_compile_definitions(GGML_CUDA_FORCE_DMMV) #non dmmv broken for me

add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
Expand Down Expand Up @@ -156,7 +155,7 @@ if (LLAMA_HIPBLAS)

if (${hipblas_FOUND} AND ${hip_FOUND})
message(STATUS "HIP and hipBLAS found")
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS SD_USE_CUBLAS)
add_library(ggml-rocm OBJECT ${GGML_SOURCES_CUDA})
if (LLAMA_CUDA_FORCE_DMMV)
target_compile_definitions(ggml-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
Expand Down Expand Up @@ -429,39 +428,46 @@ add_library(common2
common/common.h
common/grammar-parser.h
common/grammar-parser.cpp)
target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_library(sdtype_adapter
otherarch/sdcpp/sdtype_adapter.cpp)
target_include_directories(sdtype_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump
target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_library(gpttype_adapter
gpttype_adapter.cpp)
target_include_directories(gpttype_adapter PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
target_include_directories(gpttype_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

if (LLAMA_CUBLAS)
set(TARGET koboldcpp_cublas)
add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
endif()

if (LLAMA_HIPBLAS)
set(TARGET koboldcpp_hipblas)
add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
endif()

Loading

0 comments on commit ba3f5e3

Please sign in to comment.