From 2da30b196d7a0225ff8692516372c04d132fd8b3 Mon Sep 17 00:00:00 2001
From: NamiQ AI
Date: Sat, 17 Feb 2024 15:48:21 +0000
Subject: [PATCH] feat: Add oneapi build to support Intel Scalable CPUs (#631)

* Support new feature: oneapi
* Change compiler to Intel LLVM when compiling llama.cpp
* Support Intel BLAS (Intel10_64lp)
---
 crates/llama-cpp-bindings/Cargo.toml |  1 +
 crates/llama-cpp-bindings/build.rs   | 34 +++++++++++++++++++++++++++-
 crates/tabby/Cargo.toml              |  1 +
 crates/tabby/src/main.rs             | 11 +++++++++
 4 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/crates/llama-cpp-bindings/Cargo.toml b/crates/llama-cpp-bindings/Cargo.toml
index bf0fdd4c5818..e84115a928c4 100644
--- a/crates/llama-cpp-bindings/Cargo.toml
+++ b/crates/llama-cpp-bindings/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
 [features]
 cuda = []
 rocm = []
+oneapi = []
 
 [build-dependencies]
 cxx-build = "1.0"
diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs
index 06629ac485bd..da4f39cb4102 100644
--- a/crates/llama-cpp-bindings/build.rs
+++ b/crates/llama-cpp-bindings/build.rs
@@ -4,7 +4,6 @@ use cmake::Config;
 fn main() {
     const LLAMA_CMAKE_PATH: &str = "llama.cpp/CMakeLists.txt";
-
     assert!(
         Path::new(LLAMA_CMAKE_PATH).exists(),
         "Please init submodules with `git submodule update --init --recursive` and try again"
     );
@@ -83,6 +82,39 @@ fn build_llama_cpp() {
         println!("cargo:rustc-link-lib=hipblas");
     }
 
+    if cfg!(feature = "oneapi") {
+        let mkl_root = env::var("MKLROOT")
+            .expect("MKLROOT needs to be defined to compile for oneAPI (use setvars.sh to set)");
+        let compiler_root = env::var("CMPLR_ROOT")
+            .expect("CMPLR_ROOT needs to be defined to compile for oneAPI (use setvars.sh to set)");
+
+        config.define("LLAMA_NATIVE", "ON");
+        config.define("LLAMA_BLAS", "ON");
+        config.define("LLAMA_BLAS_VENDOR", "Intel10_64lp");
+        config.define("CMAKE_CXX_COMPILER_ID", "IntelLLVM");
+        config.define("CMAKE_CXX_COMPILER_VERSION", "Intel LLVM 2024.0.2 (2024.0.2.20231213)");
+        config.define("CMAKE_C_COMPILER", format!("{}/bin/icx", compiler_root));
+        config.define("CMAKE_CXX_COMPILER", format!("{}/bin/icpx", compiler_root));
+        println!("cargo:rustc-link-search=native={}/lib", compiler_root);
+        println!("cargo:rustc-link-search=native={}/lib", mkl_root);
+        println!("cargo:rustc-link-lib=svml");
+        println!("cargo:rustc-link-lib=mkl_sycl_blas");
+        println!("cargo:rustc-link-lib=mkl_sycl_lapack");
+        println!("cargo:rustc-link-lib=mkl_sycl_dft");
+        println!("cargo:rustc-link-lib=mkl_sycl_sparse");
+        println!("cargo:rustc-link-lib=mkl_sycl_vm");
+        println!("cargo:rustc-link-lib=mkl_sycl_rng");
+        println!("cargo:rustc-link-lib=mkl_sycl_stats");
+        println!("cargo:rustc-link-lib=mkl_sycl_data_fitting");
+        println!("cargo:rustc-link-lib=mkl_intel_ilp64");
+        println!("cargo:rustc-link-lib=mkl_tbb_thread");
+        println!("cargo:rustc-link-lib=mkl_core");
+        println!("cargo:rustc-link-lib=intlc");
+        println!("cargo:rustc-link-lib=pthread");
+        println!("cargo:rustc-link-lib=m");
+        println!("cargo:rustc-link-lib=dl");
+    }
+
     // By default, this value is automatically inferred from Rust’s compilation profile.
     // For Windows platform, we always build llama.cpp in release mode.
     // See https://github.com/TabbyML/tabby/pull/948 for more details.
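
Aside (not part of the diff above): the oneAPI branch emits one cargo:rustc-link-lib directive per MKL/SYCL library. A minimal sketch of a hypothetical build.rs helper that would emit the same directives from a single list (the name and placement are illustrative, assuming it is called from inside the cfg!(feature = "oneapi") branch):

// Hypothetical helper (not in this patch): emit the oneAPI/MKL link
// directives from a list instead of repeating println! for each library.
fn link_oneapi_libs() {
    const ONEAPI_LIBS: &[&str] = &[
        "svml", "mkl_sycl_blas", "mkl_sycl_lapack", "mkl_sycl_dft",
        "mkl_sycl_sparse", "mkl_sycl_vm", "mkl_sycl_rng", "mkl_sycl_stats",
        "mkl_sycl_data_fitting", "mkl_intel_ilp64", "mkl_tbb_thread",
        "mkl_core", "intlc", "pthread", "m", "dl",
    ];
    for lib in ONEAPI_LIBS {
        println!("cargo:rustc-link-lib={lib}");
    }
}
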
diff --git a/crates/tabby/Cargo.toml b/crates/tabby/Cargo.toml
index 94e3085a6016..00b269aca7c4 100644
--- a/crates/tabby/Cargo.toml
+++ b/crates/tabby/Cargo.toml
@@ -8,6 +8,7 @@ default = ["ee", "experimental-http"]
 ee = ["dep:tabby-webserver"]
 cuda = ["llama-cpp-bindings/cuda"]
 rocm = ["llama-cpp-bindings/rocm"]
+oneapi = ["llama-cpp-bindings/oneapi"]
 experimental-http = ["dep:http-api-bindings"]
 # If compiling on a system without OpenSSL installed, or cross-compiling for a different
 # architecture, enable this feature to compile OpenSSL as part of the build.
diff --git a/crates/tabby/src/main.rs b/crates/tabby/src/main.rs
index f35054b3a22c..b71994a7bf6a 100644
--- a/crates/tabby/src/main.rs
+++ b/crates/tabby/src/main.rs
@@ -82,6 +82,10 @@ pub enum Device {
     #[strum(serialize = "rocm")]
     Rocm,
 
+    #[cfg(feature = "oneapi")]
+    #[strum(serialize = "oneapi")]
+    OneApi,
+
     #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
     #[strum(serialize = "metal")]
     Metal,
@@ -108,10 +112,17 @@ impl Device {
         *self == Device::Rocm
     }
 
+    #[cfg(feature = "oneapi")]
+    pub fn ggml_use_gpu(&self) -> bool {
+        *self == Device::OneApi
+    }
+
+
     #[cfg(not(any(
         all(target_os = "macos", target_arch = "aarch64"),
         feature = "cuda",
         feature = "rocm",
+        feature = "oneapi",
     )))]
     pub fn ggml_use_gpu(&self) -> bool {
         false
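
Aside (not part of the diff above): with the oneapi feature enabled, Device::OneApi takes the same GPU code path as the cuda and rocm devices through the cfg-gated ggml_use_gpu shown above. A minimal test sketch, assuming it lives in crates/tabby/src/main.rs next to the Device enum (module and test names are illustrative):

#[cfg(all(test, feature = "oneapi"))]
mod oneapi_device_tests {
    use super::Device;

    #[test]
    fn oneapi_device_uses_gpu_path() {
        // The oneapi-gated ggml_use_gpu impl returns true only for Device::OneApi.
        assert!(Device::OneApi.ggml_use_gpu());
    }
}
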