feat: Add oneAPI build to support Intel Scalable CPU (TabbyML#631)
    * Support new feature flag: oneapi
    * Change the compiler to Intel LLVM when compiling llama.cpp
    * Support Intel BLAS (Intel10_64lp)
hungle-i3 committed Feb 17, 2024
1 parent 76f733b commit 2da30b1
Showing 4 changed files with 46 additions and 1 deletion.
1 change: 1 addition & 0 deletions crates/llama-cpp-bindings/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
[features]
cuda = []
rocm = []
oneapi = []

[build-dependencies]
cxx-build = "1.0"
34 changes: 33 additions & 1 deletion crates/llama-cpp-bindings/build.rs
@@ -4,7 +4,6 @@ use cmake::Config;

fn main() {
    const LLAMA_CMAKE_PATH: &str = "llama.cpp/CMakeLists.txt";

    assert!(
        Path::new(LLAMA_CMAKE_PATH).exists(),
        "Please init submodules with `git submodule update --init --recursive` and try again"
@@ -83,6 +82,39 @@ fn build_llama_cpp() {
println!("cargo:rustc-link-lib=hipblas");
}

    if cfg!(feature = "oneapi") {
        let mkl_root = env::var("MKLROOT")
            .expect("MKLROOT needs to be defined to compile for oneAPI (use setvars.sh to set)");
        let compiler_root = env::var("CMPLR_ROOT")
            .expect("CMPLR_ROOT needs to be defined to compile for oneAPI (use setvars.sh to set)");

        config.define("LLAMA_NATIVE", "ON");
        config.define("LLAMA_BLAS", "ON");
        config.define("LLAMA_BLAS_VENDOR", "Intel10_64lp");
        config.define("CMAKE_CXX_COMPILER_ID", "IntelLLVM");
        config.define("CMAKE_CXX_COMPILER_VERSION", "Intel LLVM 2024.0.2 (2024.0.2.20231213)");
        config.define("CMAKE_C_COMPILER", format!("{}/bin/icx", compiler_root));
        config.define("CMAKE_CXX_COMPILER", format!("{}/bin/icpx", compiler_root));
        println!("cargo:rustc-link-search=native={}/lib", compiler_root);
        println!("cargo:rustc-link-search=native={}/lib", mkl_root);
        println!("cargo:rustc-link-lib=svml");
        println!("cargo:rustc-link-lib=mkl_sycl_blas");
        println!("cargo:rustc-link-lib=mkl_sycl_lapack");
        println!("cargo:rustc-link-lib=mkl_sycl_dft");
        println!("cargo:rustc-link-lib=mkl_sycl_sparse");
        println!("cargo:rustc-link-lib=mkl_sycl_vm");
        println!("cargo:rustc-link-lib=mkl_sycl_rng");
        println!("cargo:rustc-link-lib=mkl_sycl_stats");
        println!("cargo:rustc-link-lib=mkl_sycl_data_fitting");
        println!("cargo:rustc-link-lib=mkl_intel_ilp64");
        println!("cargo:rustc-link-lib=mkl_tbb_thread");
        println!("cargo:rustc-link-lib=mkl_core");
        println!("cargo:rustc-link-lib=intlc");
        println!("cargo:rustc-link-lib=pthread");
        println!("cargo:rustc-link-lib=m");
        println!("cargo:rustc-link-lib=dl");
    }

    // By default, this value is automatically inferred from Rust’s compilation profile.
    // For Windows platform, we always build llama.cpp in release mode.
    // See https://github.com/TabbyML/tabby/pull/948 for more details.
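The new build path requires the oneAPI environment at compile time, as the two `expect` calls above enforce. As a minimal sketch (not part of this commit), the following standalone program checks that `MKLROOT` and `CMPLR_ROOT` are set before running `cargo build --features oneapi`; both variables are normally exported by sourcing oneAPI's `setvars.sh`:

// Minimal sketch (not part of this commit): verify the oneAPI environment
// before building with `--features oneapi`; build.rs panics if either
// variable is missing.
use std::env;

fn main() {
    for var in ["MKLROOT", "CMPLR_ROOT"] {
        match env::var(var) {
            Ok(path) => println!("{var} = {path}"),
            Err(_) => eprintln!("{var} is not set; source oneAPI's setvars.sh first"),
        }
    }
}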
1 change: 1 addition & 0 deletions crates/tabby/Cargo.toml
@@ -8,6 +8,7 @@ default = ["ee", "experimental-http"]
ee = ["dep:tabby-webserver"]
cuda = ["llama-cpp-bindings/cuda"]
rocm = ["llama-cpp-bindings/rocm"]
oneapi = ["llama-cpp-bindings/oneapi"]
experimental-http = ["dep:http-api-bindings"]
# If compiling on a system without OpenSSL installed, or cross-compiling for a different
# architecture, enable this feature to compile OpenSSL as part of the build.
11 changes: 11 additions & 0 deletions crates/tabby/src/main.rs
@@ -82,6 +82,10 @@ pub enum Device {
#[strum(serialize = "rocm")]
Rocm,

#[cfg(feature = "oneapi")]
#[strum(serialize = "oneapi")]
OneApi,

#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
#[strum(serialize = "metal")]
Metal,
@@ -108,10 +112,17 @@ impl Device {
        *self == Device::Rocm
    }

    #[cfg(feature = "oneapi")]
    pub fn ggml_use_gpu(&self) -> bool {
        *self == Device::OneApi
    }


    #[cfg(not(any(
        all(target_os = "macos", target_arch = "aarch64"),
        feature = "cuda",
        feature = "rocm",
        feature = "oneapi",
    )))]
    pub fn ggml_use_gpu(&self) -> bool {
        false
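For reference, a minimal sketch (not part of this commit, and assuming `Device` derives strum's `EnumString`, as the existing `#[strum(serialize = ...)]` attributes suggest) of how the new device string would resolve: with the `oneapi` feature enabled, the CLI value "oneapi" parses to `Device::OneApi`.

// Minimal sketch: strum's EnumString derive maps the string given in the
// `serialize` attribute to the corresponding variant. Requires the `strum`
// crate with its `derive` feature enabled.
use std::str::FromStr;
use strum::EnumString;

#[derive(Debug, PartialEq, EnumString)]
enum Device {
    #[strum(serialize = "cpu")]
    Cpu,
    #[strum(serialize = "oneapi")]
    OneApi,
}

fn main() {
    assert_eq!(Device::from_str("oneapi").unwrap(), Device::OneApi);
    assert!(Device::from_str("unknown").is_err());
}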
