feat: Add oneAPI build to support Intel Scalable CPU (TabbyML#631)
    * Support new feature flag: oneapi
    * Change the compiler to Intel LLVM when compiling llama.cpp
    * Support Intel BLAS (Intel10_64lp)
hungle-i3 committed Feb 17, 2024
1 parent 76f733b commit 2da30b1
Showing 4 changed files with 46 additions and 1 deletion.
1 change: 1 addition & 0 deletions crates/llama-cpp-bindings/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
[features]
cuda = []
rocm = []
oneapi = []

[build-dependencies]
cxx-build = "1.0"
34 changes: 33 additions & 1 deletion crates/llama-cpp-bindings/build.rs
@@ -4,7 +4,6 @@ use cmake::Config;

fn main() {
    const LLAMA_CMAKE_PATH: &str = "llama.cpp/CMakeLists.txt";

    assert!(
        Path::new(LLAMA_CMAKE_PATH).exists(),
        "Please init submodules with `git submodule update --init --recursive` and try again"
@@ -83,6 +82,39 @@ fn build_llama_cpp() {
println!("cargo:rustc-link-lib=hipblas");
}

    if cfg!(feature = "oneapi") {
        let mkl_root = env::var("MKLROOT")
            .expect("MKLROOT needs to be defined to compile for oneAPI (use setvars.sh to set)");
        let compiler_root = env::var("CMPLR_ROOT")
            .expect("CMPLR_ROOT needs to be defined to compile for oneAPI (use setvars.sh to set)");

        config.define("LLAMA_NATIVE", "ON");
        config.define("LLAMA_BLAS", "ON");
        config.define("LLAMA_BLAS_VENDOR", "Intel10_64lp");
        config.define("CMAKE_CXX_COMPILER_ID", "IntelLLVM");
        config.define("CMAKE_CXX_COMPILER_VERSION", "Intel LLVM 2024.0.2 (2024.0.2.20231213)");
        config.define("CMAKE_C_COMPILER", format!("{}/bin/icx", compiler_root));
        config.define("CMAKE_CXX_COMPILER", format!("{}/bin/icpx", compiler_root));
        println!("cargo:rustc-link-search=native={}/lib", compiler_root);
        println!("cargo:rustc-link-search=native={}/lib", mkl_root);
        println!("cargo:rustc-link-lib=svml");
        println!("cargo:rustc-link-lib=mkl_sycl_blas");
        println!("cargo:rustc-link-lib=mkl_sycl_lapack");
        println!("cargo:rustc-link-lib=mkl_sycl_dft");
        println!("cargo:rustc-link-lib=mkl_sycl_sparse");
        println!("cargo:rustc-link-lib=mkl_sycl_vm");
        println!("cargo:rustc-link-lib=mkl_sycl_rng");
        println!("cargo:rustc-link-lib=mkl_sycl_stats");
        println!("cargo:rustc-link-lib=mkl_sycl_data_fitting");
        println!("cargo:rustc-link-lib=mkl_intel_ilp64");
        println!("cargo:rustc-link-lib=mkl_tbb_thread");
        println!("cargo:rustc-link-lib=mkl_core");
        println!("cargo:rustc-link-lib=intlc");
        println!("cargo:rustc-link-lib=pthread");
        println!("cargo:rustc-link-lib=m");
        println!("cargo:rustc-link-lib=dl");
    }

    // By default, this value is automatically inferred from Rust’s compilation profile.
    // For Windows platform, we always build llama.cpp in release mode.
    // See https://github.com/TabbyML/tabby/pull/948 for more details.
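The new build path requires the oneAPI environment at compile time, as the two `expect` calls above enforce. As a minimal sketch (not part of this commit), the following standalone program checks that `MKLROOT` and `CMPLR_ROOT` are set before running `cargo build --features oneapi`; both variables are normally exported by sourcing oneAPI's `setvars.sh`:

// Minimal sketch (not part of this commit): verify the oneAPI environment
// before building with `--features oneapi`; build.rs panics if either
// variable is missing.
use std::env;

fn main() {
    for var in ["MKLROOT", "CMPLR_ROOT"] {
        match env::var(var) {
            Ok(path) => println!("{var} = {path}"),
            Err(_) => eprintln!("{var} is not set; source oneAPI's setvars.sh first"),
        }
    }
}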
1 change: 1 addition & 0 deletions crates/tabby/Cargo.toml
@@ -8,6 +8,7 @@ default = ["ee", "experimental-http"]
ee = ["dep:tabby-webserver"]
cuda = ["llama-cpp-bindings/cuda"]
rocm = ["llama-cpp-bindings/rocm"]
oneapi = ["llama-cpp-bindings/oneapi"]
experimental-http = ["dep:http-api-bindings"]
# If compiling on a system without OpenSSL installed, or cross-compiling for a different
# architecture, enable this feature to compile OpenSSL as part of the build.
11 changes: 11 additions & 0 deletions crates/tabby/src/main.rs
@@ -82,6 +82,10 @@ pub enum Device {
#[strum(serialize = "rocm")]
Rocm,

#[cfg(feature = "oneapi")]
#[strum(serialize = "oneapi")]
OneApi,

#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
#[strum(serialize = "metal")]
Metal,
@@ -108,10 +112,17 @@ impl Device {
        *self == Device::Rocm
    }

    #[cfg(feature = "oneapi")]
    pub fn ggml_use_gpu(&self) -> bool {
        *self == Device::OneApi
    }


    #[cfg(not(any(
        all(target_os = "macos", target_arch = "aarch64"),
        feature = "cuda",
        feature = "rocm",
        feature = "oneapi",
    )))]
    pub fn ggml_use_gpu(&self) -> bool {
        false
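For reference, a minimal sketch (not part of this commit, and assuming `Device` derives strum's `EnumString`, as the existing `#[strum(serialize = ...)]` attributes suggest) of how the new device string would resolve: with the `oneapi` feature enabled, the CLI value "oneapi" parses to `Device::OneApi`.

// Minimal sketch: strum's EnumString derive maps the string given in the
// `serialize` attribute to the corresponding variant. Requires the `strum`
// crate with its `derive` feature enabled.
use std::str::FromStr;
use strum::EnumString;

#[derive(Debug, PartialEq, EnumString)]
enum Device {
    #[strum(serialize = "cpu")]
    Cpu,
    #[strum(serialize = "oneapi")]
    OneApi,
}

fn main() {
    assert_eq!(Device::from_str("oneapi").unwrap(), Device::OneApi);
    assert!(Device::from_str("unknown").is_err());
}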
