Skip to content

Commit

Permalink
Merge pull request #246 from tinglou/main
Browse files Browse the repository at this point in the history
New feature `metal` to turn the Metal framework on/off on macOS
  • Loading branch information
MarcusDunn authored Apr 13, 2024
2 parents 12f895e + 6ae0e01 commit b170442
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 6 deletions.
4 changes: 4 additions & 0 deletions llama-cpp-2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ tracing = { workspace = true }

[features]
cublas = ["llama-cpp-sys-2/cublas"]
metal = ["llama-cpp-sys-2/metal"]
sampler = []

[target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "0.1.48" }

[lints]
workspace = true

Expand Down
1 change: 1 addition & 0 deletions llama-cpp-sys-2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,5 @@ cc = { workspace = true, features = ["parallel"] }

[features]
cublas = []
metal = []

35 changes: 29 additions & 6 deletions llama-cpp-sys-2/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ fn main() {
ggml.cpp(false);
llama_cpp.cpp(true);

// CMakeLists.txt: set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism")
// get LLAMA_SCHED_MAX_COPIES from env, default to 4
let mut max_copies = "4".to_owned();
if let Ok(env_max_copies) = env::var("LLAMA_SCHED_MAX_COPIES") {
if let Ok(v) = env_max_copies.parse::<u32>() {
if v > 0 {
max_copies = env_max_copies;
}
}
}
ggml.define("GGML_SCHED_MAX_COPIES", Some(max_copies.as_str()));

// https://github.com/ggerganov/llama.cpp/blob/a836c8f534ab789b02da149fbdaf7735500bff74/Makefile#L364-L368
if let Some(ggml_cuda) = &mut ggml_cuda {
for lib in [
Expand Down Expand Up @@ -118,22 +130,30 @@ fn main() {
if cfg!(target_os = "macos") {
assert!(!cublas_enabled, "CUBLAS is not supported on macOS");

println!("cargo:rustc-link-lib=framework=Metal");
let metal_enabled = env::var("CARGO_FEATURE_METAL").is_ok();

println!("cargo:rustc-link-lib=framework=Foundation");
println!("cargo:rustc-link-lib=framework=MetalPerformanceShaders");
println!("cargo:rustc-link-lib=framework=MetalKit");
if metal_enabled {
println!("cargo:rustc-link-lib=framework=Metal");
println!("cargo:rustc-link-lib=framework=MetalPerformanceShaders");
println!("cargo:rustc-link-lib=framework=MetalKit");
}

llama_cpp.define("_DARWIN_C_SOURCE", None);

// https://github.com/ggerganov/llama.cpp/blob/3c0d25c4756742ebf15ad44700fabc0700c638bd/Makefile#L340-L343
llama_cpp.define("GGML_USE_METAL", None);
if metal_enabled {
llama_cpp.define("GGML_USE_METAL", None);
}
llama_cpp.define("GGML_USE_ACCELERATE", None);
llama_cpp.define("ACCELERATE_NEW_LAPACK", None);
llama_cpp.define("ACCELERATE_LAPACK_ILP64", None);
println!("cargo:rustc-link-lib=framework=Accelerate");

metal_hack(&mut ggml);
ggml.include("./llama.cpp/ggml-metal.h");
if metal_enabled {
metal_hack(&mut ggml);
ggml.include("./llama.cpp/ggml-metal.h");
}
}

if cfg!(target_os = "dragonfly") {
Expand Down Expand Up @@ -167,6 +187,9 @@ fn main() {
if let Some(cuda) = ggml_cuda.as_mut() {
cuda.define("NDEBUG", None);
}

ggml.opt_level(3);
llama_cpp.opt_level(3);
}

if let Some(ggml_cuda) = ggml_cuda {
Expand Down
1 change: 1 addition & 0 deletions simple/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ encoding_rs = { workspace = true }

[features]
cublas = ["llama-cpp-2/cublas"]
metal = ["llama-cpp-2/metal"]

[lints]
workspace = true

0 comments on commit b170442

Please sign in to comment.