
Merge branch 'main' into feat/dynamic_link
MarcusDunn authored Jul 9, 2024
2 parents d441204 + 38686de commit 70f26c2
Showing 10 changed files with 114 additions and 44 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/llama-cpp-rs-check.yml
@@ -45,11 +45,11 @@ jobs:
- name: checkout
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
- name: Setup QEMU
uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3
uses: docker/setup-qemu-action@5927c834f5b4fdf503fca6f4c7eccda82949e1ee
with:
platforms: arm64,amd64
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb
uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4
- name: Build
uses: docker/build-push-action@v6
with:
28 changes: 14 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions Cargo.toml
@@ -16,9 +16,9 @@ hf-hub = { version = "0.3.2" }
criterion = "0.5.1"
pprof = "0.13.0"
bindgen = "0.69.4"
cc = "1.0.100"
cc = "1.0.105"
anyhow = "1.0.86"
clap = "4.5.4"
clap = "4.5.8"
encoding_rs = "0.8.34"

[workspace.lints.rust]
12 changes: 8 additions & 4 deletions embeddings/Cargo.toml
@@ -1,15 +1,19 @@
[package]
name = "embeddings"
version = "0.1.60"
version = "0.1.61"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.60" }
llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.61" }
hf-hub = { workspace = true }
clap = { workspace = true , features = ["derive"] }
anyhow = { workspace = true }

[features]
cuda = ["llama-cpp-2/cuda"]
metal = ["llama-cpp-2/metal"]
native = ["llama-cpp-2/native"]
vulkan = ["llama-cpp-2/vulkan"]

[lints]
workspace = true
8 changes: 4 additions & 4 deletions embeddings/src/main.rs
@@ -35,7 +35,7 @@ struct Args {
#[clap(short)]
normalise: bool,
/// Disable offloading layers to the gpu
#[cfg(feature = "cuda")]
#[cfg(any(feature = "cuda", feature = "vulkan"))]
#[clap(long)]
disable_gpu: bool,
}
@@ -78,7 +78,7 @@ fn main() -> Result<()> {
model,
prompt,
normalise,
#[cfg(feature = "cuda")]
#[cfg(any(feature = "cuda", feature = "vulkan"))]
disable_gpu,
} = Args::parse();

@@ -87,13 +87,13 @@ fn main() -> Result<()> {

// offload all layers to the gpu
let model_params = {
#[cfg(feature = "cuda")]
#[cfg(any(feature = "cuda", feature = "vulkan"))]
if !disable_gpu {
LlamaModelParams::default().with_n_gpu_layers(1000)
} else {
LlamaModelParams::default()
}
#[cfg(not(feature = "cuda"))]
#[cfg(not(any(feature = "cuda", feature = "vulkan")))]
LlamaModelParams::default()
};

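The same gate widening appears in both example binaries: the GPU-offload path is now compiled in when either the `cuda` or the `vulkan` feature is enabled, instead of `cuda` only. A minimal, self-contained sketch of the pattern (hypothetical `ModelParams` type standing in for `LlamaModelParams`; not the crate's actual API):

```rust
// Sketch of the widened feature gate. `ModelParams` is an illustrative stand-in.
#[derive(Debug, Default)]
struct ModelParams {
    n_gpu_layers: u32,
}

impl ModelParams {
    fn with_n_gpu_layers(mut self, n: u32) -> Self {
        self.n_gpu_layers = n;
        self
    }
}

#[allow(unused_variables)]
fn model_params(disable_gpu: bool) -> ModelParams {
    // With `cuda` or `vulkan` enabled, offload all layers unless the user opted out.
    #[cfg(any(feature = "cuda", feature = "vulkan"))]
    if !disable_gpu {
        return ModelParams::default().with_n_gpu_layers(1000);
    }
    // CPU-only build, or GPU offload explicitly disabled.
    ModelParams::default()
}

fn main() {
    println!("{:?}", model_params(false));
}
```
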
21 changes: 19 additions & 2 deletions llama-cpp-2/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "llama-cpp-2"
description = "llama.cpp bindings for Rust"
version = "0.1.60"
version = "0.1.61"
edition = "2021"
license = "MIT OR Apache-2.0"
repository = "https://github.com/utilityai/llama-cpp-rs"
@@ -10,16 +10,33 @@ repository = "https://github.com/utilityai/llama-cpp-rs"

[dependencies]
enumflags2 = "0.7.10"
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.60" }
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.61" }
thiserror = { workspace = true }
tracing = { workspace = true }

[features]
cuda = ["llama-cpp-sys-2/cuda"]
metal = ["llama-cpp-sys-2/metal"]
dynamic_link = ["llama-cpp-sys-2/dynamic_link"]
vulkan = ["llama-cpp-sys-2/vulkan"]
native = ["llama-cpp-sys-2/native"]
sampler = []

[target.'cfg(target_feature = "avx")'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx"] }
[target.'cfg(target_feature = "avx2")'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx2"] }
[target.'cfg(target_feature = "avx512f")'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512"] }
[target.'cfg(target_feature = "avx512vbmi")'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512_vmbi"] }
[target.'cfg(target_feature = "avx512vnni")'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512_vnni"] }
[target.'cfg(target_feature = "f16c")'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["f16c"] }
[target.'cfg(target_feature = "fma")'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["fma"] }

[target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "0.1.48" }

11 changes: 10 additions & 1 deletion llama-cpp-sys-2/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "llama-cpp-sys-2"
description = "Low Level Bindings to llama.cpp"
version = "0.1.60"
version = "0.1.61"
edition = "2021"
license = "MIT OR Apache-2.0"
repository = "https://github.com/utilityai/llama-cpp-rs"
@@ -53,6 +53,15 @@ cc = { workspace = true, features = ["parallel"] }
once_cell = "1.19.0"

[features]
avx = []
avx2 = []
avx512 = []
avx512_vmbi = []
avx512_vnni = []
cuda = []
f16c = []
fma = []
metal = []
dynamic_link = []
vulkan = []
native = []
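
The new `avx*`, `f16c`, and `fma` flags mirror llama.cpp's CPU options and are consumed by the crate's build script (see the `push_feature_flags` call in the build.rs diff below). A hypothetical sketch of how such a feature-to-compiler-flag mapping might look — the real implementation in llama-cpp-sys-2 may use different flags and structure:

```rust
// Hypothetical build.rs fragment: translate cargo features into -m flags for
// the vendored llama.cpp sources. Illustrative only; not the crate's code.
fn push_feature_flags(cx: &mut cc::Build, cxx: &mut cc::Build) {
    // Crate features are visible as cfg values inside the build script.
    for (enabled, flag) in [
        (cfg!(feature = "avx"), "-mavx"),
        (cfg!(feature = "avx2"), "-mavx2"),
        (cfg!(feature = "f16c"), "-mf16c"),
        (cfg!(feature = "fma"), "-mfma"),
    ] {
        if enabled {
            cx.flag_if_supported(flag);
            cxx.flag_if_supported(flag);
        }
    }
}

fn main() {
    let mut cx = cc::Build::new();
    let mut cxx = cc::Build::new();
    push_feature_flags(&mut cx, &mut cxx);
}
```
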
56 changes: 47 additions & 9 deletions llama-cpp-sys-2/build.rs
@@ -507,7 +507,7 @@ fn compile_metal(cx: &mut Build, cxx: &mut Build) {
let common = LLAMA_PATH.join("ggml-common.h");

let input_file = File::open(ggml_metal_shader_path).expect("Failed to open input file");
let mut output_file =
let output_file =
File::create(&ggml_metal_shader_out_path).expect("Failed to create output file");

let output = Command::new("sed")
@@ -583,11 +583,35 @@ fn compile_metal(cx: &mut Build, cxx: &mut Build) {
.file(LLAMA_PATH.join("ggml-metal.m"));
}

fn find_windows_vulkan_sdk() -> PathBuf {
// if the vulkan sdk is installed in the standard location then this should be pretty fast,
// but we still must search recursively because we don't know the exact version number.
// if it's installed somewhere else, this will take a while, but it's better than failing.
let vulkan_root = Command::new("powershell.exe")
.arg("-Command")
.arg(r#"
if (test-path -pathtype Container "/VulkanSDK") {
$root = "/VulkanSDK"
} else {
$root = "/"
}
get-childitem -path $root -recurse -filter "vulkan.h" 2>$null | foreach-object { $_.directory.parent.parent.fullname }
"#)
.output()
.expect("could not find vulkan.h")
.stdout;
let vulkan_root = String::from_utf8_lossy(
vulkan_root
.split(|c| *c == b'\n')
.next()
.expect("could not find vulkan.h"),
);
PathBuf::from(vulkan_root.trim())
}

fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {
println!("Compiling Vulkan GGML..");

// Vulkan gets linked through the ash crate.

if cfg!(debug_assertions) {
cx.define("GGML_VULKAN_DEBUG", None)
.define("GGML_VULKAN_CHECK_RESULTS", None)
@@ -602,12 +626,25 @@ fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {

let lib_name = "ggml-vulkan";

cxx.clone()
.include("./thirdparty/Vulkan-Headers/include/")
.include(LLAMA_PATH.as_path())
.file(LLAMA_PATH.join("ggml-vulkan.cpp"))
.compile(lib_name);

if cfg!(target_os = "windows") {
let vulkan_root = find_windows_vulkan_sdk();
cxx.clone()
.include(vulkan_root.join("Include"))
.include(LLAMA_PATH.as_path())
.file(LLAMA_PATH.join("ggml-vulkan.cpp"))
.compile(lib_name);
println!(
"cargo:rustc-link-search=native={}",
vulkan_root.join("Lib").display()
);
println!("cargo:rustc-link-lib=vulkan-1");
} else {
cxx.clone()
.include(LLAMA_PATH.as_path())
.file(LLAMA_PATH.join("ggml-vulkan.cpp"))
.compile(lib_name);
println!("cargo:rustc-link-lib=vulkan");
}
lib_name
}

@@ -673,6 +710,7 @@ fn main() {
push_warn_flags(&mut cx, &mut cxx);
push_feature_flags(&mut cx, &mut cxx);

#[allow(unused_variables)]
let feat_lib = if cfg!(feature = "vulkan") {
Some(compile_vulkan(&mut cx, &mut cxx))
} else if cfg!(feature = "cuda") {
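For reference, a condensed sketch of the platform-specific link step that the new Vulkan branch performs — simplified from the diff above, with the SDK search and the actual `ggml-vulkan.cpp` compilation omitted:

```rust
use std::path::PathBuf;

// Simplified from compile_vulkan above: on Windows the Vulkan SDK is located
// at build time (find_windows_vulkan_sdk) and vulkan-1 is linked from its
// Lib directory; on other platforms the system libvulkan is linked instead.
fn emit_vulkan_link_directives(windows_sdk_root: Option<PathBuf>) {
    if cfg!(target_os = "windows") {
        let root = windows_sdk_root.expect("Vulkan SDK root is required on Windows");
        println!("cargo:rustc-link-search=native={}", root.join("Lib").display());
        println!("cargo:rustc-link-lib=vulkan-1");
    } else {
        println!("cargo:rustc-link-lib=vulkan");
    }
}

fn main() {
    // In the real build script this runs after compiling ggml-vulkan.cpp.
    emit_vulkan_link_directives(None);
}
```
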
6 changes: 4 additions & 2 deletions simple/Cargo.toml
@@ -1,12 +1,12 @@
[package]
name = "simple"
version = "0.1.60"
version = "0.1.61"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.60" }
llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.61" }
hf-hub = { workspace = true }
clap = { workspace = true , features = ["derive"] }
anyhow = { workspace = true }
@@ -15,6 +15,8 @@ encoding_rs = { workspace = true }
[features]
cuda = ["llama-cpp-2/cuda"]
metal = ["llama-cpp-2/metal"]
native = ["llama-cpp-2/native"]
vulkan = ["llama-cpp-2/vulkan"]

[lints]
workspace = true
8 changes: 4 additions & 4 deletions simple/src/main.rs
@@ -44,7 +44,7 @@ struct Args {
#[arg(short = 'o', value_parser = parse_key_val)]
key_value_overrides: Vec<(String, ParamOverrideValue)>,
/// Disable offloading layers to the gpu
#[cfg(feature = "cuda")]
#[cfg(any(feature = "cuda", feature = "vulkan"))]
#[clap(long)]
disable_gpu: bool,
#[arg(short = 's', long, help = "RNG seed (default: 1234)")]
@@ -124,7 +124,7 @@ fn main() -> Result<()> {
model,
prompt,
file,
#[cfg(feature = "cuda")]
#[cfg(any(feature = "cuda", feature = "vulkan"))]
disable_gpu,
key_value_overrides,
seed,
@@ -138,13 +138,13 @@ fn main() -> Result<()> {

// offload all layers to the gpu
let model_params = {
#[cfg(feature = "cuda")]
#[cfg(any(feature = "cuda", feature = "vulkan"))]
if !disable_gpu {
LlamaModelParams::default().with_n_gpu_layers(1000)
} else {
LlamaModelParams::default()
}
#[cfg(not(feature = "cuda"))]
#[cfg(not(any(feature = "cuda", feature = "vulkan")))]
LlamaModelParams::default()
};

