Merge pull request #400 from utilityai/update-llama-cpp-2024-07-09
Update llama cpp [MAC TESTING WANTED]
MarcusDunn authored Jul 26, 2024
2 parents 09088ec + 1788762, commit 8ea1482
Showing 3 changed files with 52 additions and 49 deletions.
llama-cpp-2/src/model.rs (9 changes: 6 additions & 3 deletions)
@@ -1,5 +1,6 @@
 //! A safe wrapper around `llama_model`.
 use std::ffi::CString;
+use std::num::NonZeroU16;
 use std::os::raw::c_int;
 use std::path::Path;
 use std::ptr::NonNull;
@@ -131,7 +132,7 @@ impl LlamaModel {
         token: LlamaToken,
         special: Special,
     ) -> Result<Vec<u8>, TokenToStringError> {
-        self.token_to_bytes_with_size(token, 32, special)
+        self.token_to_bytes_with_size(token, 32, special, None)
     }

     /// Convert a vector of tokens to a single string.
@@ -264,7 +265,7 @@ impl LlamaModel {
         buffer_size: usize,
         special: Special,
     ) -> Result<String, TokenToStringError> {
-        let bytes = self.token_to_bytes_with_size(token, buffer_size, special)?;
+        let bytes = self.token_to_bytes_with_size(token, buffer_size, special, None)?;
         Ok(String::from_utf8(bytes)?)
     }

@@ -287,6 +288,7 @@ impl LlamaModel {
         token: LlamaToken,
         buffer_size: usize,
         special: Special,
+        lstrip: Option<NonZeroU16>
     ) -> Result<Vec<u8>, TokenToStringError> {
         if token == self.token_nl() {
             return Ok(String::from("\n").into_bytes());
@@ -314,8 +316,9 @@ impl LlamaModel {
         let len = string.as_bytes().len();
         let len = c_int::try_from(len).expect("length fits into c_int");
         let buf = string.into_raw();
+        let lstrip = lstrip.map(|it| i32::from(it.get())).unwrap_or(0);
         let size = unsafe {
-            llama_cpp_sys_2::llama_token_to_piece(self.model.as_ptr(), token.0, buf, len, special)
+            llama_cpp_sys_2::llama_token_to_piece(self.model.as_ptr(), token.0, buf, len, lstrip, special)
         };

         match size {
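For reference, a minimal sketch of how a caller might exercise the new `lstrip` parameter of `token_to_bytes_with_size`; the import paths and the `Special::Tokenize` variant are assumptions about the crate's public API, not part of this diff. `NonZeroU16::new` conveniently already returns the `Option<NonZeroU16>` the method expects:

    use std::num::NonZeroU16;

    use llama_cpp_2::model::{LlamaModel, Special};
    use llama_cpp_2::token::LlamaToken;

    fn decode_piece(model: &LlamaModel, token: LlamaToken) {
        // Passing None keeps the previous behaviour: the C call receives lstrip = 0.
        let bytes = model.token_to_bytes_with_size(token, 32, Special::Tokenize, None);

        // A non-zero value asks llama_token_to_piece to strip up to that many
        // leading spaces from the decoded piece.
        let stripped =
            model.token_to_bytes_with_size(token, 32, Special::Tokenize, NonZeroU16::new(8));

        println!("{bytes:?} vs {stripped:?}");
    }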
llama-cpp-sys-2/build.rs (90 changes: 45 additions & 45 deletions)
@@ -86,11 +86,24 @@ compile_error!("feature \"vulkan\" cannot be enabled alongside other GPU based f

 static LLAMA_PATH: Lazy<PathBuf> = Lazy::new(|| PathBuf::from("./llama.cpp"));

-fn compile_bindings(out_path: &Path, llama_header_path: &Path) {
+fn compile_bindings(
+    out_path: &Path,
+    llama_header_path: &Path,
+) -> Result<(), Box<dyn std::error::Error + 'static>> {
     println!("Generating bindings..");
+
+    let includes = [
+        llama_header_path.join("ggml").join("include"),
+    ];
+
     let bindings = bindgen::Builder::default()
-        // .header(llama_header_path.join("ggml.h").to_string_lossy())
-        .header(llama_header_path.join("llama.h").to_string_lossy())
+        .clang_args(includes.map(|path| format!("-I{}", path.to_string_lossy())))
+        .header(
+            llama_header_path
+                .join("include")
+                .join("llama.h")
+                .to_string_lossy(),
+        )
         .derive_partialeq(true)
         .allowlist_function("ggml_.*")
         .allowlist_type("ggml_.*")
@@ -106,11 +119,11 @@ fn compile_bindings(out_path: &Path, llama_header_path: &Path) {
         bindings = bindings.parse_callbacks(Box::new(GGMLLinkRename {}));
     }

-    let bindings = bindings.generate().expect("Unable to generate bindings");
+    let bindings = bindings.generate()?;

-    bindings
-        .write_to_file(out_path.join("bindings.rs"))
-        .expect("Couldn't write bindings!");
+    bindings.write_to_file(out_path.join("bindings.rs"))?;
+
+    Ok(())
 }

 #[cfg(all(
@@ -324,26 +337,6 @@ fn push_feature_flags(cx: &mut Build, cxx: &mut Build) {
     }
 }

-fn compile_opencl(cx: &mut Build, cxx: &mut Build) {
-    println!("Compiling OpenCL GGML..");
-
-    // TODO
-    println!("cargo:warning=OpenCL compilation and execution has not been properly tested yet");
-
-    cx.define("GGML_USE_CLBLAST", None);
-    cxx.define("GGML_USE_CLBLAST", None);
-
-    if cfg!(target_os = "linux") {
-        println!("cargo:rustc-link-lib=OpenCL");
-        println!("cargo:rustc-link-lib=clblast");
-    } else if cfg!(target_os = "macos") {
-        println!("cargo:rustc-link-lib=framework=OpenCL");
-        println!("cargo:rustc-link-lib=clblast");
-    }
-
-    cxx.file(LLAMA_PATH.join("ggml-opencl.cpp"));
-}
-
 fn compile_openblas(cx: &mut Build) {
     println!("Compiling OpenBLAS GGML..");

@@ -462,7 +455,9 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati
     }

     let lib_name = "ggml-cuda";
-    let cuda_path = LLAMA_PATH.join("ggml-cuda");
+    let ggml_path = LLAMA_PATH.join("ggml");
+    let ggml_src = ggml_path.join("src");
+    let cuda_path = ggml_src.join("ggml-cuda");
     let cuda_sources = read_dir(cuda_path.as_path())
         .unwrap()
         .map(|f| f.unwrap())
@@ -476,10 +471,11 @@
         .map(|entry| entry.path());

     nvcc.include(cuda_path.as_path())
-        .include(LLAMA_PATH.as_path())
+        .include(ggml_src.as_path())
+        .include(ggml_path.join("include").as_path())
         .files(cuda_sources)
         .files(template_instances)
-        .file(LLAMA_PATH.join("ggml-cuda.cu"))
+        .file(ggml_src.join("ggml-cuda.cu"))
         .compile(lib_name);

     lib_name
@@ -655,22 +651,28 @@ fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {

 fn compile_ggml(mut cx: Build) {
     println!("Compiling GGML..");
+    let ggml_src = LLAMA_PATH.join("ggml").join("src");
+    let ggml_include = LLAMA_PATH.join("ggml").join("include");
     cx.std("c11")
-        .include(LLAMA_PATH.as_path())
-        .file(LLAMA_PATH.join("ggml.c"))
-        .file(LLAMA_PATH.join("ggml-alloc.c"))
-        .file(LLAMA_PATH.join("ggml-backend.c"))
-        .file(LLAMA_PATH.join("ggml-quants.c"))
+        .include(ggml_include)
+        .file(ggml_src.join("ggml.c"))
+        .file(ggml_src.join("ggml-alloc.c"))
+        .file(ggml_src.join("ggml-backend.c"))
+        .file(ggml_src.join("ggml-quants.c"))
         .compile("ggml");
 }

 fn compile_llama(mut cxx: Build, _out_path: impl AsRef<Path>) {
     println!("Compiling Llama.cpp..");
+    let llama_cpp_src = LLAMA_PATH.join("src");
+    let llama_include = LLAMA_PATH.join("include");
+    let ggml_include = LLAMA_PATH.join("ggml").join("include");
     cxx.std("c++11")
-        .include(LLAMA_PATH.as_path())
-        .file(LLAMA_PATH.join("unicode.cpp"))
-        .file(LLAMA_PATH.join("unicode-data.cpp"))
-        .file(LLAMA_PATH.join("llama.cpp"))
+        .include(llama_include)
+        .include(ggml_include)
+        .file(llama_cpp_src.join("unicode.cpp"))
+        .file(llama_cpp_src.join("unicode-data.cpp"))
+        .file(llama_cpp_src.join("llama.cpp"))
         .compile("llama");
 }

@@ -683,9 +685,10 @@ fn main() {

     let llama_header_path = std::env::var("LLAMA_HEADE");
     if let Ok(llama_header_path) = llama_header_path {
-        compile_bindings(&out_path, Path::new(&llama_header_path));
+        compile_bindings(&out_path, Path::new(&llama_header_path))
+            .expect("failed to generate bindings");
     } else {
-        compile_bindings(&out_path, &LLAMA_PATH);
+        compile_bindings(&out_path, &LLAMA_PATH).expect("failed to generate bindings");
     }

     if let Ok(llama_lib_path) = std::env::var("LLAMA_LIB") {
@@ -703,7 +706,7 @@

     println!("cargo:rerun-if-changed={}", LLAMA_PATH.display());

-    compile_bindings(&out_path, &LLAMA_PATH);
+    compile_bindings(&out_path, &LLAMA_PATH).expect("failed to generate bindings");

     let mut cx = Build::new();
     let mut cxx = Build::new();
@@ -720,9 +723,6 @@
         Some(compile_vulkan(&mut cx, &mut cxx))
     } else if cfg!(feature = "cuda") {
         Some(compile_cuda(&mut cx, &mut cxx, featless_cxx))
-    } else if cfg!(feature = "opencl") {
-        compile_opencl(&mut cx, &mut cxx);
-        None
     } else if cfg!(feature = "openblas") {
         compile_openblas(&mut cx);
         None
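The bindgen changes above reflect llama.cpp's reorganised source tree: llama.h now lives under include/ and the ggml headers under ggml/include, so clang needs an extra -I flag to resolve the headers llama.h pulls in. A condensed sketch of that pattern, under the assumption of a standalone helper taking the llama.cpp checkout and an output directory (the real build.rs wires this into main as shown above):

    use std::path::Path;

    fn generate_bindings(llama_root: &Path, out_dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
        bindgen::Builder::default()
            // llama.h moved from the repository root into include/
            .header(llama_root.join("include").join("llama.h").to_string_lossy())
            // let clang find the ggml headers that llama.h includes
            .clang_arg(format!("-I{}", llama_root.join("ggml").join("include").display()))
            .generate()?
            .write_to_file(out_dir.join("bindings.rs"))?;
        Ok(())
    }

Propagating the errors with `?` instead of `expect` is what lets the new compile_bindings return `Result` and leave the panic decision to the caller in main.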
llama-cpp-sys-2/llama.cpp (2 changes: 1 addition & 1 deletion)
Submodule llama.cpp updated 542 files
