utilityai · MarcusDunn · Mar 26, 2024 · Mar 29, 2024 · Mar 29, 2024 · Mar 29, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -16,9 +16,9 @@ hf-hub = { version = "0.3.2" }
 criterion = "0.5.1"
 pprof = "0.13.0"
 bindgen = "0.69.4"
-cc = "1.0.90"
 anyhow = "1.0.81"
 clap = "4.5.3"
+cmake = "0.1.50"
 
 [workspace.lints.rust]
 missing_docs = { level = "warn" }

diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml
@@ -15,6 +15,7 @@ tracing = { workspace = true }
 
 [features]
 cublas = ["llama-cpp-sys-2/cublas"]
+vulkan = ["llama-cpp-sys-2/vulkan"]
 sampler = []
 
 [lints]

diff --git a/llama-cpp-2/src/context/sample/sampler.rs b/llama-cpp-2/src/context/sample/sampler.rs
@@ -3,7 +3,7 @@
 //! like [`crate::context::LlamaContext`] or token history to the sampler.
 //!
 //! # Example
-//! 
+//!
 //! **Llama.cpp default sampler**
 //!
 //! ```rust

diff --git a/llama-cpp-2/src/lib.rs b/llama-cpp-2/src/lib.rs
@@ -242,3 +242,13 @@ pub fn ggml_time_us() -> i64 {
 pub fn llama_supports_mlock() -> bool {
     unsafe { llama_cpp_sys_2::llama_supports_mlock() }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn smoke_test() {
+        ggml_time_us(); // smoke check only: the returned i64 is discarded, so this just verifies the call completes
+    }
+}
diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs
@@ -280,17 +280,16 @@ impl LlamaModel {
     /// Get chat template from model.
     ///
     /// # Errors
-    /// 
+    ///
     /// * If the model has no chat template
     /// * If the chat template is not a valid [`CString`].
     #[allow(clippy::missing_panics_doc)] // we statically know this will not panic as
     pub fn get_chat_template(&self, buf_size: usize) -> Result<String, ChatTemplateError> {
-
         // longest known template is about 1200 bytes from llama.cpp
         let chat_temp = CString::new(vec![b'*'; buf_size]).expect("no null");
         let chat_ptr = chat_temp.into_raw();
         let chat_name = CString::new("tokenizer.chat_template").expect("no null bytes");
-        
+
         let chat_template: String = unsafe {
             let ret = llama_cpp_sys_2::llama_model_meta_val_str(
                 self.model.as_ptr(),
@@ -305,7 +304,7 @@ impl LlamaModel {
             debug_assert_eq!(usize::try_from(ret).unwrap(), template.len(), "llama.cpp guarantees that the returned int {ret} is the length of the string {} but that was not the case", template.len());
             template
         };
-        
+
         Ok(chat_template)
     }
 

diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml
@@ -42,8 +42,9 @@ include = [
 
 [build-dependencies]
 bindgen = { workspace = true }
-cc = { workspace = true, features = ["parallel"] }
+cmake = { workspace = true }
 
 [features]
 cublas = []
+vulkan = []