Skip to content

Commit

Permalink
Merge pull request #326 from SilasMarvin/silas-redo-build-script
Browse files Browse the repository at this point in the history
Switched build script
  • Loading branch information
MarcusDunn authored Jun 6, 2024
2 parents 0d60d79 + 94e6b10 commit 7618646
Show file tree
Hide file tree
Showing 13 changed files with 1,010 additions and 301 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ git clone --recursive https://github.com/utilityai/llama-cpp-rs
cd llama-cpp-rs
```

Run the simple example (add `--features cublas` if you have a cuda gpu)
Run the simple example (add `--features cuda` if you have a cuda gpu)

```bash
cargo run --release --bin simple "The way to kill a linux process is" hf-model TheBloke/Llama-2-7B-GGUF llama-2-7b.Q4_K_M.gguf
Expand Down
10 changes: 5 additions & 5 deletions embeddings/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ use llama_cpp_2::ggml_time_us;
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::llama_batch::LlamaBatch;
use llama_cpp_2::model::params::LlamaModelParams;
use llama_cpp_2::model::{AddBos, Special};
use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::model::{AddBos, Special};

#[derive(clap::Parser, Debug, Clone)]
struct Args {
Expand All @@ -35,7 +35,7 @@ struct Args {
#[clap(short)]
normalise: bool,
/// Disable offloading layers to the gpu
#[cfg(feature = "cublas")]
#[cfg(feature = "cuda")]
#[clap(long)]
disable_gpu: bool,
}
Expand Down Expand Up @@ -78,7 +78,7 @@ fn main() -> Result<()> {
model,
prompt,
normalise,
#[cfg(feature = "cublas")]
#[cfg(feature = "cuda")]
disable_gpu,
} = Args::parse();

Expand All @@ -87,13 +87,13 @@ fn main() -> Result<()> {

// offload all layers to the gpu
let model_params = {
#[cfg(feature = "cublas")]
#[cfg(feature = "cuda")]
if !disable_gpu {
LlamaModelParams::default().with_n_gpu_layers(1000)
} else {
LlamaModelParams::default()
}
#[cfg(not(feature = "cublas"))]
#[cfg(not(feature = "cuda"))]
LlamaModelParams::default()
};

Expand Down
4 changes: 2 additions & 2 deletions llama-cpp-2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ thiserror = { workspace = true }
tracing = { workspace = true }

[features]
cublas = ["llama-cpp-sys-2/cublas"]
cuda = ["llama-cpp-sys-2/cuda"]
metal = ["llama-cpp-sys-2/metal"]
sampler = []

Expand All @@ -25,4 +25,4 @@ llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "
workspace = true

[package.metadata.docs.rs]
features = ["sampler"]
features = ["sampler"]
2 changes: 1 addition & 1 deletion llama-cpp-2/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//!
//! # Feature Flags
//!
//! - `cublas` enables CUDA gpu support.
//! - `cuda` enables CUDA gpu support.
//! - `sampler` adds the [`context::sample::sampler`] struct for a more rusty way of sampling.
use std::ffi::NulError;
use std::fmt::Debug;
Expand Down
3 changes: 2 additions & 1 deletion llama-cpp-sys-2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ include = [
[build-dependencies]
bindgen = { workspace = true }
cc = { workspace = true, features = ["parallel"] }
once_cell = "1.19.0"

[features]
cublas = []
cuda = []
metal = []

4 changes: 2 additions & 2 deletions llama-cpp-sys-2/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# llama-cpp-sys

Raw bindings to llama.cpp with cublas support.
Raw bindings to llama.cpp with cuda support.

See [llama-cpp-2](https://crates.io/crates/llama-cpp-2) for a safe API.
See [llama-cpp-2](https://crates.io/crates/llama-cpp-2) for a safe API.
Loading

0 comments on commit 7618646

Please sign in to comment.