
Merge pull request #341 from utilityai/update-llama-cpp-2024-06-13
Updated llama-cpp (bot)
MarcusDunn authored Jun 14, 2024
2 parents 2fb71a8 + 678ab2c commit 4c3de32
Showing 9 changed files with 65 additions and 49 deletions.
6 changes: 3 additions & 3 deletions embeddings/src/main.rs
@@ -139,10 +139,10 @@ fn main() -> Result<()> {
         for token in token_line {
             // Attempt to convert token to string and print it; if it fails, print the token instead
             match model.token_to_str(*token, Special::Tokenize) {
-                Ok(token_str) => eprintln!(" {} --> {}", token, token_str),
+                Ok(token_str) => eprintln!("{token} --> {token_str}"),
                 Err(e) => {
-                    eprintln!("Failed to convert token to string, error: {}", e);
-                    eprintln!("Token value: {}", token);
+                    eprintln!("Failed to convert token to string, error: {e}");
+                    eprintln!("Token value: {token}");
                 }
             }
         }
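Note: the switch from positional arguments to inline format-argument capture (stable since Rust 1.58) is purely cosmetic; the output is unchanged. A minimal standalone sketch of the two forms, with made-up values:

```rust
fn main() {
    let token = 42;
    let token_str = "hello";
    // Old style: positional arguments.
    eprintln!("{} --> {}", token, token_str);
    // New style: the format string captures `token` and `token_str` directly.
    eprintln!("{token} --> {token_str}");
}
```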
2 changes: 1 addition & 1 deletion llama-cpp-2/src/context.rs
@@ -76,7 +76,7 @@ impl<'model> LlamaContext<'model> {

         match NonZeroI32::new(result) {
             None => {
-                self.initialized_logits = batch.initialized_logits.clone();
+                self.initialized_logits.clone_from(&batch.initialized_logits);
                 Ok(())
             }
             Some(error) => Err(DecodeError::from(error)),
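Note: `clone_from` can reuse the destination's existing allocation, whereas `dst = src.clone()` always builds a fresh vector and drops the old one; clippy's `assigning_clones` lint nudges toward this form. A minimal sketch of the difference:

```rust
fn main() {
    let src = vec![1, 2, 3];
    let mut dst = vec![0, 0, 0];
    // `dst = src.clone()` would allocate a new Vec and drop the old one;
    // `clone_from` copies into `dst`'s existing buffer when capacity allows.
    dst.clone_from(&src);
    assert_eq!(dst, [1, 2, 3]);
}
```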
2 changes: 1 addition & 1 deletion llama-cpp-2/src/lib.rs
@@ -203,7 +203,7 @@ pub enum StringToTokenError {
     #[error("{0}")]
     NulError(#[from] NulError),
     #[error("{0}")]
-    /// Failed to convert a provided integer to a c_int.
+    /// Failed to convert a provided integer to a [`c_int`].
    CIntConversionError(#[from] std::num::TryFromIntError),
 }

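Note: the bracketed form turns the plain mention into a rustdoc intra-doc link. A minimal sketch of the idiom (hypothetical type, not from the crate):

```rust
use std::ffi::c_int;

/// Failed to convert a provided integer to a [`c_int`].
///
/// With the brackets, rustdoc renders `c_int` as a link to its
/// documentation; without them it is plain text.
pub struct CIntWrapper(pub c_int);
```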
31 changes: 18 additions & 13 deletions llama-cpp-2/src/model.rs
@@ -9,7 +9,7 @@ use crate::context::LlamaContext;
 use crate::llama_backend::LlamaBackend;
 use crate::model::params::LlamaModelParams;
 use crate::token::LlamaToken;
-use crate::token_type::LlamaTokenType;
+use crate::token_type::LlamaTokenAttr;
 use crate::{
     ApplyChatTemplateError, ChatTemplateError, LlamaContextLoadError, LlamaModelLoadError,
     NewLlamaChatMessageError, StringToTokenError, TokenToStringError,
@@ -238,9 +238,9 @@ impl LlamaModel {
     ///
     /// If the token type is not known to this library.
     #[must_use]
-    pub fn token_type(&self, LlamaToken(id): LlamaToken) -> LlamaTokenType {
-        let token_type = unsafe { llama_cpp_sys_2::llama_token_get_type(self.model.as_ptr(), id) };
-        LlamaTokenType::try_from(token_type).expect("token type is valid")
+    pub fn token_attr(&self, LlamaToken(id): LlamaToken) -> LlamaTokenAttr {
+        let token_type = unsafe { llama_cpp_sys_2::llama_token_get_attr(self.model.as_ptr(), id) };
+        LlamaTokenAttr::try_from(token_type).expect("token type is valid")
     }

     /// Convert a token to a string with a specified buffer size.
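Note: this renames the public accessor from `token_type` to `token_attr`, tracking upstream's move from `llama_token_get_type` to `llama_token_get_attr`, so downstream callers must rename. A sketch of how a caller might use the new API (hypothetical helper, assuming the module paths shown in this diff):

```rust
use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::token::LlamaToken;
use llama_cpp_2::token_type::LlamaTokenAttr;

/// Hypothetical helper: should this token appear in user-facing output?
fn is_renderable(model: &LlamaModel, token: LlamaToken) -> bool {
    matches!(
        model.token_attr(token),
        LlamaTokenAttr::Normal | LlamaTokenAttr::UserDefined
    )
}
```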
@@ -292,18 +292,23 @@ impl LlamaModel {
             return Ok(String::from("\n").into_bytes());
         }

-        // unsure what to do with this in the face of the 'special' arg
-        match self.token_type(token) {
-            LlamaTokenType::Normal | LlamaTokenType::UserDefined => {}
-            LlamaTokenType::Control => {
+        // unsure what to do with this in the face of the 'special' arg + attr changes
+        match self.token_attr(token) {
+            LlamaTokenAttr::Normal
+            | LlamaTokenAttr::UserDefined
+            | LlamaTokenAttr::Normalized
+            | LlamaTokenAttr::LStrip
+            | LlamaTokenAttr::RStrip
+            | LlamaTokenAttr::SingleWord => {}
+            LlamaTokenAttr::Control => {
                 if token == self.token_bos() || token == self.token_eos() {
                     return Ok(Vec::new());
                 }
             }
-            LlamaTokenType::Unknown
-            | LlamaTokenType::Undefined
-            | LlamaTokenType::Byte
-            | LlamaTokenType::Unused => {
+            LlamaTokenAttr::Unknown
+            | LlamaTokenAttr::Undefined
+            | LlamaTokenAttr::Byte
+            | LlamaTokenAttr::Unused => {
                 return Ok(Vec::new());
             }
         }
@@ -471,7 +476,7 @@ impl LlamaModel {
         // Set the tmpl pointer
         let tmpl = tmpl.map(CString::new);
         let tmpl_ptr = match &tmpl {
-            Some(str) => str.as_ref().map_err(|e| e.clone())?.as_ptr(),
+            Some(str) => str.as_ref().map_err(Clone::clone)?.as_ptr(),
             None => std::ptr::null(),
         };

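Note: replacing the closure `|e| e.clone()` with the function path `Clone::clone` is behaviorally identical; clippy's redundant-closure lints prefer the shorter form. A minimal sketch:

```rust
fn main() {
    let tmpl: Result<String, String> = Err("bad template".to_string());
    // The closure form and the function-path form are equivalent:
    let _a: Result<&String, String> = tmpl.as_ref().map_err(|e| e.clone());
    let _b: Result<&String, String> = tmpl.as_ref().map_err(Clone::clone);
}
```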
57 changes: 29 additions & 28 deletions llama-cpp-2/src/token_type.rs
@@ -3,22 +3,19 @@
 /// A rust flavored equivalent of `llama_token_type`.
 #[repr(u32)]
 #[derive(Eq, PartialEq, Debug, Clone, Copy)]
-#[allow(clippy::module_name_repetitions)]
-pub enum LlamaTokenType {
-    /// An undefined token type.
-    Undefined = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNDEFINED as _,
-    /// A normal token type.
-    Normal = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_NORMAL as _,
-    /// An unknown token type.
-    Unknown = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNKNOWN as _,
-    /// A control token type.
-    Control = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_CONTROL as _,
-    /// A user defined token type.
-    UserDefined = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_USER_DEFINED as _,
-    /// An unused token type.
-    Unused = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNUSED as _,
-    /// A byte token type.
-    Byte = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_BYTE as _,
+#[allow(clippy::module_name_repetitions, missing_docs)]
+pub enum LlamaTokenAttr {
+    Undefined = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_UNDEFINED as _,
+    Unknown = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_UNKNOWN as _,
+    Unused = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_UNUSED as _,
+    Normal = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_NORMAL as _,
+    Control = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_CONTROL as _,
+    UserDefined = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_USER_DEFINED as _,
+    Byte = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_BYTE as _,
+    Normalized = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_NORMALIZED as _,
+    LStrip = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_LSTRIP as _,
+    RStrip = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_RSTRIP as _,
+    SingleWord = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_SINGLE_WORD as _,
 }

 /// A safe wrapper for converting potentially deceptive `llama_token_type` values into
@@ -31,27 +28,31 @@ pub enum LlamaTokenType {
 /// # use std::ffi::c_int;
 /// # use std::num::TryFromIntError;
 /// # use std::result::Result;
-/// # use llama_cpp_2::token_type::{LlamaTokenTypeFromIntError, LlamaTokenType};
+/// # use llama_cpp_2::token_type::{LlamaTokenTypeFromIntError, LlamaTokenAttr};
 /// # fn main() -> Result<(), LlamaTokenTypeFromIntError> {
-/// let llama_token_type = LlamaTokenType::try_from(0 as llama_cpp_sys_2::llama_token_type)?;
-/// assert_eq!(llama_token_type, LlamaTokenType::Undefined);
+/// let llama_token_type = LlamaTokenAttr::try_from(0 as llama_cpp_sys_2::llama_token_type)?;
+/// assert_eq!(llama_token_type, LlamaTokenAttr::Undefined);
 ///
-/// let bad_llama_token_type = LlamaTokenType::try_from(100 as llama_cpp_sys_2::llama_token_type);
+/// let bad_llama_token_type = LlamaTokenAttr::try_from(100 as llama_cpp_sys_2::llama_token_type);
 /// assert_eq!(Err(LlamaTokenTypeFromIntError::UnknownValue(100)), bad_llama_token_type);
 /// # Ok(())
 /// # }
-impl TryFrom<llama_cpp_sys_2::llama_token_type> for LlamaTokenType {
+impl TryFrom<llama_cpp_sys_2::llama_token_type> for LlamaTokenAttr {
     type Error = LlamaTokenTypeFromIntError;

     fn try_from(value: llama_cpp_sys_2::llama_vocab_type) -> Result<Self, Self::Error> {
         match value {
-            llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNDEFINED => Ok(LlamaTokenType::Undefined),
-            llama_cpp_sys_2::LLAMA_TOKEN_TYPE_NORMAL => Ok(LlamaTokenType::Normal),
-            llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNKNOWN => Ok(LlamaTokenType::Unknown),
-            llama_cpp_sys_2::LLAMA_TOKEN_TYPE_CONTROL => Ok(LlamaTokenType::Control),
-            llama_cpp_sys_2::LLAMA_TOKEN_TYPE_USER_DEFINED => Ok(LlamaTokenType::UserDefined),
-            llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNUSED => Ok(LlamaTokenType::Unused),
-            llama_cpp_sys_2::LLAMA_TOKEN_TYPE_BYTE => Ok(LlamaTokenType::Byte),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_UNDEFINED => Ok(Self::Undefined),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_UNKNOWN => Ok(Self::Unknown),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_UNUSED => Ok(Self::Unused),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_NORMAL => Ok(Self::Normal),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_CONTROL => Ok(Self::Control),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_USER_DEFINED => Ok(Self::UserDefined),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_BYTE => Ok(Self::Byte),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_NORMALIZED => Ok(Self::Normalized),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_LSTRIP => Ok(Self::LStrip),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_RSTRIP => Ok(Self::RStrip),
+            llama_cpp_sys_2::LLAMA_TOKEN_ATTR_SINGLE_WORD => Ok(Self::SingleWord),
             _ => Err(LlamaTokenTypeFromIntError::UnknownValue(value as _)),
         }
     }
2 changes: 2 additions & 0 deletions llama-cpp-sys-2/Cargo.toml
@@ -39,6 +39,8 @@ include = [
     "/llama.cpp/ggml-common.h",
     "/llama.cpp/ggml-cuda",
     "/llama.cpp/sgemm.h",
+    "/llama.cpp/ggml-cuda/*",
+    "/llama.cpp/ggml-cuda/template_instances/*",
 ]

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
9 changes: 8 additions & 1 deletion llama-cpp-sys-2/build.rs
@@ -463,10 +463,17 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'static str {
         .map(|f| f.unwrap())
         .filter(|entry| entry.file_name().to_string_lossy().ends_with(".cu"))
         .map(|entry| entry.path());

+    let template_instances = read_dir(cuda_path.join("template-instances"))
+        .unwrap()
+        .map(|f| f.unwrap())
+        .filter(|entry| entry.file_name().to_string_lossy().ends_with(".cu"))
+        .map(|entry| entry.path());
+
     nvcc.include(cuda_path.as_path())
         .include(LLAMA_PATH.as_path())
         .files(cuda_sources)
+        .files(template_instances)
         .file(LLAMA_PATH.join("ggml-cuda.cu"))
         .compile(lib_name);
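Note: llama.cpp now splits its CUDA kernels into per-template `.cu` files under `template-instances/`, so the build script must gather and compile those too. The collection pattern, as a standalone sketch:

```rust
use std::fs::read_dir;
use std::path::PathBuf;

fn main() {
    // Sketch of the pattern above: collect every `.cu` file in a directory.
    // `unwrap` aborts on I/O errors, which in a build script fails the build.
    let cu_files: Vec<PathBuf> = read_dir("template-instances")
        .unwrap()
        .map(|entry| entry.unwrap())
        .filter(|entry| entry.file_name().to_string_lossy().ends_with(".cu"))
        .map(|entry| entry.path())
        .collect();
    println!("found {} CUDA sources: {cu_files:?}", cu_files.len());
}
```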

@@ -555,7 +562,7 @@ fn compile_metal(cx: &mut Build, cxx: &mut Build) {
     // Create a static library for our metal embed code.
     let ggml_metal_embed_library_path = PathBuf::from(&out_dir).join("libggml-metal-embed.a");
     Command::new("ar")
-        .args(&[
+        .args([
             "crus",
             ggml_metal_embed_library_path.to_str().unwrap(),
             ggml_metal_embed_object_path.to_str().unwrap(),
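Note: dropping the `&` works because arrays implement `IntoIterator` by value since Rust 1.53, and `Command::args` accepts any `IntoIterator` of `OsStr`-convertible items; clippy flags the borrowed form as a needless borrow. A minimal sketch:

```rust
use std::process::Command;

fn main() {
    // `.args([...])` accepts the array directly; no borrow needed.
    let status = Command::new("echo")
        .args(["hello", "world"])
        .status()
        .expect("failed to spawn echo");
    assert!(status.success());
}
```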
2 changes: 1 addition & 1 deletion llama-cpp-sys-2/llama.cpp
Submodule llama.cpp updated 422 files
3 changes: 2 additions & 1 deletion simple/src/main.rs
@@ -117,6 +117,7 @@ impl Model {
     }
 }

+#[allow(clippy::too_many_lines)]
 fn main() -> Result<()> {
     let Args {
         n_len,
@@ -263,7 +264,7 @@ either reduce n_len or increase n_ctx"
     // use `Decoder.decode_to_string()` to avoid the intermediate buffer
     let mut output_string = String::with_capacity(32);
     let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false);
-    print!("{}", output_string);
+    print!("{output_string}");
     std::io::stdout().flush()?;

     batch.clear();
