From 42a3f0a41d77d2a432803469dcbdbf0b808dcedb Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 26 Sep 2024 10:14:23 +0200 Subject: [PATCH] Drop faster-hex dependency --- Cargo.lock | 1 - cli/src/commands/debug/tree.rs | 2 +- lib/Cargo.toml | 1 - lib/src/backend.rs | 1 - lib/src/git_backend.rs | 2 +- lib/src/hex_util.rs | 127 +++++++++++++++++++++++++-------- lib/src/object_id.rs | 7 +- lib/src/simple_op_store.rs | 6 +- 8 files changed, 104 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 285cdf52298..8002adf3c92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1881,7 +1881,6 @@ dependencies = [ "digest", "either", "esl01-renderdag", - "faster-hex", "futures 0.3.30", "git2", "gix", diff --git a/cli/src/commands/debug/tree.rs b/cli/src/commands/debug/tree.rs index bf5e5fddef9..1752a631e41 100644 --- a/cli/src/commands/debug/tree.rs +++ b/cli/src/commands/debug/tree.rs @@ -47,7 +47,7 @@ pub fn cmd_debug_tree( let workspace_command = command.workspace_helper(ui)?; let tree = if let Some(tree_id_hex) = &args.id { let tree_id = - TreeId::try_from_hex(tree_id_hex).map_err(|_| user_error("Invalid tree id"))?; + TreeId::try_from_hex(tree_id_hex).ok_or_else(|| user_error("Invalid tree id"))?; let dir = if let Some(dir_str) = &args.dir { workspace_command.parse_file_path(dir_str)? } else { diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 8621b2ca40a..4834c40e299 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -48,7 +48,6 @@ git2 = { workspace = true, optional = true } gix = { workspace = true, optional = true } gix-filter = { workspace = true, optional = true } glob = { workspace = true } -faster-hex = { workspace = true } ignore = { workspace = true } indexmap = { workspace = true } itertools = { workspace = true } diff --git a/lib/src/backend.rs b/lib/src/backend.rs index 2a335496d80..ac4ec272315 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -26,7 +26,6 @@ use thiserror::Error; use crate::content_hash::ContentHash; use crate::hex_util; -use crate::hex_util::to_reverse_hex_digit; use crate::index::Index; use crate::merge::Merge; use crate::object_id::id_type; diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index fc2fb676410..84ba228f21e 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -500,7 +500,7 @@ fn root_tree_from_header(git_commit: &CommitRef) -> Result, if *key == JJ_TREES_COMMIT_HEADER { let mut tree_ids = SmallVec::new(); for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') { - let tree_id = TreeId::try_from_hex(hex).or(Err(()))?; + let tree_id = TreeId::try_from_hex(hex).ok_or(())?; if tree_id.as_bytes().len() != HASH_LENGTH { return Err(()); } diff --git a/lib/src/hex_util.rs b/lib/src/hex_util.rs index c435e352baa..4b977ec34cf 100644 --- a/lib/src/hex_util.rs +++ b/lib/src/hex_util.rs @@ -14,27 +14,23 @@ #![allow(missing_docs)] -pub fn to_reverse_hex_digit(b: u8) -> u8 { - let offset = match b { - b'0'..=b'9' => b - b'0', - b'A'..=b'F' => b - b'A' + 10, - b'a'..=b'f' => b - b'a' + 10, - b => return b, - }; - b'z' - offset +use std::iter; + +/// Converts a hexadecimal ASCII character to a 0-based index. +fn hex_to_relative(b: u8) -> Option { + match b { + b'0'..=b'9' => Some(b - b'0'), + b'A'..=b'F' => Some(b - b'A' + 10), + b'a'..=b'f' => Some(b - b'a' + 10), + _ => None, + } } -fn to_reverse_hex_digit_checked(b: u8) -> Option { - let value = match b { - b'0'..=b'9' => b - b'0', - b'A'..=b'F' => b - b'A' + 10, - b'a'..=b'f' => b - b'a' + 10, - _ => return None, - }; - Some(b'z' - value) +fn to_reverse_hex_digit(b: u8) -> Option { + Some(b'z' - hex_to_relative(b)?) } -fn to_forward_hex_digit_checked(b: u8) -> Option { +fn to_forward_hex_digit(b: u8) -> Option { let value = match b { b'k'..=b'z' => b'z' - b, b'K'..=b'Z' => b'Z' - b, @@ -47,25 +43,37 @@ fn to_forward_hex_digit_checked(b: u8) -> Option { } } +/// Tranforms a reverse hex into a forward hex. +/// +/// If the reverse hex string contains non reverse hex characters the function will return None. pub fn to_forward_hex(reverse_hex: &str) -> Option { reverse_hex .bytes() - .map(|b| to_forward_hex_digit_checked(b).map(char::from)) + .map(|b| to_forward_hex_digit(b).map(char::from)) .collect() } +/// Tranforms a forward hex into a reverse hex. +/// +/// If the forward hex string contains non forward hex characters the function will return None. pub fn to_reverse_hex(forward_hex: &str) -> Option { forward_hex .bytes() - .map(|b| to_reverse_hex_digit_checked(b).map(char::from)) + .map(|b| to_reverse_hex_digit(b).map(char::from)) .collect() } -pub fn decode_hex_string(hex: &str) -> Option> { - let mut dst = vec![0; hex.len() / 2]; - faster_hex::hex_decode(hex.as_bytes(), &mut dst) - .ok() - .map(|()| dst) +pub fn decode_hex_string(src: &str) -> Option> { + if src.len() % 2 != 0 { + return None; + } + let mut dst = vec![0; src.len() / 2]; + for (slot, bytes) in iter::zip(&mut dst, src.as_bytes().chunks_exact(2)) { + let a = hex_to_relative(bytes[0])? << 4; + let b = hex_to_relative(bytes[1])?; + *slot = a | b; + } + Some(dst) } /// Calculates common prefix length of two byte sequences. The length @@ -82,23 +90,80 @@ pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize { } pub fn encode_hex_string_reverse(src: &[u8]) -> String { - let mut buffer = vec![0; src.len() * 2]; - faster_hex::hex_encode(src, &mut buffer).unwrap(); - buffer - .iter_mut() - .for_each(|b| *b = to_reverse_hex_digit(*b)); + let mut dst = vec![0; src.len() * 2]; + for (&src, dst) in src.iter().zip(dst.chunks_exact_mut(2)) { + dst[0] = hex_lower_reverse((src >> 4) & 0xf); + dst[1] = hex_lower_reverse(src & 0xf); + } + String::from_utf8(dst).expect("hex_lower_reverse emits ascii character bytes") +} - String::from_utf8(buffer).unwrap() +fn hex_lower_reverse(byte: u8) -> u8 { + static TABLE: &[u8] = b"zyxwvutsrqponmlk"; + TABLE[byte as usize] } pub fn encode_hex_string(src: &[u8]) -> String { - faster_hex::hex_string(src) + let mut dst = vec![0; src.len() * 2]; + for (&src, dst) in src.iter().zip(dst.chunks_exact_mut(2)) { + dst[0] = hex_lower((src >> 4) & 0xf); + dst[1] = hex_lower(src & 0xf); + } + String::from_utf8(dst).expect("hex_lower emits ascii character bytes") +} + +fn hex_lower(byte: u8) -> u8 { + static TABLE: &[u8] = b"0123456789abcdef"; + TABLE[byte as usize] } #[cfg(test)] mod tests { use super::*; + #[test] + fn test_common_hex_len() { + assert_eq!(common_hex_len(b"", b""), 0); + assert_eq!(common_hex_len(b"abc", b"abc"), 6); + + assert_eq!(common_hex_len(b"aaa", b"bbb"), 1); + assert_eq!(common_hex_len(b"aab", b"aac"), 5); + } + + #[test] + fn test_encode_hex_string() { + assert_eq!(&encode_hex_string(b""), ""); + assert_eq!(&encode_hex_string(b"012"), "303132"); + assert_eq!(&encode_hex_string(b"0123"), "30313233"); + assert_eq!(&encode_hex_string(b"abdz"), "6162647a"); + } + + #[test] + fn test_encode_hex_string_reverse() { + assert_eq!(&encode_hex_string_reverse(b""), ""); + assert_eq!(&encode_hex_string_reverse(b"012"), "wzwywx"); + assert_eq!(&encode_hex_string_reverse(b"0123"), "wzwywxww"); + assert_eq!(&encode_hex_string_reverse(b"abdz"), "tytxtvsp"); + } + + #[test] + fn test_decode_hex_string() { + // Empty string + assert_eq!(decode_hex_string(""), Some(vec![])); + + // Odd number of digits + assert_eq!(decode_hex_string("0"), None); + + // Invalid digit + assert_eq!(decode_hex_string("g0"), None); + assert_eq!(decode_hex_string("0g"), None); + + assert_eq!( + decode_hex_string("0123456789abcdefABCDEF"), + Some(b"\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF".to_vec()) + ); + } + #[test] fn test_reverse_hex() { // Empty string diff --git a/lib/src/object_id.rs b/lib/src/object_id.rs index b03aa5430ac..d3abe1ac514 100644 --- a/lib/src/object_id.rs +++ b/lib/src/object_id.rs @@ -66,9 +66,8 @@ macro_rules! impl_id_type { } /// Parses the given hex string into an ObjectId. - pub fn try_from_hex(hex: &str) -> Result { - let mut dst = vec![0; hex.len() / 2]; - faster_hex::hex_decode(hex.as_bytes(), &mut dst).map(|()| Self(dst)) + pub fn try_from_hex(hex: &str) -> Option { + $crate::hex_util::decode_hex_string(hex).map(Self) } } @@ -96,7 +95,7 @@ macro_rules! impl_id_type { } fn hex(&self) -> String { - faster_hex::hex_string(&self.0) + $crate::hex_util::encode_hex_string(&self.0) } } }; diff --git a/lib/src/simple_op_store.rs b/lib/src/simple_op_store.rs index 1cadb9ce9a0..c76e49dfd50 100644 --- a/lib/src/simple_op_store.rs +++ b/lib/src/simple_op_store.rs @@ -228,7 +228,7 @@ impl OpStore for SimpleOpStore { if !name.starts_with(&hex_prefix) { continue; } - let Ok(id) = OperationId::try_from_hex(&name) else { + let Some(id) = OperationId::try_from_hex(&name) else { continue; // Skip invalid hex }; if matched.is_some() { @@ -251,11 +251,11 @@ impl OpStore for SimpleOpStore { fn gc(&self, head_ids: &[OperationId], keep_newer: SystemTime) -> OpStoreResult<()> { let to_op_id = |entry: &fs::DirEntry| -> Option { let name = entry.file_name().into_string().ok()?; - OperationId::try_from_hex(&name).ok() + OperationId::try_from_hex(&name) }; let to_view_id = |entry: &fs::DirEntry| -> Option { let name = entry.file_name().into_string().ok()?; - ViewId::try_from_hex(&name).ok() + ViewId::try_from_hex(&name) }; let remove_file_if_not_new = |entry: &fs::DirEntry| -> Result<(), PathError> { let path = entry.path();