From f106b7784d7f514764c0922e968ddd9e945ad639 Mon Sep 17 00:00:00 2001 From: SymmetricChaos <42520289+SymmetricChaos@users.noreply.github.com> Date: Thu, 28 Nov 2024 09:55:09 -0500 Subject: [PATCH] negative base --- codes/src/ids/code_id.rs | 2 + codes/src/mathematical/mod.rs | 9 ++- codes/src/mathematical/negative_base.rs | 102 ++++++++++++++++++++++++ hashers/src/blake/blake3.rs | 36 ++++----- 4 files changed, 127 insertions(+), 22 deletions(-) create mode 100644 codes/src/mathematical/negative_base.rs diff --git a/codes/src/ids/code_id.rs b/codes/src/ids/code_id.rs index f9fcf5f5..40f3dbcc 100644 --- a/codes/src/ids/code_id.rs +++ b/codes/src/ids/code_id.rs @@ -34,6 +34,7 @@ code_ids_and_names!( BalancedTernary, "Balanced Ternary"; Barbier, "Barbier"; BaseN, "Base-N"; + BaseNegativeTwo, "Base Negative 2"; BaseNBijective, "Bijective Base-N"; BaseX, "BaseX"; Base16, "Base16"; @@ -68,6 +69,7 @@ code_ids_and_names!( MofN, "M-of-N"; Morse, "Morse"; Needle, "Needle"; + NegativeBase, "Negative Base"; ParityBit, "Parity Bit"; Pgp, "PGP Words"; Primorial, "Primorial"; diff --git a/codes/src/mathematical/mod.rs b/codes/src/mathematical/mod.rs index b2e76aea..ceb5cf1c 100644 --- a/codes/src/mathematical/mod.rs +++ b/codes/src/mathematical/mod.rs @@ -1,6 +1,8 @@ +pub mod arithmetic; pub mod balanced_ternary; pub mod base_n; pub mod base_n_bijective; +pub mod base_negative_two; pub mod biquinary_decimal; pub mod combinadic; pub mod elias; @@ -10,13 +12,12 @@ pub mod fibonacci; pub mod fibonacci_integers; pub mod godel; pub mod gray; +pub mod leb128; pub mod levenshtein; pub mod levenshtein_integers; +pub mod negative_base; +pub mod primorial; pub mod roman_numeral; pub mod symmetric_unary; pub mod twos_complement; pub mod unary; -pub mod arithmetic; -pub mod leb128; -pub mod primorial; - diff --git a/codes/src/mathematical/negative_base.rs b/codes/src/mathematical/negative_base.rs new file mode 100644 index 00000000..2a113d67 --- /dev/null +++ 
b/codes/src/mathematical/negative_base.rs @@ -0,0 +1,102 @@ +use crate::{errors::CodeError, traits::Code}; +use itertools::Itertools; +use num::{Integer, Zero}; +use utils::text_functions::num_to_digit; + +pub struct BaseN { + pub radix: i32, + pub little_endian: bool, +} + +impl Default for BaseN { + fn default() -> Self { + Self { + radix: -2, + little_endian: true, + } + } +} + +impl BaseN { + pub fn validate(&self) -> Result<(), CodeError> { + if self.radix > -2 || self.radix < -36 { + return Err(CodeError::state( + "radix must be between -2 and -36, inclusive", + )); + } + Ok(()) + } + + pub fn encode_i32(&self, n: i32) -> Result { + if n.is_zero() { + return Ok(String::from("0")); + } + let mut n = n; + let mut s = Vec::new(); + while n != 0 { + let (q, r) = n.div_rem(&self.radix); + s.push(num_to_digit(-r as u32).expect("remainder should always be less than 36")); + + n = q; + } + if self.little_endian { + Ok(s.iter().rev().collect()) + } else { + Ok(s.iter().collect()) + } + } + + pub fn decode_to_i32(&self, s: &str) -> Result { + let word: String = if self.little_endian { + s.chars().collect() + } else { + s.chars().rev().collect() + }; + + i32::from_str_radix(&word, -self.radix as u32).map_err(|e| CodeError::Input(e.to_string())) + } +} + +impl Code for BaseN { + fn encode(&self, text: &str) -> Result { + self.validate()?; + let mut output = Vec::new(); + + for group in text.split(" ") { + if group.is_empty() { + continue; + } + let n = i32::from_str_radix(group, 10) + .map_err(|_| CodeError::invalid_input_group(group))?; + output.push(self.encode_i32(n)?); + } + + Ok(output.into_iter().join(" ")) + } + + fn decode(&self, text: &str) -> Result { + self.validate()?; + let mut output = String::new(); + + for s in text.split(" ") { + if s.is_empty() { + continue; + } + output.push_str(&format!("{} ", self.decode_to_i32(s)?)) + } + output.pop(); + + Ok(output) + } +} + +#[cfg(test)] +mod negative_base_n_tests { + use super::*; + + const PLAINTEXT_INT: 
&'static str = "0 1 2 3 4 5"; + const ENCODEDTEXT: &'static str = "0 1 10 11 100 101"; + + const PLAINTEXT_INT_BE: &'static str = "0 1 2 3 4 5"; + const ENCODEDTEXT_BE: &'static str = "0 1 01 11 001 101"; +} diff --git a/hashers/src/blake/blake3.rs b/hashers/src/blake/blake3.rs index 05e02380..9591e06a 100644 --- a/hashers/src/blake/blake3.rs +++ b/hashers/src/blake/blake3.rs @@ -1,6 +1,6 @@ use crate::{blake_double_round, traits::ClassicHasher}; use std::cmp::min; -use utils::byte_formatting::ByteFormat; +use utils::byte_formatting::{make_u32s_le, ByteFormat}; // https://github.com/BLAKE3-team/BLAKE3 // https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf @@ -8,9 +8,13 @@ use utils::byte_formatting::ByteFormat; const OUT_LEN: usize = 32; const KEY_LEN: usize = 32; + +// Each chunk of 1024 bytes (256 words) is divided up into blocks of 64 bytes (16 words). +// Chunks are arranged into a tree structure while blocks are a simple array within each chunk const BLOCK_LEN: usize = 64; const CHUNK_LEN: usize = 1024; +// Bitflags that can be set for chunks const CHUNK_START: u32 = 1 << 0; const CHUNK_END: u32 = 1 << 1; const PARENT: u32 = 1 << 2; @@ -19,6 +23,7 @@ const KEYED_HASH: u32 = 1 << 4; const DERIVE_KEY_CONTEXT: u32 = 1 << 5; const DERIVE_KEY_MATERIAL: u32 = 1 << 6; +// Same IV as BLAKE2s: the first 32 bits of the fractional parts of the square roots of the first eight primes const IV: [u32; 8] = [ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, ]; @@ -36,6 +41,8 @@ fn permute(m: &mut [u32; 16]) { *m = permuted; } +// The compression function. +// Compresses a block into a chaining value.
fn compress( chaining_value: &[u32; 8], block_words: &[u32; 16], @@ -88,13 +95,6 @@ fn first_8_words(compression_output: [u32; 16]) -> [u32; 8] { compression_output[0..8].try_into().unwrap() } -fn words_from_little_endian_bytes(bytes: &[u8], words: &mut [u32]) { - debug_assert_eq!(bytes.len(), 4 * words.len()); - for (four_bytes, word) in bytes.chunks_exact(4).zip(words) { - *word = u32::from_le_bytes(four_bytes.try_into().unwrap()); - } -} - // Each chunk or parent node can produce either an 8-word chaining value or, by // setting the ROOT flag, any number of final output bytes. The Output struct // captures the state just prior to choosing between those two possibilities. @@ -174,8 +174,7 @@ impl ChunkState { // If the block buffer is full, compress it and clear it. More // input is coming, so this compression is not CHUNK_END. if self.block_len as usize == BLOCK_LEN { - let mut block_words = [0; 16]; - words_from_little_endian_bytes(&self.block, &mut block_words); + let block_words = make_u32s_le::<16>(&self.block); self.chaining_value = first_8_words(compress( &self.chaining_value, &block_words, @@ -198,8 +197,7 @@ impl ChunkState { } fn output(&self) -> Output { - let mut block_words = [0; 16]; - words_from_little_endian_bytes(&self.block, &mut block_words); + let block_words = make_u32s_le::<16>(&self.block); Output { input_chaining_value: self.chaining_value, block_words, @@ -247,6 +245,8 @@ pub struct Blake3Hasher { } impl Blake3Hasher { + // Create a new instance directly + // The key_words are handed to the first ChunkState: the IV for plain hashing, or the words of the key for the keyed and derive-key modes fn new_internal(key_words: [u32; 8], flags: u32) -> Self { Self { chunk_state: ChunkState::new(key_words, 0, flags), @@ -259,26 +259,26 @@ impl Blake3Hasher { /// Construct a new `Hasher` for the regular hash function. pub fn new() -> Self { + // The default IV and no modes set Self::new_internal(IV, 0) } /// Construct a new `Hasher` for the keyed hash function.
pub fn new_keyed(key: &[u8; KEY_LEN]) -> Self { - let mut key_words = [0; 8]; - words_from_little_endian_bytes(key, &mut key_words); - Self::new_internal(key_words, KEYED_HASH) + // The same as .new() but with the key material instead of the default IV and the KEYED_HASH mode set + Self::new_internal(make_u32s_le::<8>(key), KEYED_HASH) } /// Construct a new `Hasher` for the key derivation function. The context /// string should be hardcoded, globally unique, and application-specific. pub fn new_derive_key(context: &str) -> Self { + // The context is converted into an IV by hashing it in the DERIVE_KEY_CONTEXT mode let mut context_hasher = Self::new_internal(IV, DERIVE_KEY_CONTEXT); context_hasher.update(context.as_bytes()); let mut context_key = [0; KEY_LEN]; context_hasher.finalize(&mut context_key); - let mut context_key_words = [0; 8]; - words_from_little_endian_bytes(&context_key, &mut context_key_words); - Self::new_internal(context_key_words, DERIVE_KEY_MATERIAL) + // The context key then becomes the key words of the hasher used in DERIVE_KEY_MATERIAL mode + Self::new_internal(make_u32s_le::<8>(&context_key), DERIVE_KEY_MATERIAL) } fn push_stack(&mut self, cv: [u32; 8]) {