From f01e5c8ab90137926ee4be002a43e2cf40fc8299 Mon Sep 17 00:00:00 2001 From: Avi Dessauer Date: Mon, 15 Jul 2024 15:30:26 -0700 Subject: [PATCH 1/2] Start to move the prefix out of the Branch, replacing prior word and vec This will allow bytemuck cast to be used to serialize Snapshots. This was a goal of the original design, but was not possible due to switch to prefixes from extension nodes. --- src/transaction.rs | 4 ++- src/transaction/nodes.rs | 67 +++++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/transaction.rs b/src/transaction.rs index 45087c8..cb75290 100644 --- a/src/transaction.rs +++ b/src/transaction.rs @@ -15,12 +15,14 @@ use crate::{ }; use self::nodes::{ - Branch, KeyPosition, KeyPositionAdjacent, Leaf, Node, NodeRef, StoredLeafRef, TrieRoot, + Branch, KeyPosition, KeyPositionAdjacent, Leaf, Node, NodeRef, PrefixesBuffer, StoredLeafRef, + TrieRoot, }; pub struct Transaction { pub data_store: S, current_root: TrieRoot>, + prefixes_buffer: PrefixesBuffer, } impl, V: Clone + PortableHash> Transaction, V> { diff --git a/src/transaction/nodes.rs b/src/transaction/nodes.rs index b5f4c63..ee6b9fe 100644 --- a/src/transaction/nodes.rs +++ b/src/transaction/nodes.rs @@ -1,5 +1,5 @@ use alloc::boxed::Box; -use core::{fmt, iter, mem}; +use core::{fmt, iter, mem, slice, usize}; use crate::{hash::PortableHasher, stored, KeyHash, NodeHash, PortableHash, PortableUpdate}; @@ -263,19 +263,68 @@ mod tests { } } +pub struct Prefix { + /// This value will be 0 if the branch occurs in the first word of the hash key. + /// The value is the prior word if the branches parent's word index no more than 1 less. + /// If the parent's word index is more than 1 word prior, + /// we must store the multiword prefix outside of the branch, so the value is the index of the prefix. + /// The length of the prefix is the difference between the parent's word index and the branch's word index. + prior_word_or_prefix_idx: u32, +} + +impl Prefix { + pub fn get_prefix<'s, 'txn: 's>( + &'s self, + prefixies: &'txn PrefixesBuffer, + word_idx: usize, + parent_word_idx: usize, + ) -> Option<&'s [u32]> { + if word_idx - parent_word_idx <= 1 { + if cfg!(debug_assertions) && word_idx == 0 { + debug_assert_eq!(self.prior_word_or_prefix_idx, 0); + } + + Some(slice::from_ref(&self.prior_word_or_prefix_idx)) + } else { + let prefix_len = word_idx - parent_word_idx; + debug_assert!(prefix_len > 1); + prefixies.get_prefix( + self.prior_word_or_prefix_idx as usize, + word_idx - parent_word_idx, + ) + } + } +} + +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct PrefixesBuffer { + buffer: Vec, +} + +impl PrefixesBuffer { + pub fn new() -> Self { + Self { buffer: Vec::new() } + } + + pub fn push_prefix(&mut self, prefix: &[u32]) -> u32 { + let idx = self.buffer.len() as u32; + self.buffer.extend_from_slice(prefix); + idx + } + + pub fn get_prefix(&self, idx: usize, len: usize) -> Option<&[u32]> { + let end = idx + len; + self.buffer.get(idx..end) + } +} + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Branch { pub left: NR, pub right: NR, pub mask: BranchMask, - /// The word at the `(bit_idx / 32) - 1`. - /// Common to both children. - /// Will be 0 if this node is the root. - pub prior_word: u32, - /// The the segment of the hash key from the parent branch to `prior_word`. - /// Will be empty if the parent_branch.mask.bit_idx / 32 == self.mask.bit_idx / 32. - pub prefix: Box<[u32]>, + pub prefix: Prefix, } impl fmt::Debug for Branch { @@ -352,6 +401,7 @@ impl Branch { /// Hash a branch node with known child hashes. /// /// Caller must ensure that the hasher is reset before calling this function. + /// #[inline] pub fn hash_branch>( &self, @@ -361,6 +411,7 @@ impl Branch { ) -> NodeHash { hasher.portable_update(left); hasher.portable_update(right); + // Security: it's important to hash the metadata to avoid a potential trie corruption attack. hasher.portable_update(self.mask.bit_idx.to_le_bytes()); hasher.portable_update(self.mask.left_prefix.to_le_bytes()); hasher.portable_update(self.prior_word.to_le_bytes()); From b38d346aa8ab36bece002f34192a553a360fda34 Mon Sep 17 00:00:00 2001 From: Avi Dessauer Date: Wed, 17 Jul 2024 15:26:32 -0700 Subject: [PATCH 2/2] WIP: Try another approach, no luck yet --- src/stored.rs | 5 ++- src/stored/merkle.rs | 6 ++-- src/transaction.rs | 5 +-- src/transaction/nodes.rs | 68 ++++++++++++++++++++-------------------- 4 files changed, 45 insertions(+), 39 deletions(-) diff --git a/src/stored.rs b/src/stored.rs index 0ea1bd6..c557ec2 100644 --- a/src/stored.rs +++ b/src/stored.rs @@ -21,7 +21,10 @@ pub trait Store { hash_idx: Idx, ) -> Result; - fn get_node(&self, hash_idx: Idx) -> Result, &Leaf>, Self::Error>; + fn get_node<'s>( + &'s self, + hash_idx: Idx, + ) -> Result, &'s Leaf>, Self::Error>; } impl> Store for &S { diff --git a/src/stored/merkle.rs b/src/stored/merkle.rs index a052a0b..0633d1c 100644 --- a/src/stored/merkle.rs +++ b/src/stored/merkle.rs @@ -5,7 +5,7 @@ use bumpalo::Bump; use ouroboros::self_referencing; use crate::{ - transaction::nodes::{NodeRef, TrieRoot}, + transaction::nodes::{NodeRef, PrefixesBuffer, TrieRoot}, Branch, Leaf, PortableHash, PortableHasher, TrieError, }; @@ -20,12 +20,14 @@ type Result = core::result::Result; #[derive(Clone, Debug, PartialEq, Eq)] pub struct Snapshot { /// The last branch is the root of the trie if it exists. - branches: Box<[Branch]>, + branches: Box<[Branch<'static, Idx>]>, /// A Snapshot containing only leaves: Box<[Leaf]>, // we only store the hashes of the nodes that have not been visited. unvisited_nodes: Box<[NodeHash]>, + + prefixies_buffer: PrefixesBuffer, } impl Snapshot { diff --git a/src/transaction.rs b/src/transaction.rs index cb75290..97323df 100644 --- a/src/transaction.rs +++ b/src/transaction.rs @@ -22,7 +22,6 @@ use self::nodes::{ pub struct Transaction { pub data_store: S, current_root: TrieRoot>, - prefixes_buffer: PrefixesBuffer, } impl, V: Clone + PortableHash> Transaction, V> { @@ -44,7 +43,6 @@ impl, V: Clone + PortableHash> Transaction, V> Transaction { mask: new_branch.mask, prior_word: new_branch.prior_word, prefix: new_branch.prefix.clone(), + prefix, })); continue; @@ -549,6 +548,7 @@ impl Transaction, V> { Transaction { current_root: builder.trie_root(), data_store: builder, + prefixes_buffer: builder.prefixes_buffer, } } } @@ -582,6 +582,7 @@ impl Transaction, V> { Ok(Transaction { current_root: snapshot.trie_root()?, data_store: snapshot, + prefixes_buffer: snapshot.prefixes_buffer, }) } } diff --git a/src/transaction/nodes.rs b/src/transaction/nodes.rs index ee6b9fe..1611a1d 100644 --- a/src/transaction/nodes.rs +++ b/src/transaction/nodes.rs @@ -74,20 +74,20 @@ pub enum Node { /// When executing against a `SnapshotBuilder`, it's a reference to a `NodeHash`, /// which can in turn be used to retrieve the `Node`. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] -pub enum NodeRef { - ModBranch(Box>), +pub enum NodeRef<'s, V> { + ModBranch(Box>), ModLeaf(Box>), Stored(stored::Idx), } -impl NodeRef { +impl NodeRef<'_, V> { #[inline(always)] pub fn temp_null_stored() -> Self { NodeRef::Stored(u32::MAX) } } -impl fmt::Debug for NodeRef { +impl fmt::Debug for NodeRef<'_, V> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::ModBranch(b) => f.debug_tuple("ModBranch").field(b).finish(), @@ -97,7 +97,7 @@ impl fmt::Debug for NodeRef { } } -impl From>>> for NodeRef { +impl<'s, V> From>>> for NodeRef<'s, V> { #[inline] fn from(branch: Box>>) -> Self { NodeRef::ModBranch(branch) @@ -263,7 +263,7 @@ mod tests { } } -pub struct Prefix { +pub struct PrefixBufferRef { /// This value will be 0 if the branch occurs in the first word of the hash key. /// The value is the prior word if the branches parent's word index no more than 1 less. /// If the parent's word index is more than 1 word prior, @@ -272,7 +272,7 @@ pub struct Prefix { prior_word_or_prefix_idx: u32, } -impl Prefix { +impl PrefixBufferRef { pub fn get_prefix<'s, 'txn: 's>( &'s self, prefixies: &'txn PrefixesBuffer, @@ -296,7 +296,7 @@ impl Prefix { } } -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct PrefixesBuffer { buffer: Vec, } @@ -318,21 +318,32 @@ impl PrefixesBuffer { } } +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum PrefixCow<'a> { + StartOfKey, + PriorWord(u32), + Segment(&'a [u32]), + SegmentOwned(Box<[u32]>), +} + +/// A branch node in the trie. +/// `NR` is the type of the node references. +/// `PR` is the type of reference to the prefix. #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct Branch { +pub struct Branch<'s, NR> { pub left: NR, pub right: NR, pub mask: BranchMask, - pub prefix: Prefix, + pub prefix: PrefixCow<'s>, } -impl fmt::Debug for Branch { +impl fmt::Debug for Branch<'_, NR> { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Branch") .field("mask", &self.mask) - .field("prior_word", &self.prior_word) .field("prefix", &self.prefix) .finish() } @@ -359,7 +370,7 @@ pub enum KeyPositionAdjacent { PrefixVec(usize), } -impl Branch { +impl Branch> { /// Returns the position of the key relative to the branch. #[inline(always)] pub fn key_position(&self, key_hash: &KeyHash) -> KeyPosition { @@ -424,19 +435,7 @@ impl Branch { } } -impl Branch> { - pub(crate) fn from_stored(branch: &Branch) -> Branch> { - Branch { - left: NodeRef::Stored(branch.left), - right: NodeRef::Stored(branch.right), - mask: branch.mask, - prior_word: branch.prior_word, - // TODO remove the clone - // Maybe use a AsRef<[u32]> instead of Box<[u32]> - prefix: branch.prefix.clone(), - } - } - +impl Branch, PR> { /// A wrapper around `new_at_branch_ret` which returns nothing. /// This exists to aid compiler inlining. /// @@ -591,17 +590,19 @@ impl Branch> { debug_assert!(new_leaf.key_hash.0[..word_idx] == old_leaf.as_ref().key_hash.0[..word_idx]); - let prior_word_idx = word_idx.saturating_sub(1); - let prefix = new_leaf.key_hash.0[prefix_start_idx..prior_word_idx].into(); - let prior_word = if word_idx == 0 { - 0 - } else { + let prefix = if word_idx == 0 { + PrefixCow::StartOfKey + } else if prefix_start_idx == word_idx { + let prior_word_idx = word_idx - 1; debug_assert_eq!( new_leaf.key_hash.0[prior_word_idx], old_leaf.as_ref().key_hash.0[prior_word_idx] ); - - new_leaf.key_hash.0[prior_word_idx] + PrefixCow::PriorWord(new_leaf.key_hash.0[prior_word_idx]) + } else if prefix_start_idx == word_idx - 1 { + PrefixCow::PriorWord(new_leaf.key_hash.0[word_idx - 1]) + } else { + PrefixCow::Segment(&new_leaf.key_hash.0[prefix_start_idx..word_idx]) }; let mask = BranchMask::new(word_idx as u32, a, b); @@ -628,7 +629,6 @@ impl Branch> { left, right, mask, - prior_word, prefix, }), // TODO use an enum