
Node from stream backrefs optimisation #532

Open · wants to merge 61 commits into base: main
Changes from all commits · 61 commits
438e2df
initial commit
matt-o-how Jan 13, 2025
fdeef9f
basic structure
matt-o-how Jan 13, 2025
bdeea87
comment for clarity
matt-o-how Jan 13, 2025
a9b3351
Improve clarity in error messages and comments
matt-o-how Jan 13, 2025
47e41b8
passing tests!
matt-o-how Jan 13, 2025
5d38c06
clippy cleanups
matt-o-how Jan 13, 2025
99b0c1d
clarify comment
matt-o-how Jan 13, 2025
ddceeba
remove Cost and move into de_br
matt-o-how Jan 14, 2025
8060a47
use an index to simulate stack rather than cloning vec
matt-o-how Jan 14, 2025
d394e0b
add catch for underflow
matt-o-how Jan 14, 2025
f8f1e08
catch another underflow
matt-o-how Jan 14, 2025
de2b748
add fuzz against old
matt-o-how Jan 15, 2025
3d35f09
fmt
matt-o-how Jan 15, 2025
5231929
add pair count fuzz
matt-o-how Jan 15, 2025
791030b
fmt fuzz
matt-o-how Jan 15, 2025
675ab2c
try adding features to fuzzer cargo.toml
matt-o-how Jan 15, 2025
982498f
forward the counters feature
matt-o-how Jan 15, 2025
3b86c79
fix name in fuzz cargo.toml
matt-o-how Jan 15, 2025
423dd73
add all features to cargo fuzz github action
matt-o-how Jan 15, 2025
64340f1
return error when traversing empty values stack
matt-o-how Jan 15, 2025
bd93d2a
special case 0xfe 0x01
matt-o-how Jan 15, 2025
49e26e2
special case 0xfe 0x00
matt-o-how Jan 15, 2025
25cf07c
treat empty stack as sexp
matt-o-how Jan 15, 2025
d322960
prevent underflow in empty case
matt-o-how Jan 15, 2025
fcc634e
parse as empty list when we empty the list
matt-o-how Jan 15, 2025
8b31b55
add benchmarking for node_from_bytes_backrefs_old
matt-o-how Jan 16, 2025
55085d0
add stackpointer on empty stack test case
matt-o-how Jan 29, 2025
e7bb047
initial commit
matt-o-how Jan 13, 2025
9e96280
basic structure
matt-o-how Jan 13, 2025
880d7f6
comment for clarity
matt-o-how Jan 13, 2025
97f6f0f
Improve clarity in error messages and comments
matt-o-how Jan 13, 2025
0ce9f50
passing tests!
matt-o-how Jan 13, 2025
31e4fb4
clippy cleanups
matt-o-how Jan 13, 2025
b2946db
clarify comment
matt-o-how Jan 13, 2025
1ef2bc7
remove Cost and move into de_br
matt-o-how Jan 14, 2025
14d1de5
use
matt-o-how Jan 29, 2025
c469103
use checkpoint in fuzzer
matt-o-how Jan 29, 2025
d3986f9
pub(crate) msb_mask
matt-o-how Jan 29, 2025
e7a8b5c
add unit test for traverse_path_with_vec - not working
matt-o-how Jan 29, 2025
864ef16
fix rebase weirdness
matt-o-how Jan 30, 2025
d6d8cd4
fix the unit test
matt-o-how Jan 30, 2025
0a00f77
fmt unit test
matt-o-how Jan 30, 2025
85a9785
Update src/serde/de_br.rs
matt-o-how Jan 30, 2025
e8f3f2a
add cache
matt-o-how Jan 30, 2025
1a30940
clippy fix
matt-o-how Jan 30, 2025
1f790fe
Update fuzz/Cargo.toml
matt-o-how Jan 31, 2025
e2fed6c
remove useless line
matt-o-how Jan 31, 2025
eb9f678
underscore unused vars
matt-o-how Jan 31, 2025
62c4a82
merge the fuzzers into one
matt-o-how Jan 31, 2025
c6a4071
check original version of fuzzer
matt-o-how Jan 31, 2025
55cdb7f
cache when arg_index == 0
matt-o-how Jan 31, 2025
437f493
test fuzzing without checkpoint
matt-o-how Jan 31, 2025
08dd203
try 2 allocators
matt-o-how Jan 31, 2025
948712f
try checkpointing differently
matt-o-how Jan 31, 2025
11ad810
fix typo
matt-o-how Jan 31, 2025
90bb3de
test if assertion was the real problem
matt-o-how Jan 31, 2025
bf09050
clippy fix for check
matt-o-how Jan 31, 2025
24abb5a
check old first
matt-o-how Jan 31, 2025
11fb7f6
fmt for check
matt-o-how Jan 31, 2025
a82e47f
disable checkpoint
matt-o-how Jan 31, 2025
e1d0b32
add pair count fuzz
matt-o-how Jan 31, 2025
14 changes: 12 additions & 2 deletions benches/deserialize.rs
@@ -1,7 +1,8 @@
use clvmr::allocator::Allocator;
use clvmr::serde::{
node_from_bytes, node_from_bytes_backrefs, node_to_bytes_backrefs,
serialized_length_from_bytes, serialized_length_from_bytes_trusted, tree_hash_from_stream,
node_from_bytes, node_from_bytes_backrefs, node_from_bytes_backrefs_old,
node_to_bytes_backrefs, serialized_length_from_bytes, serialized_length_from_bytes_trusted,
tree_hash_from_stream,
};
use criterion::{criterion_group, criterion_main, Criterion};
use std::include_bytes;
@@ -64,6 +65,15 @@ fn deserialize_benchmark(c: &mut Criterion) {
start.elapsed()
})
});

group.bench_function(format!("node_from_bytes_backrefs_old{name_suffix}"), |b| {
b.iter(|| {
a.restore_checkpoint(&iter_checkpoint);
let start = Instant::now();
node_from_bytes_backrefs_old(&mut a, bl).expect("node_from_bytes_backrefs_old");
start.elapsed()
})
});
}

let mut a = Allocator::new();
8 changes: 7 additions & 1 deletion fuzz/Cargo.toml
@@ -10,7 +10,7 @@ cargo-fuzz = true

[dependencies]
libfuzzer-sys = { workspace = true }
clvmr = { workspace = true }
clvmr = { workspace = true, features = ["counters"] }
chia-sha2 = { workspace = true }
hex = { workspace = true }
arbitrary = { workspace = true }
@@ -57,6 +57,12 @@ path = "fuzz_targets/deserialize_br_rand_tree.rs"
test = false
doc = false

[[bin]]
name = "fuzz_deserialize_br_test_pair_count"
path = "fuzz_targets/deserialize_br_test_pair_count.rs"
test = false
doc = false

[[bin]]
name = "fuzz_parse_triples"
path = "fuzz_targets/parse_triples.rs"
33 changes: 31 additions & 2 deletions fuzz/fuzz_targets/deserialize_br.rs
@@ -1,6 +1,7 @@
#![no_main]
use clvmr::allocator::Allocator;
use clvmr::serde::node_from_bytes_backrefs;
use clvmr::serde::node_from_bytes_backrefs_old;
use clvmr::serde::node_to_bytes_backrefs;
use libfuzzer_sys::fuzz_target;

@@ -15,9 +16,37 @@ fuzz_target!(|data: &[u8]| {

let b1 = node_to_bytes_backrefs(&allocator, program).unwrap();

let mut allocator = Allocator::new();
// let cp = allocator.checkpoint();
let program = node_from_bytes_backrefs(&mut allocator, &b1).unwrap();

// let new_pair_count = allocator.pair_count();
// allocator.restore_checkpoint(&cp);
let program_old = node_from_bytes_backrefs_old(&mut allocator, &b1).unwrap();
// assert!(new_pair_count <= allocator.pair_count());
let b3 = node_to_bytes_backrefs(&allocator, program_old).unwrap();
assert_eq!(b1, b3);
let b2 = node_to_bytes_backrefs(&allocator, program).unwrap();
assert_eq!(b1, b2);
});

// // #[cfg(feature = "counters")]
// fuzz_target!(|data: &[u8]| {
// let mut allocator = Allocator::new();
// let cp = allocator.checkpoint();
// let program = match node_from_bytes_backrefs(&mut allocator, data) {
// Err(_) => {
// return;
// }
// Ok(r) => r,
// };

// let b1 = node_to_bytes_backrefs(&allocator, program).unwrap();

// // reset allocators
// allocator.restore_checkpoint(&cp);

// let _program = node_from_bytes_backrefs(&mut allocator, &b1).unwrap();
// let new_pair_count = allocator.pair_count();
// allocator.restore_checkpoint(&cp);
// let _program_old = node_from_bytes_backrefs_old(&mut allocator, &b1).unwrap();
// assert!(new_pair_count <= allocator.pair_count());
// });
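Both the live fuzz target above and the commented-out variant follow the differential-testing pattern: run the optimised and the original deserialiser on the same re-serialised bytes and require identical round-trip output. The bare shape of that check, sketched std-only with toy stand-in implementations (the summing closures are illustrative, not clvmr code):

```rust
// Differential testing: a reference and an optimised implementation agree on
// an input if both succeed with equal output, or both reject it.
fn differential_check<T: PartialEq, E>(
    input: &[u8],
    reference: impl Fn(&[u8]) -> Result<T, E>,
    optimized: impl Fn(&[u8]) -> Result<T, E>,
) -> bool {
    match (reference(input), optimized(input)) {
        (Ok(a), Ok(b)) => a == b,
        (Err(_), Err(_)) => true, // agreeing on rejection is still agreement
        _ => false,
    }
}

fn main() {
    // toy stand-ins: two ways of summing bytes
    let naive = |b: &[u8]| -> Result<u64, ()> {
        let mut sum = 0u64;
        for &x in b {
            sum += u64::from(x);
        }
        Ok(sum)
    };
    let optimised = |b: &[u8]| -> Result<u64, ()> { Ok(b.iter().map(|&x| u64::from(x)).sum()) };

    assert!(differential_check(&[1, 2, 3], naive, optimised));
    // a divergent pair of implementations is caught
    assert!(!differential_check(&[1], |_: &[u8]| Ok::<u64, ()>(1), |_: &[u8]| Ok::<u64, ()>(2)));
    println!("implementations agree");
}
```

A fuzzer then feeds `differential_check` arbitrary inputs and asserts it always returns true, which is exactly what the `b1 == b2 == b3` assertions above do for the two backref deserialisers.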
29 changes: 29 additions & 0 deletions fuzz/fuzz_targets/deserialize_br_test_pair_count.rs
@@ -0,0 +1,29 @@
#![no_main]
use clvmr::allocator::Allocator;
use clvmr::serde::node_from_bytes_backrefs;
use clvmr::serde::node_from_bytes_backrefs_old;
use clvmr::serde::node_to_bytes_backrefs;
use libfuzzer_sys::fuzz_target;

// #[cfg(feature = "counters")]
fuzz_target!(|data: &[u8]| {
let mut allocator = Allocator::new();
let cp = allocator.checkpoint();
let program = match node_from_bytes_backrefs(&mut allocator, data) {
Err(_) => {
return;
}
Ok(r) => r,
};

let b1 = node_to_bytes_backrefs(&allocator, program).unwrap();

// reset allocators
allocator.restore_checkpoint(&cp);

let _program = node_from_bytes_backrefs(&mut allocator, &b1).unwrap();
let new_pair_count = allocator.pair_count();
allocator.restore_checkpoint(&cp);
let _program_old = node_from_bytes_backrefs_old(&mut allocator, &b1).unwrap();
assert!(new_pair_count <= allocator.pair_count());
});
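This target leans on `Allocator::checkpoint`/`restore_checkpoint` to rewind between runs, then asserts that the new parser never allocates more pairs than the old one. A minimal arena sketch of those semantics, assuming only that a checkpoint records the allocation watermark (this mirrors the shape, not clvmr's actual `Allocator`, which also manages atoms):

```rust
// Toy arena with checkpoint/restore: a checkpoint is just the allocation
// watermark, and restoring truncates everything allocated after it.
struct Arena {
    pairs: Vec<(usize, usize)>,
}

struct Checkpoint {
    pair_count: usize,
}

impl Arena {
    fn new() -> Self {
        Arena { pairs: Vec::new() }
    }
    fn new_pair(&mut self, left: usize, right: usize) -> usize {
        self.pairs.push((left, right));
        self.pairs.len() - 1
    }
    fn pair_count(&self) -> usize {
        self.pairs.len()
    }
    fn checkpoint(&self) -> Checkpoint {
        Checkpoint { pair_count: self.pairs.len() }
    }
    fn restore_checkpoint(&mut self, cp: &Checkpoint) {
        // drop everything allocated after the checkpoint was taken
        self.pairs.truncate(cp.pair_count);
    }
}

fn main() {
    let mut a = Arena::new();
    let cp = a.checkpoint();

    // "new" parser: builds one pair for some input
    a.new_pair(0, 1);
    let new_pair_count = a.pair_count();

    // rewind, then "old" parser: builds two pairs for the same input
    a.restore_checkpoint(&cp);
    a.new_pair(0, 1);
    a.new_pair(1, 2);

    // the invariant the fuzzer asserts: new <= old
    assert!(new_pair_count <= a.pair_count());
    println!("pair-count invariant holds");
}
```

Restoring to the same checkpoint between the two runs is what makes the two `pair_count()` readings comparable: both parsers start from an identical arena state.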
203 changes: 200 additions & 3 deletions src/serde/de_br.rs
@@ -2,10 +2,10 @@ use std::collections::HashSet;
use std::io;
use std::io::{Cursor, Read};

use crate::allocator::{Allocator, NodePtr, SExp};
use crate::traverse_path::traverse_path;

use super::parse_atom::{parse_atom, parse_path};
use crate::allocator::{Allocator, NodePtr, SExp};
use crate::reduction::EvalErr;
use crate::traverse_path::{first_non_zero, msb_mask, traverse_path};

const BACK_REFERENCE: u8 = 0xfe;
const CONS_BOX_MARKER: u8 = 0xff;
Expand All @@ -21,6 +21,47 @@ pub fn node_from_stream_backrefs(
allocator: &mut Allocator,
f: &mut Cursor<&[u8]>,
mut backref_callback: impl FnMut(NodePtr),
) -> io::Result<NodePtr> {
let mut values = Vec::<(NodePtr, Option<NodePtr>)>::new();
let mut ops = vec![ParseOp::SExp];

let mut b = [0; 1];
while let Some(op) = ops.pop() {
match op {
ParseOp::SExp => {
f.read_exact(&mut b)?;
if b[0] == CONS_BOX_MARKER {
ops.push(ParseOp::Cons);
ops.push(ParseOp::SExp);
ops.push(ParseOp::SExp);
} else if b[0] == BACK_REFERENCE {
let path = parse_path(f)?;
let back_reference = traverse_path_with_vec(allocator, path, &mut values)?;
backref_callback(back_reference);
values.push((back_reference, None));
} else {
let new_atom = parse_atom(allocator, b[0], f)?;
values.push((new_atom, None));
}
}
ParseOp::Cons => {
// cons
// pop left and right values off of the "values" stack, then
// push the new pair onto it
let right = values.pop().expect("No cons without two vals.");
let left = values.pop().expect("No cons without two vals.");
let root_node = allocator.new_pair(left.0, right.0)?;
values.push((root_node, None));
}
}
}
Ok(values.pop().expect("Top of the stack").0)
}
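The new `node_from_stream_backrefs` above replaces the old version's CLVM-pair parse stack with a plain `Vec`, while keeping the same non-recursive, two-stack shape: an `ops` stack of pending parse operations and a `values` stack of finished nodes, where a cons marker defers a `Cons` op until both children are parsed. A self-contained sketch of that shape over a toy byte format (0xFF introduces a pair, any other byte is a one-byte atom; this is illustrative only, not the real CLVM atom encoding, and it has no back-references):

```rust
// Two-stack, non-recursive parser: `ops` holds pending operations, `values`
// holds finished subtrees. 0xFF defers a Cons until both children are parsed.
#[derive(Debug, PartialEq)]
enum Node {
    Atom(u8),
    Pair(Box<Node>, Box<Node>),
}

enum ParseOp {
    SExp,
    Cons,
}

fn parse(bytes: &[u8]) -> Result<Node, String> {
    let mut input = bytes.iter().copied();
    let mut values: Vec<Node> = Vec::new();
    let mut ops = vec![ParseOp::SExp];
    while let Some(op) = ops.pop() {
        match op {
            ParseOp::SExp => {
                let b = input.next().ok_or("unexpected end of input")?;
                if b == 0xFF {
                    // parse left and right children first, then combine them
                    ops.push(ParseOp::Cons);
                    ops.push(ParseOp::SExp);
                    ops.push(ParseOp::SExp);
                } else {
                    values.push(Node::Atom(b));
                }
            }
            ParseOp::Cons => {
                let right = values.pop().ok_or("value stack underflow")?;
                let left = values.pop().ok_or("value stack underflow")?;
                values.push(Node::Pair(Box::new(left), Box::new(right)));
            }
        }
    }
    values.pop().ok_or_else(|| "empty value stack".to_string())
}

fn main() {
    assert_eq!(parse(&[0x01]).unwrap(), Node::Atom(1));
    // ((1 . 2) . 3): the inner pair completes before the outer Cons is popped
    assert_eq!(
        parse(&[0xFF, 0xFF, 0x01, 0x02, 0x03]).unwrap(),
        Node::Pair(
            Box::new(Node::Pair(Box::new(Node::Atom(1)), Box::new(Node::Atom(2)))),
            Box::new(Node::Atom(3)),
        )
    );
    assert!(parse(&[0xFF, 0x01]).is_err()); // truncated pair
    println!("toy parser ok");
}
```

The real function differs in that its value stack also carries an `Option<NodePtr>` cache slot per entry, which `traverse_path_with_vec` fills when a back-reference forces the virtual stack to be materialised as pairs.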

fn node_from_stream_backrefs_old(
allocator: &mut Allocator,
f: &mut Cursor<&[u8]>,
mut backref_callback: impl FnMut(NodePtr),
) -> io::Result<NodePtr> {
let mut values = allocator.nil();
let mut ops = vec![ParseOp::SExp];
@@ -71,6 +112,11 @@ pub fn node_from_bytes_backrefs(allocator: &mut Allocator, b: &[u8]) -> io::Resu
node_from_stream_backrefs(allocator, &mut buffer, |_node| {})
}

pub fn node_from_bytes_backrefs_old(allocator: &mut Allocator, b: &[u8]) -> io::Result<NodePtr> {
let mut buffer = Cursor::new(b);
node_from_stream_backrefs_old(allocator, &mut buffer, |_node| {})
}

pub fn node_from_bytes_backrefs_record(
allocator: &mut Allocator,
b: &[u8],
@@ -83,8 +129,95 @@ pub fn node_from_bytes_backrefs_record(
Ok((ret, backrefs))
}

pub fn traverse_path_with_vec(
Review comment (Contributor): this is a pretty big function for not having a unit test.

Reply (Author): Added a unit test
allocator: &mut Allocator,
node_index: &[u8],
args: &mut [(NodePtr, Option<NodePtr>)],
) -> io::Result<NodePtr> {
// the vec is a stack so a ChiaLisp list of (3 . (2 . (1 . NIL))) would be [1, 2, 3]
// however entries in this vec may be ChiaLisp SExps so it may look more like [1, (2 . NIL), 3]

let mut parsing_sexp = args.is_empty();

// instead of popping, we treat this as a pointer to the end of the virtual stack
let mut arg_index: usize = if parsing_sexp { 0 } else { args.len() - 1 };

// find first non-zero byte
let first_bit_byte_index = first_non_zero(node_index);
if first_bit_byte_index >= node_index.len() {
return Ok(NodePtr::NIL);
}

// find first non-zero bit (the most significant bit is a sentinel)
let last_bitmask = msb_mask(node_index[first_bit_byte_index]);

// follow through the bits, moving left and right
let mut byte_idx = node_index.len() - 1;
let mut bitmask = 0x01;

// if we move from parsing the Vec stack to parsing the SExp stack use the following variables
let mut sexp_to_parse = NodePtr::NIL;

while byte_idx > first_bit_byte_index || bitmask < last_bitmask {
Review comment (Contributor): did you explore making this two loops, one for the Vec stack and one for traversing the SExp tree? I think it would be easier to follow, partly because you wouldn't need two loop bodies separated by the parsing_sexp check, and it would place the tail of this function between those loops, if we exit after having pointed to a stack node.
let is_bit_set: bool = (node_index[byte_idx] & bitmask) != 0;
if parsing_sexp {
match allocator.sexp(sexp_to_parse) {
SExp::Atom => {
return Err(EvalErr(
sexp_to_parse,
"invalid backreference during deserialisation".into(),
)
.into());
}
SExp::Pair(left, right) => {
sexp_to_parse = if is_bit_set { right } else { left };
}
}
} else if is_bit_set {
// we have traversed right ("rest"), so we keep processing the Vec
// pop from the stack
if arg_index == 0 {
// if we have reached the end of the stack, we must start parsing as NIL
parsing_sexp = true;
} else {
arg_index -= 1;
}
} else {
// we have traversed left (i.e "first" rather than "rest") so we must process as SExp now
parsing_sexp = true;
sexp_to_parse = args[arg_index].0;
}

if bitmask == 0x80 {
bitmask = 0x01;
byte_idx -= 1;
} else {
bitmask <<= 1;
}
}
if parsing_sexp {
return Ok(sexp_to_parse);
} else if args[arg_index].1.is_some() {
// cached entry exists - use that instead of recreating the pairs
return Ok(args[arg_index].1.unwrap());
}
// take bottom of stack and make (item . NIL)
let mut backref_node = allocator.new_pair(args[0].0, NodePtr::NIL)?;
if arg_index == 0 {
args[arg_index].1 = Some(backref_node);
return Ok(backref_node);
}
// for the rest of items starting from last + 1 in stack
for x in args.iter().take(arg_index + 1).skip(1) {
backref_node = allocator.new_pair(x.0, backref_node)?;
}
args[arg_index].1 = Some(backref_node);
Ok(backref_node)
}
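`traverse_path_with_vec` walks a CLVM path over the virtual list formed by the `values` stack, only materialising pairs (and caching them in the `Option<NodePtr>` slot) when the path stops on the stack itself. A std-only mirror of the bit-walk semantics, with plain integers standing in for atoms, the pair-building tail reduced to returning the selected stack suffix, and the helper names (`msb_mask`, `traverse_stack`, `Found`) local to this sketch:

```rust
// Mirror of the traverse_path_with_vec bit walk: bits are read LSB-first from
// the last byte up to a most-significant sentinel bit; a 1-bit ("rest") moves
// down the virtual list (toward the bottom of the stack), a 0-bit ("first")
// selects the element under the cursor. Stack entries here are plain atoms,
// so any bit after selecting an element is an error.
#[derive(Debug, PartialEq)]
enum Found {
    Nil,
    Atom(i32),
    List(Vec<i32>), // stack suffix, bottom first; as a CLVM list it reads top first
}

// mask of the most significant set bit (the path's sentinel)
fn msb_mask(byte: u8) -> u8 {
    let mut mask = 0x80u8;
    while mask > 1 && (byte & mask) == 0 {
        mask >>= 1;
    }
    mask
}

fn traverse_stack(path: &[u8], stack: &[i32]) -> Result<Found, String> {
    // an all-zero (or empty) path resolves to NIL
    let Some(first) = path.iter().position(|&b| b != 0) else {
        return Ok(Found::Nil);
    };
    let last_mask = msb_mask(path[first]);
    let mut idx = stack.len().saturating_sub(1); // top of stack = head of list
    let mut on_node = stack.is_empty(); // an empty stack is read as NIL
    let mut node = Found::Nil;
    let (mut byte_idx, mut mask) = (path.len() - 1, 0x01u8);
    while byte_idx > first || mask < last_mask {
        let bit_set = (path[byte_idx] & mask) != 0;
        if on_node {
            // the real code keeps walking an SExp tree here; our stand-in
            // nodes are all atoms, so descending further is an error
            return Err("invalid backreference: path descends into an atom".to_string());
        } else if bit_set {
            // "rest": move toward the bottom of the stack; past it lies NIL
            if idx == 0 {
                on_node = true;
            } else {
                idx -= 1;
            }
        } else {
            // "first": select the element under the cursor
            on_node = true;
            node = Found::Atom(stack[idx]);
        }
        if mask == 0x80 {
            mask = 0x01;
            byte_idx -= 1;
        } else {
            mask <<= 1;
        }
    }
    if on_node {
        Ok(node)
    } else {
        Ok(Found::List(stack[..=idx].to_vec()))
    }
}

fn main() {
    let stack = [1, 2]; // the virtual list (2 . (1 . NIL))
    assert_eq!(traverse_stack(&[0b10], &stack).unwrap(), Found::Atom(2));
    assert_eq!(traverse_stack(&[0b101], &stack).unwrap(), Found::Atom(1));
    assert_eq!(traverse_stack(&[0b111], &stack).unwrap(), Found::Nil);
    assert_eq!(traverse_stack(&[0b1], &stack).unwrap(), Found::List(vec![1, 2]));
    assert!(traverse_stack(&[0b1011], &stack).is_err());
    println!("path walk matches the unit-test expectations");
}
```

These cases reproduce the expectations of the unit test added in this PR for `vec![(n1, None), (n2, None)]`: path `0b10` selects the top entry, `0b101` the one below it, `0b111` runs off the end to NIL, and `0b1011` descends into an atom and fails.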

#[cfg(test)]
mod tests {

use super::*;
use rstest::rstest;

@@ -96,6 +229,12 @@ mod tests {
"ff86666f6f626172ff86666f6f62617280",
"9148834131750904c023598bed28db269bdb29012514579e723d63e27829bcba"
)]
// ()
// stackpointer to 0
#[case(
"fffe0100",
"8b7bb45fbbdd29c84edcab98274c6e084f19af64d24c65a475562dbbfee67735"
)]
// ("foobar" "foobar")
#[case(
"ff86666f6f626172fe01",
@@ -126,11 +265,69 @@
let buf = Vec::from_hex(serialization_as_hex).unwrap();
let mut allocator = Allocator::new();
let node = node_from_bytes_backrefs(&mut allocator, &buf).unwrap();
let old_node = node_from_bytes_backrefs_old(&mut allocator, &buf).unwrap();
let mut oc = ObjectCache::new(treehash);
let calculated_hash = oc.get_or_calculate(&allocator, &node, None).unwrap();
let ch: &[u8] = calculated_hash;
let expected_hash: Vec<u8> = Vec::from_hex(expected_hash_as_hex).unwrap();
assert_eq!(expected_hash, ch);
let calculated_hash = oc.get_or_calculate(&allocator, &old_node, None).unwrap();
let ch: &[u8] = calculated_hash;
assert_eq!(expected_hash, ch);
}
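For reference, the `ff86666f6f626172fe01` vector exercised above decodes byte by byte as follows; 0x86 is the standard CLVM size prefix (0x80 | 6) for a six-byte atom, and the sketch merely reconstructs the hex string to confirm the breakdown:

```rust
// Byte-level decode of the round-trip test vector "ff86666f6f626172fe01",
// which deserialises to the list ("foobar" "foobar"):
//
//   ff                     cons marker: parse two sub-expressions, then pair them
//   86 66 6f 6f 62 61 72   atom: 0x86 = "6 bytes follow", then "foobar"
//   fe 01                  back-reference: path 1 selects the whole parse stack,
//                          which at this point holds just "foobar", read as the
//                          list ("foobar")
//
// giving ("foobar" . ("foobar")), i.e. the two-element list ("foobar" "foobar").
fn to_hex(bytes: &[u8]) -> String {
    bytes.iter().map(|b| format!("{b:02x}")).collect()
}

fn main() {
    let bytes = [0xff, 0x86, b'f', b'o', b'o', b'b', b'a', b'r', 0xfe, 0x01];
    assert_eq!(to_hex(&bytes), "ff86666f6f626172fe01");
    println!("vector reconstructed: {}", to_hex(&bytes));
}
```

This is why the deserialiser reuses the already-parsed "foobar": the back-reference path lands on the virtual stack rather than re-encoding the atom.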

#[test]
fn test_traverse_path_with_vec() {
use crate::allocator::Allocator;

let mut a = Allocator::new();
let nul = a.nil();
let n1 = a.new_atom(&[0, 1, 2]).unwrap();
let n2 = a.new_atom(&[4, 5, 6]).unwrap();

let mut list: Vec<(NodePtr, Option<NodePtr>)> = vec![(n1, None)];

assert_eq!(traverse_path_with_vec(&mut a, &[], &mut list).unwrap(), nul);

// cost for leading zeros
assert_eq!(
traverse_path_with_vec(&mut a, &[0], &mut list).unwrap(),
nul
);
assert_eq!(
traverse_path_with_vec(&mut a, &[0, 0], &mut list).unwrap(),
nul
);
assert_eq!(
traverse_path_with_vec(&mut a, &[0, 0, 0], &mut list).unwrap(),
nul
);
assert_eq!(
traverse_path_with_vec(&mut a, &[0, 0, 0, 0], &mut list).unwrap(),
nul
);

let mut list: Vec<(NodePtr, Option<NodePtr>)> = vec![(n1, None), (n2, None)];

assert_eq!(
traverse_path_with_vec(&mut a, &[0b10], &mut list).unwrap(),
n2
);
assert_eq!(
traverse_path_with_vec(&mut a, &[0b101], &mut list).unwrap(),
n1
);
assert_eq!(
traverse_path_with_vec(&mut a, &[0b111], &mut list).unwrap(),
nul
);

// errors
assert!(traverse_path_with_vec(&mut a, &[0b1011], &mut list).is_err());
assert!(traverse_path_with_vec(&mut a, &[0b1101], &mut list).is_err());
assert!(traverse_path_with_vec(&mut a, &[0b1001], &mut list).is_err());
assert!(traverse_path_with_vec(&mut a, &[0b1010], &mut list).is_err());
assert!(traverse_path_with_vec(&mut a, &[0b1110], &mut list).is_err());
}

#[rstest]
4 changes: 3 additions & 1 deletion src/serde/mod.rs
@@ -19,7 +19,9 @@ pub mod write_atom;
mod test;

pub use de::node_from_bytes;
pub use de_br::{node_from_bytes_backrefs, node_from_bytes_backrefs_record};
pub use de_br::{
node_from_bytes_backrefs, node_from_bytes_backrefs_old, node_from_bytes_backrefs_record,
};
pub use de_tree::{parse_triples, ParsedTriple};
pub use identity_hash::RandomState;
pub use incremental::{Serializer, UndoState};