-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Node from stream backrefs optimisation #532
base: main
Are you sure you want to change the base?
Changes from all commits
438e2df
fdeef9f
bdeea87
a9b3351
47e41b8
5d38c06
99b0c1d
ddceeba
8060a47
d394e0b
f8f1e08
de2b748
3d35f09
5231929
791030b
675ab2c
982498f
3b86c79
423dd73
64340f1
bd93d2a
49e26e2
25cf07c
d322960
fcc634e
8b31b55
55085d0
e7bb047
9e96280
880d7f6
97f6f0f
0ce9f50
31e4fb4
b2946db
1ef2bc7
14d1de5
c469103
d3986f9
e7a8b5c
864ef16
d6d8cd4
0a00f77
85a9785
e8f3f2a
1a30940
1f790fe
e2fed6c
eb9f678
62c4a82
c6a4071
55cdb7f
437f493
08dd203
948712f
11ad810
90bb3de
bf09050
24abb5a
11fb7f6
a82e47f
e1d0b32
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#![no_main] | ||
use clvmr::allocator::Allocator; | ||
use clvmr::serde::node_from_bytes_backrefs; | ||
use clvmr::serde::node_from_bytes_backrefs_old; | ||
use clvmr::serde::node_to_bytes_backrefs; | ||
use libfuzzer_sys::fuzz_target; | ||
|
||
// #[cfg(feature = "counters")] | ||
fuzz_target!(|data: &[u8]| { | ||
let mut allocator = Allocator::new(); | ||
let cp = allocator.checkpoint(); | ||
let program = match node_from_bytes_backrefs(&mut allocator, data) { | ||
Err(_) => { | ||
return; | ||
} | ||
Ok(r) => r, | ||
}; | ||
|
||
let b1 = node_to_bytes_backrefs(&allocator, program).unwrap(); | ||
|
||
// reset allocators | ||
allocator.restore_checkpoint(&cp); | ||
|
||
let _program = node_from_bytes_backrefs(&mut allocator, &b1).unwrap(); | ||
let new_pair_count = allocator.pair_count(); | ||
allocator.restore_checkpoint(&cp); | ||
let _program_old = node_from_bytes_backrefs_old(&mut allocator, &b1).unwrap(); | ||
assert!(new_pair_count <= allocator.pair_count()); | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,10 +2,10 @@ use std::collections::HashSet; | |
use std::io; | ||
use std::io::{Cursor, Read}; | ||
|
||
use crate::allocator::{Allocator, NodePtr, SExp}; | ||
use crate::traverse_path::traverse_path; | ||
|
||
use super::parse_atom::{parse_atom, parse_path}; | ||
use crate::allocator::{Allocator, NodePtr, SExp}; | ||
use crate::reduction::EvalErr; | ||
use crate::traverse_path::{first_non_zero, msb_mask, traverse_path}; | ||
|
||
const BACK_REFERENCE: u8 = 0xfe; | ||
const CONS_BOX_MARKER: u8 = 0xff; | ||
|
@@ -21,6 +21,47 @@ pub fn node_from_stream_backrefs( | |
allocator: &mut Allocator, | ||
f: &mut Cursor<&[u8]>, | ||
mut backref_callback: impl FnMut(NodePtr), | ||
) -> io::Result<NodePtr> { | ||
let mut values = Vec::<(NodePtr, Option<NodePtr>)>::new(); | ||
let mut ops = vec![ParseOp::SExp]; | ||
|
||
let mut b = [0; 1]; | ||
while let Some(op) = ops.pop() { | ||
match op { | ||
ParseOp::SExp => { | ||
f.read_exact(&mut b)?; | ||
if b[0] == CONS_BOX_MARKER { | ||
ops.push(ParseOp::Cons); | ||
ops.push(ParseOp::SExp); | ||
ops.push(ParseOp::SExp); | ||
} else if b[0] == BACK_REFERENCE { | ||
let path = parse_path(f)?; | ||
let back_reference = traverse_path_with_vec(allocator, path, &mut values)?; | ||
backref_callback(back_reference); | ||
values.push((back_reference, None)); | ||
} else { | ||
let new_atom = parse_atom(allocator, b[0], f)?; | ||
values.push((new_atom, None)); | ||
} | ||
} | ||
ParseOp::Cons => { | ||
// cons | ||
// pop left and right values off of the "values" stack, then | ||
// push the new pair onto it | ||
let right = values.pop().expect("No cons without two vals."); | ||
let left = values.pop().expect("No cons without two vals."); | ||
let root_node = allocator.new_pair(left.0, right.0)?; | ||
values.push((root_node, None)); | ||
} | ||
} | ||
} | ||
Ok(values.pop().expect("Top of the stack").0) | ||
} | ||
|
||
fn node_from_stream_backrefs_old( | ||
allocator: &mut Allocator, | ||
f: &mut Cursor<&[u8]>, | ||
mut backref_callback: impl FnMut(NodePtr), | ||
) -> io::Result<NodePtr> { | ||
let mut values = allocator.nil(); | ||
let mut ops = vec![ParseOp::SExp]; | ||
|
@@ -71,6 +112,11 @@ pub fn node_from_bytes_backrefs(allocator: &mut Allocator, b: &[u8]) -> io::Resu | |
node_from_stream_backrefs(allocator, &mut buffer, |_node| {}) | ||
} | ||
|
||
pub fn node_from_bytes_backrefs_old(allocator: &mut Allocator, b: &[u8]) -> io::Result<NodePtr> { | ||
let mut buffer = Cursor::new(b); | ||
node_from_stream_backrefs_old(allocator, &mut buffer, |_node| {}) | ||
} | ||
|
||
pub fn node_from_bytes_backrefs_record( | ||
allocator: &mut Allocator, | ||
b: &[u8], | ||
|
@@ -83,8 +129,95 @@ pub fn node_from_bytes_backrefs_record( | |
Ok((ret, backrefs)) | ||
} | ||
|
||
pub fn traverse_path_with_vec( | ||
allocator: &mut Allocator, | ||
node_index: &[u8], | ||
args: &mut [(NodePtr, Option<NodePtr>)], | ||
) -> io::Result<NodePtr> { | ||
// the vec is a stack so a ChiaLisp list of (3 . (2 . (1 . NIL))) would be [1, 2, 3] | ||
// however entries in this vec may be ChiaLisp SExps so it may look more like [1, (2 . NIL), 3] | ||
|
||
let mut parsing_sexp = args.is_empty(); | ||
|
||
// instead of popping, we treat this as a pointer to the end of the virtual stack | ||
let mut arg_index: usize = if parsing_sexp { 0 } else { args.len() - 1 }; | ||
|
||
// find first non-zero byte | ||
let first_bit_byte_index = first_non_zero(node_index); | ||
if first_bit_byte_index >= node_index.len() { | ||
return Ok(NodePtr::NIL); | ||
} | ||
|
||
// find first non-zero bit (the most significant bit is a sentinel) | ||
let last_bitmask = msb_mask(node_index[first_bit_byte_index]); | ||
|
||
// follow through the bits, moving left and right | ||
let mut byte_idx = node_index.len() - 1; | ||
let mut bitmask = 0x01; | ||
|
||
// if we move from parsing the Vec stack to parsing the SExp stack use the following variables | ||
let mut sexp_to_parse = NodePtr::NIL; | ||
|
||
while byte_idx > first_bit_byte_index || bitmask < last_bitmask { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. did you explore making this two loops, one for the |
||
let is_bit_set: bool = (node_index[byte_idx] & bitmask) != 0; | ||
if parsing_sexp { | ||
match allocator.sexp(sexp_to_parse) { | ||
SExp::Atom => { | ||
return Err(EvalErr( | ||
sexp_to_parse, | ||
"invalid backreference during deserialisation".into(), | ||
) | ||
.into()); | ||
} | ||
SExp::Pair(left, right) => { | ||
sexp_to_parse = if is_bit_set { right } else { left }; | ||
} | ||
} | ||
} else if is_bit_set { | ||
// we have traversed right ("rest"), so we keep processing the Vec | ||
// pop from the stack | ||
if arg_index == 0 { | ||
// if we have reached the end of the stack, we must start parsing as NIL | ||
parsing_sexp = true; | ||
} else { | ||
arg_index -= 1; | ||
} | ||
} else { | ||
// we have traversed left (i.e "first" rather than "rest") so we must process as SExp now | ||
parsing_sexp = true; | ||
sexp_to_parse = args[arg_index].0; | ||
} | ||
|
||
if bitmask == 0x80 { | ||
bitmask = 0x01; | ||
byte_idx -= 1; | ||
} else { | ||
bitmask <<= 1; | ||
} | ||
} | ||
if parsing_sexp { | ||
return Ok(sexp_to_parse); | ||
} else if args[arg_index].1.is_some() { | ||
// cached entry exists - use that instead of recreating the pairs | ||
return Ok(args[arg_index].1.unwrap()); | ||
} | ||
// take bottom of stack and make (item . NIL) | ||
let mut backref_node = allocator.new_pair(args[0].0, NodePtr::NIL)?; | ||
if arg_index == 0 { | ||
args[arg_index].1 = Some(backref_node); | ||
return Ok(backref_node); | ||
} | ||
// for the rest of items starting from last + 1 in stack | ||
for x in args.iter().take(arg_index + 1).skip(1) { | ||
backref_node = allocator.new_pair(x.0, backref_node)?; | ||
} | ||
args[arg_index].1 = Some(backref_node); | ||
Ok(backref_node) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
|
||
use super::*; | ||
use rstest::rstest; | ||
|
||
|
@@ -96,6 +229,12 @@ mod tests { | |
"ff86666f6f626172ff86666f6f62617280", | ||
"9148834131750904c023598bed28db269bdb29012514579e723d63e27829bcba" | ||
)] | ||
// () | ||
// stackpointer to 0 | ||
#[case( | ||
"fffe0100", | ||
"8b7bb45fbbdd29c84edcab98274c6e084f19af64d24c65a475562dbbfee67735" | ||
)] | ||
// ("foobar" "foobar") | ||
#[case( | ||
"ff86666f6f626172fe01", | ||
|
@@ -126,11 +265,69 @@ mod tests { | |
let buf = Vec::from_hex(serialization_as_hex).unwrap(); | ||
let mut allocator = Allocator::new(); | ||
let node = node_from_bytes_backrefs(&mut allocator, &buf).unwrap(); | ||
let old_node = node_from_bytes_backrefs_old(&mut allocator, &buf).unwrap(); | ||
let mut oc = ObjectCache::new(treehash); | ||
let calculated_hash = oc.get_or_calculate(&allocator, &node, None).unwrap(); | ||
let ch: &[u8] = calculated_hash; | ||
let expected_hash: Vec<u8> = Vec::from_hex(expected_hash_as_hex).unwrap(); | ||
assert_eq!(expected_hash, ch); | ||
let calculated_hash = oc.get_or_calculate(&allocator, &old_node, None).unwrap(); | ||
let ch: &[u8] = calculated_hash; | ||
assert_eq!(expected_hash, ch); | ||
} | ||
|
||
#[test] | ||
fn test_traverse_path_with_vec() { | ||
use crate::allocator::Allocator; | ||
|
||
let mut a = Allocator::new(); | ||
let nul = a.nil(); | ||
let n1 = a.new_atom(&[0, 1, 2]).unwrap(); | ||
let n2 = a.new_atom(&[4, 5, 6]).unwrap(); | ||
|
||
let mut list: Vec<(NodePtr, Option<NodePtr>)> = vec![(n1, None)]; | ||
|
||
assert_eq!(traverse_path_with_vec(&mut a, &[], &mut list).unwrap(), nul); | ||
|
||
// cost for leading zeros | ||
assert_eq!( | ||
traverse_path_with_vec(&mut a, &[0], &mut list).unwrap(), | ||
nul | ||
); | ||
assert_eq!( | ||
traverse_path_with_vec(&mut a, &[0, 0], &mut list).unwrap(), | ||
nul | ||
); | ||
assert_eq!( | ||
traverse_path_with_vec(&mut a, &[0, 0, 0], &mut list).unwrap(), | ||
nul | ||
); | ||
assert_eq!( | ||
traverse_path_with_vec(&mut a, &[0, 0, 0, 0], &mut list).unwrap(), | ||
nul | ||
); | ||
|
||
let mut list: Vec<(NodePtr, Option<NodePtr>)> = vec![(n1, None), (n2, None)]; | ||
|
||
assert_eq!( | ||
traverse_path_with_vec(&mut a, &[0b10], &mut list).unwrap(), | ||
n2 | ||
); | ||
assert_eq!( | ||
traverse_path_with_vec(&mut a, &[0b101], &mut list).unwrap(), | ||
n1 | ||
); | ||
assert_eq!( | ||
traverse_path_with_vec(&mut a, &[0b111], &mut list).unwrap(), | ||
nul | ||
); | ||
|
||
// errors | ||
assert!(traverse_path_with_vec(&mut a, &[0b1011], &mut list).is_err()); | ||
assert!(traverse_path_with_vec(&mut a, &[0b1101], &mut list).is_err()); | ||
assert!(traverse_path_with_vec(&mut a, &[0b1001], &mut list).is_err()); | ||
assert!(traverse_path_with_vec(&mut a, &[0b1010], &mut list).is_err()); | ||
assert!(traverse_path_with_vec(&mut a, &[0b1110], &mut list).is_err()); | ||
} | ||
|
||
#[rstest] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is a pretty big function for not having a unit test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a unit test