Skip to content

Commit

Permalink
Fix stats() to correctly count multimap tables
Browse files Browse the repository at this point in the history
  • Loading branch information
cberner committed Sep 16, 2023
1 parent a6addc9 commit f05a59b
Show file tree
Hide file tree
Showing 6 changed files with 243 additions and 24 deletions.
183 changes: 178 additions & 5 deletions src/multimap_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,151 @@ use crate::multimap_table::DynamicCollectionType::{Inline, Subtree};
use crate::sealed::Sealed;
use crate::table::TableStats;
use crate::tree_store::{
AllPageNumbersBtreeIter, Btree, BtreeMut, BtreeRangeIter, CachePriority, Checksum,
LeafAccessor, LeafMutator, Page, PageHint, PageNumber, RawBtree, RawLeafBuilder,
TransactionalMemory, UntypedBtreeMut, BRANCH, LEAF, MAX_VALUE_LENGTH,
btree_stats, AllPageNumbersBtreeIter, BranchAccessor, Btree, BtreeMut, BtreeRangeIter,
BtreeStats, CachePriority, Checksum, LeafAccessor, LeafMutator, Page, PageHint, PageNumber,
RawBtree, RawLeafBuilder, TransactionalMemory, UntypedBtreeMut, BRANCH, LEAF, MAX_VALUE_LENGTH,
};
use crate::types::{RedbKey, RedbValue, TypeName};
use crate::{AccessGuard, Result, StorageError, WriteTransaction};
use std::borrow::Borrow;
use std::cmp::max;
use std::convert::TryInto;
use std::marker::PhantomData;
use std::mem;
use std::mem::size_of;
use std::ops::{RangeBounds, RangeFull};
use std::sync::{Arc, Mutex};

/// Computes statistics for the multimap btree rooted at `root`.
///
/// Returns all-zero statistics when `root` is `None`; otherwise the work is delegated to
/// `multimap_stats_helper`, starting from the root page.
pub(crate) fn multimap_btree_stats(
    root: Option<PageNumber>,
    mem: &TransactionalMemory,
    fixed_key_size: Option<usize>,
    fixed_value_size: Option<usize>,
) -> Result<BtreeStats> {
    match root {
        Some(root_page) => {
            multimap_stats_helper(root_page, mem, fixed_key_size, fixed_value_size)
        }
        // No root page: there is nothing to traverse, so every counter is zero.
        None => Ok(BtreeStats {
            tree_height: 0,
            leaf_pages: 0,
            branch_pages: 0,
            stored_leaf_bytes: 0,
            metadata_bytes: 0,
            fragmented_bytes: 0,
        }),
    }
}

/// Recursively gathers statistics for the multimap btree node stored at `page_number`.
///
/// * Leaf pages hold one dynamic collection per entry. Inline collections are measured in
///   place; subtree collections are measured by running `btree_stats` on the subtree root.
///   A leaf page that references at least one subtree is counted as a branch page rather
///   than a leaf page.
/// * Branch pages recurse into each of their child pages and sum the results.
///
/// Errors from reading pages or from `btree_stats` are propagated to the caller.
fn multimap_stats_helper(
    page_number: PageNumber,
    mem: &TransactionalMemory,
    fixed_key_size: Option<usize>,
    fixed_value_size: Option<usize>,
) -> Result<BtreeStats> {
    let page = mem.get_page(page_number)?;
    // The first byte of the page distinguishes leaf pages from branch pages.
    let node_type = page.memory()[0];
    match node_type {
        LEAF => {
            let leaf = LeafAccessor::new(
                page.memory(),
                fixed_key_size,
                DynamicCollection::<()>::fixed_width_with(fixed_value_size),
            );

            // First pass: measure the values stored inline in this page and note whether any
            // entry keeps its values in a separate subtree.
            let mut stored_leaf_bytes = 0u64;
            let mut has_subtree = false;
            for index in 0..leaf.num_pairs() {
                let pair = leaf.entry(index).unwrap();
                let values = UntypedDynamicCollection::new(pair.value());
                match values.collection_type() {
                    Subtree => has_subtree = true,
                    Inline => {
                        let inline_leaf = LeafAccessor::new(
                            values.as_inline(),
                            fixed_value_size,
                            <() as RedbValue>::fixed_width(),
                        );
                        let pair_count = inline_leaf.num_pairs();
                        stored_leaf_bytes += inline_leaf.length_of_pairs(0, pair_count) as u64;
                    }
                }
            }

            // Everything in the used part of the page that is not inline value data counts as
            // metadata; the unused remainder of the page counts as fragmentation.
            let used_bytes = leaf.total_length();
            let mut metadata_bytes = (used_bytes as u64) - stored_leaf_bytes;
            let mut fragmented_bytes = (page.memory().len() - used_bytes) as u64;
            let mut tallest_subtree = 0;
            let (mut leaf_pages, mut branch_pages) = if has_subtree { (0, 1) } else { (1, 0) };

            // Second pass: traverse every subtree and fold its statistics into the totals.
            for index in 0..leaf.num_pairs() {
                let pair = leaf.entry(index).unwrap();
                let values = UntypedDynamicCollection::new(pair.value());
                let subtree_root = match values.collection_type() {
                    // Inline data was already counted in the first pass.
                    Inline => continue,
                    Subtree => values.as_subtree().0,
                };
                let subtree = btree_stats(
                    Some(subtree_root),
                    mem,
                    fixed_value_size,
                    <() as RedbValue>::fixed_width(),
                )?;
                tallest_subtree = max(tallest_subtree, subtree.tree_height);
                leaf_pages += subtree.leaf_pages;
                branch_pages += subtree.branch_pages;
                stored_leaf_bytes += subtree.stored_leaf_bytes;
                metadata_bytes += subtree.metadata_bytes;
                fragmented_bytes += subtree.fragmented_bytes;
            }

            Ok(BtreeStats {
                tree_height: tallest_subtree + 1,
                leaf_pages,
                branch_pages,
                stored_leaf_bytes,
                metadata_bytes,
                fragmented_bytes,
            })
        }
        BRANCH => {
            let branch = BranchAccessor::new(&page, fixed_key_size);
            let used_bytes = branch.total_length();
            // Start with this page's own contribution, then add each child's statistics.
            let mut totals = BtreeStats {
                tree_height: 0,
                leaf_pages: 0,
                branch_pages: 1,
                stored_leaf_bytes: 0,
                metadata_bytes: used_bytes as u64,
                fragmented_bytes: (page.memory().len() - used_bytes) as u64,
            };
            let children =
                (0..branch.count_children()).filter_map(|slot| branch.child_page(slot));
            for child in children {
                let child_stats =
                    multimap_stats_helper(child, mem, fixed_key_size, fixed_value_size)?;
                totals.tree_height = max(totals.tree_height, child_stats.tree_height);
                totals.leaf_pages += child_stats.leaf_pages;
                totals.branch_pages += child_stats.branch_pages;
                totals.stored_leaf_bytes += child_stats.stored_leaf_bytes;
                totals.metadata_bytes += child_stats.metadata_bytes;
                totals.fragmented_bytes += child_stats.fragmented_bytes;
            }
            // This page adds one level on top of its tallest child.
            totals.tree_height += 1;

            Ok(totals)
        }
        _ => unreachable!(),
    }
}

// Verify all the checksums in the tree, including any Dynamic collection subtrees
pub(crate) fn verify_tree_and_subtree_checksums(
root: Option<(PageNumber, Checksum)>,
Expand Down Expand Up @@ -393,6 +524,38 @@ impl<V: RedbKey> DynamicCollection<V> {
}
}

/// A borrowed, type-erased view over the serialized bytes of a dynamic collection.
///
/// The first byte encodes the collection type. The bytes after it are either the inline
/// data, or a serialized subtree root (page number followed by checksum).
#[repr(transparent)]
pub(crate) struct UntypedDynamicCollection {
    data: [u8],
}

impl UntypedDynamicCollection {
    /// Reinterprets `data` as an `UntypedDynamicCollection` without copying.
    fn new(data: &[u8]) -> &Self {
        // SAFETY: `Self` is `#[repr(transparent)]` over `[u8]`, so `&[u8]` and `&Self` have
        // the same layout.
        unsafe { mem::transmute(data) }
    }

    /// Decodes the collection type from the leading tag byte.
    fn collection_type(&self) -> DynamicCollectionType {
        let tag = self.data[0];
        DynamicCollectionType::from(tag)
    }

    /// Returns the bytes following the tag byte. Only meaningful for `Inline` collections,
    /// which is checked in debug builds.
    fn as_inline(&self) -> &[u8] {
        debug_assert!(matches!(self.collection_type(), Inline));
        let payload = &self.data[1..];
        payload
    }

    /// Decodes the subtree root (page number and checksum) stored after the tag byte. Only
    /// meaningful for `Subtree` collections, which is checked in debug builds.
    fn as_subtree(&self) -> (PageNumber, Checksum) {
        debug_assert!(matches!(self.collection_type(), Subtree));
        // Layout: [tag byte][page number][checksum]
        let page_start = 1;
        let page_end = page_start + PageNumber::serialized_size();
        let checksum_end = page_end + size_of::<Checksum>();

        let page_bytes = self.data[page_start..page_end].try_into().unwrap();
        let checksum_bytes = self.data[page_end..checksum_end].try_into().unwrap();
        (
            PageNumber::from_le_bytes(page_bytes),
            Checksum::from_le_bytes(checksum_bytes),
        )
    }
}

enum ValueIterState<'a, V: RedbKey + 'static> {
Subtree(BtreeRangeIter<'a, V, ()>),
InlineLeaf(LeafKeyIter<'a, V>),
Expand Down Expand Up @@ -924,7 +1087,12 @@ impl<'db, 'txn, K: RedbKey + 'static, V: RedbKey + 'static> ReadableMultimapTabl
}

fn stats(&self) -> Result<TableStats> {
let tree_stats = self.tree.stats()?;
let tree_stats = multimap_btree_stats(
self.tree.get_root().map(|(p, _)| p),
self.mem,
K::fixed_width(),
V::fixed_width(),
)?;

Ok(TableStats {
tree_height: tree_stats.tree_height,
Expand Down Expand Up @@ -1043,7 +1211,12 @@ impl<'txn, K: RedbKey + 'static, V: RedbKey + 'static> ReadableMultimapTable<K,
}

fn stats(&self) -> Result<TableStats> {
let tree_stats = self.tree.stats()?;
let tree_stats = multimap_btree_stats(
self.tree.get_root().map(|(p, _)| p),
self.mem,
K::fixed_width(),
V::fixed_width(),
)?;

Ok(TableStats {
tree_height: tree_stats.tree_height,
Expand Down
4 changes: 4 additions & 0 deletions src/tree_store/btree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,10 @@ impl<'a, K: RedbKey, V: RedbValue> Btree<'a, K, V> {
})
}

/// Returns a copy of this tree's `root` field: the root page number paired with its
/// checksum, or `None` if no root is set.
pub(crate) fn get_root(&self) -> Option<(PageNumber, Checksum)> {
self.root
}

pub(crate) fn get(&self, key: &K::SelfType<'_>) -> Result<Option<AccessGuard<'a, V>>> {
if let Some(ref root_page) = self.cached_root {
self.get_helper(root_page.clone(), K::as_bytes(key).as_ref())
Expand Down
9 changes: 5 additions & 4 deletions src/tree_store/btree_base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1035,15 +1035,16 @@ impl<'a: 'b, 'b> LeafMutator<'a, 'b> {
}

// Provides a simple zero-copy way to access a branch page
pub(super) struct BranchAccessor<'a: 'b, 'b, T: Page + 'a> {
// TODO: this should be pub(super); move the multimap btree code into this module (tree_store) so that crate-wide visibility is no longer needed
pub(crate) struct BranchAccessor<'a: 'b, 'b, T: Page + 'a> {
page: &'b T,
num_keys: usize,
fixed_key_size: Option<usize>,
_page_lifetime: PhantomData<&'a ()>,
}

impl<'a: 'b, 'b, T: Page + 'a> BranchAccessor<'a, 'b, T> {
pub(super) fn new(page: &'b T, fixed_key_size: Option<usize>) -> Self {
pub(crate) fn new(page: &'b T, fixed_key_size: Option<usize>) -> Self {
debug_assert_eq!(page.memory()[0], BRANCH);
let num_keys = u16::from_le_bytes(page.memory()[2..4].try_into().unwrap()) as usize;
BranchAccessor {
Expand Down Expand Up @@ -1137,7 +1138,7 @@ impl<'a: 'b, 'b, T: Page + 'a> BranchAccessor<'a, 'b, T> {
Some(&self.page.memory()[offset..end])
}

pub(super) fn count_children(&self) -> usize {
pub(crate) fn count_children(&self) -> usize {
self.num_keys() + 1
}

Expand All @@ -1154,7 +1155,7 @@ impl<'a: 'b, 'b, T: Page + 'a> BranchAccessor<'a, 'b, T> {
))
}

pub(super) fn child_page(&self, n: usize) -> Option<PageNumber> {
pub(crate) fn child_page(&self, n: usize) -> Option<PageNumber> {
if n >= self.count_children() {
return None;
}
Expand Down
4 changes: 2 additions & 2 deletions src/tree_store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ mod btree_mutator;
mod page_store;
mod table_tree;

pub(crate) use btree::{Btree, BtreeMut, RawBtree, UntypedBtreeMut};
pub(crate) use btree_base::Checksum;
pub(crate) use btree::{btree_stats, Btree, BtreeMut, BtreeStats, RawBtree, UntypedBtreeMut};
pub use btree_base::{AccessGuard, AccessGuardMut};
pub(crate) use btree_base::{BranchAccessor, Checksum};
pub(crate) use btree_base::{LeafAccessor, LeafMutator, RawLeafBuilder, BRANCH, LEAF};
pub(crate) use btree_iters::{
AllPageNumbersBtreeIter, BtreeDrain, BtreeDrainFilter, BtreeRangeIter,
Expand Down
44 changes: 31 additions & 13 deletions src/tree_store/table_tree.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::error::TableError;
use crate::multimap_table::{
finalize_tree_and_subtree_checksums, verify_tree_and_subtree_checksums,
finalize_tree_and_subtree_checksums, multimap_btree_stats, verify_tree_and_subtree_checksums,
};
use crate::tree_store::btree::{btree_stats, UntypedBtreeMut};
use crate::tree_store::btree_base::Checksum;
Expand Down Expand Up @@ -746,18 +746,36 @@ impl<'txn> TableTree<'txn> {
if let Some(updated_root) = self.pending_table_updates.get(entry.key()) {
definition.table_root = *updated_root;
}
let subtree_stats = btree_stats(
definition.table_root.map(|(p, _)| p),
self.mem,
definition.fixed_key_size,
definition.fixed_value_size,
)?;
max_subtree_height = max(max_subtree_height, subtree_stats.tree_height);
total_stored_bytes += subtree_stats.stored_leaf_bytes;
total_metadata_bytes += subtree_stats.metadata_bytes;
total_fragmented += subtree_stats.fragmented_bytes;
branch_pages += subtree_stats.branch_pages;
leaf_pages += subtree_stats.leaf_pages;
match definition.get_type() {
TableType::Normal => {
let subtree_stats = btree_stats(
definition.table_root.map(|(p, _)| p),
self.mem,
definition.fixed_key_size,
definition.fixed_value_size,
)?;
max_subtree_height = max(max_subtree_height, subtree_stats.tree_height);
total_stored_bytes += subtree_stats.stored_leaf_bytes;
total_metadata_bytes += subtree_stats.metadata_bytes;
total_fragmented += subtree_stats.fragmented_bytes;
branch_pages += subtree_stats.branch_pages;
leaf_pages += subtree_stats.leaf_pages;
}
TableType::Multimap => {
let subtree_stats = multimap_btree_stats(
definition.table_root.map(|(p, _)| p),
self.mem,
definition.fixed_key_size,
definition.fixed_value_size,
)?;
max_subtree_height = max(max_subtree_height, subtree_stats.tree_height);
total_stored_bytes += subtree_stats.stored_leaf_bytes;
total_metadata_bytes += subtree_stats.metadata_bytes;
total_fragmented += subtree_stats.fragmented_bytes;
branch_pages += subtree_stats.branch_pages;
leaf_pages += subtree_stats.leaf_pages;
}
}
}
Ok(DatabaseStats {
tree_height: master_tree_stats.tree_height + max_subtree_height,
Expand Down
23 changes: 23 additions & 0 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,29 @@ fn regression21() {
txn.commit().unwrap();
}

/// Regression test: the stored byte count reported by `stats()` must strictly increase with
/// every value inserted under a single multimap key.
#[test]
fn multimap_stats() {
    let tmpfile = create_tempfile();
    let db = Database::builder().create(tmpfile.path()).unwrap();
    let table_def: MultimapTableDefinition<u128, u128> = MultimapTableDefinition::new("x");

    let mut previous_bytes = 0;
    for i in 0..1000 {
        // Insert one more value under key 0 and commit it.
        let mut write_txn = db.begin_write().unwrap();
        write_txn.set_durability(Durability::None);
        {
            // The table is scoped so that it is dropped before the transaction commits.
            let mut table = write_txn.open_multimap_table(table_def).unwrap();
            table.insert(0, i).unwrap();
        }
        write_txn.commit().unwrap();

        // Re-read the database statistics and verify that the stored size grew.
        let stats_txn = db.begin_write().unwrap();
        let current_bytes = stats_txn.stats().unwrap().stored_bytes();
        assert!(current_bytes > previous_bytes, "{i}");
        previous_bytes = current_bytes;
    }
}

#[test]
fn no_savepoint_resurrection() {
let tmpfile = create_tempfile();
Expand Down

0 comments on commit f05a59b

Please sign in to comment.