Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Major performance improvement to compact() #858

Merged
merged 4 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benches/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ impl<'a> BenchDatabase for HeedBenchDatabase<'a> {
type R<'db> = HeedBenchReadTransaction<'db> where Self: 'db;

fn db_type_name() -> &'static str {
"heed"
"lmdb"
}

fn write_transaction(&self) -> Self::W<'_> {
Expand Down
8 changes: 4 additions & 4 deletions src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -568,8 +568,8 @@ impl Database {
// Chain all the other tables to the master table iter
for entry in iter {
let definition = entry?.value();
definition.visit_all_pages(mem.clone(), |page_number| {
assert!(mem.is_allocated(page_number));
definition.visit_all_pages(mem.clone(), |path| {
assert!(mem.is_allocated(path.page_number()));
Ok(())
})?;
}
Expand All @@ -594,9 +594,9 @@ impl Database {
// Chain all the other tables to the master table iter
for entry in iter {
let definition = entry?.value();
definition.visit_all_pages(mem.clone(), |page_number| {
definition.visit_all_pages(mem.clone(), |path| {
// TODO: simplify mark_pages_allocated()
mem.mark_pages_allocated([Ok(page_number)].into_iter(), allow_duplicates)?;
mem.mark_pages_allocated([Ok(path.page_number())].into_iter(), allow_duplicates)?;
Ok(())
})?;
}
Expand Down
111 changes: 83 additions & 28 deletions src/multimap_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ use crate::table::{ReadableTableMetadata, TableStats};
use crate::tree_store::{
btree_stats, AllPageNumbersBtreeIter, BranchAccessor, BranchMutator, Btree, BtreeHeader,
BtreeMut, BtreeRangeIter, BtreeStats, CachePriority, Checksum, LeafAccessor, LeafMutator, Page,
PageHint, PageNumber, RawBtree, RawLeafBuilder, TransactionalMemory, UntypedBtreeMut, BRANCH,
DEFERRED, LEAF, MAX_PAIR_LENGTH, MAX_VALUE_LENGTH,
PageHint, PageNumber, PagePath, RawBtree, RawLeafBuilder, TransactionalMemory, UntypedBtree,
UntypedBtreeMut, BRANCH, DEFERRED, LEAF, MAX_PAIR_LENGTH, MAX_VALUE_LENGTH,
};
use crate::types::{Key, TypeName, Value};
use crate::{AccessGuard, MultimapTableHandle, Result, StorageError, WriteTransaction};
use std::borrow::Borrow;
use std::cmp::max;
use std::collections::HashMap;
use std::convert::TryInto;
use std::marker::PhantomData;
use std::mem;
Expand Down Expand Up @@ -197,18 +198,17 @@ pub(crate) fn relocate_subtrees(
value_size: Option<usize>,
mem: Arc<TransactionalMemory>,
freed_pages: Arc<Mutex<Vec<PageNumber>>>,
relocation_map: &HashMap<PageNumber, PageNumber>,
) -> Result<(PageNumber, Checksum)> {
let old_page = mem.get_page(root.0)?;
let mut new_page = mem.allocate_lowest(
old_page.memory().len(),
CachePriority::default_btree(old_page.memory()),
)?;

let mut new_page = if let Some(new_page_number) = relocation_map.get(&root.0) {
mem.get_page_mut(*new_page_number)?
} else {
return Ok(root);
};
let new_page_number = new_page.get_page_number();
new_page.memory_mut().copy_from_slice(old_page.memory());

let mut changed = false;

match old_page.memory()[0] {
LEAF => {
let accessor = LeafAccessor::new(
Expand All @@ -234,11 +234,11 @@ pub(crate) fn relocate_subtrees(
value_size,
<() as Value>::fixed_width(),
);
if tree.relocate()? {
tree.relocate(relocation_map)?;
if sub_root != tree.get_root().unwrap() {
let new_collection =
UntypedDynamicCollection::make_subtree_data(tree.get_root().unwrap());
mutator.insert(i, true, entry.key(), &new_collection);
changed = true;
}
}
}
Expand All @@ -255,29 +255,21 @@ pub(crate) fn relocate_subtrees(
value_size,
mem.clone(),
freed_pages.clone(),
relocation_map,
)?;
mutator.write_child_page(i, new_child, new_checksum);
if new_child != child {
changed = true;
}
}
}
}
_ => unreachable!(),
}

if changed || new_page_number.is_before(old_page.get_page_number()) {
let old_page_number = old_page.get_page_number();
drop(old_page);
if !mem.free_if_uncommitted(old_page_number) {
freed_pages.lock().unwrap().push(old_page_number);
}
Ok((new_page_number, DEFERRED))
} else {
drop(new_page);
mem.free(new_page_number);
Ok(root)
let old_page_number = old_page.get_page_number();
drop(old_page);
if !mem.free_if_uncommitted(old_page_number) {
freed_pages.lock().unwrap().push(old_page_number);
}
Ok((new_page_number, DEFERRED))
}

// Finalize all the checksums in the tree, including any Dynamic collection subtrees
Expand Down Expand Up @@ -342,7 +334,7 @@ pub(crate) fn finalize_tree_and_subtree_checksums(
Ok(tree.get_root())
}

pub(crate) fn parse_subtree_roots<T: Page>(
fn parse_subtree_roots<T: Page>(
page: &T,
fixed_key_size: Option<usize>,
fixed_value_size: Option<usize>,
Expand Down Expand Up @@ -372,6 +364,69 @@ pub(crate) fn parse_subtree_roots<T: Page>(
}
}

/// Untyped handle on a multimap btree: the tree is described only by its root
/// header and the optional fixed widths of its keys and values.
pub(crate) struct UntypedMultiBtree {
    mem: Arc<TransactionalMemory>,
    root: Option<BtreeHeader>,
    key_width: Option<usize>,
    value_width: Option<usize>,
}

impl UntypedMultiBtree {
    /// Bundles the root header, the backing memory, and the fixed key/value
    /// widths into a handle. Nothing is read from `mem` here.
    pub(crate) fn new(
        root: Option<BtreeHeader>,
        mem: Arc<TransactionalMemory>,
        key_width: Option<usize>,
        value_width: Option<usize>,
    ) -> Self {
        Self { mem, root, key_width, value_width }
    }

    /// Applies `visitor` to the pages of the outer tree and, for every leaf
    /// page, to the pages of each subtree whose root is stored in that leaf.
    ///
    /// Outer pages are reported with the path handed out by the outer
    /// traversal. Subtree pages are reported with that leaf's path combined
    /// with the subtree-local path via `with_subpath`. Errors returned by
    /// `visitor`, by loading a page, or by either traversal are propagated to
    /// the caller.
    pub(crate) fn visit_all_pages<F>(&self, mut visitor: F) -> Result
    where
        F: FnMut(&PagePath) -> Result,
    {
        // Values of the outer tree are dynamic collections, so their width is
        // derived from the width of the multimap's values.
        let collection_width = UntypedDynamicCollection::fixed_width_with(self.value_width);
        let outer_tree = UntypedBtree::new(
            self.root,
            self.mem.clone(),
            self.key_width,
            collection_width,
        );

        outer_tree.visit_all_pages(|outer_path| {
            // Report the outer page itself before descending into anything it references.
            visitor(outer_path)?;

            let page = self.mem.get_page(outer_path.page_number())?;
            match page.memory()[0] {
                // Nothing to do: the enclosing `outer_tree.visit_all_pages()` traversal
                // visits the children of branch pages on its own.
                BRANCH => Ok(()),
                LEAF => {
                    for subtree_root in
                        parse_subtree_roots(&page, self.key_width, self.value_width)
                    {
                        // A subtree is keyed by the multimap's values and stores unit values.
                        let subtree = UntypedBtree::new(
                            Some(subtree_root),
                            self.mem.clone(),
                            self.value_width,
                            <() as Value>::fixed_width(),
                        );
                        subtree.visit_all_pages(|subpath| {
                            visitor(&outer_path.with_subpath(subpath))
                        })?;
                    }
                    Ok(())
                }
                _ => unreachable!(),
            }
        })?;

        Ok(())
    }
}

pub(crate) struct LeafKeyIter<'a, V: Key + 'static> {
inline_collection: AccessGuard<'a, &'static DynamicCollection<V>>,
fixed_key_size: Option<usize>,
Expand Down Expand Up @@ -474,7 +529,7 @@ impl Into<u8> for DynamicCollectionType {
/// See [Exotically Sized Types](https://doc.rust-lang.org/nomicon/exotic-sizes.html#dynamically-sized-types-dsts)
/// section of the Rustonomicon for more details.
#[repr(transparent)]
pub(crate) struct DynamicCollection<V: Key> {
struct DynamicCollection<V: Key> {
_value_type: PhantomData<V>,
data: [u8],
}
Expand Down Expand Up @@ -637,7 +692,7 @@ impl<V: Key> DynamicCollection<V> {
}

#[repr(transparent)]
pub(crate) struct UntypedDynamicCollection {
struct UntypedDynamicCollection {
data: [u8],
}

Expand Down
64 changes: 55 additions & 9 deletions src/transactions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ use crate::sealed::Sealed;
use crate::table::ReadOnlyUntypedTable;
use crate::transaction_tracker::{SavepointId, TransactionId, TransactionTracker};
use crate::tree_store::{
Btree, BtreeHeader, BtreeMut, FreedPageList, FreedTableKey, InternalTableDefinition, PageHint,
PageNumber, SerializedSavepoint, TableTree, TableTreeMut, TableType, TransactionalMemory,
MAX_PAIR_LENGTH, MAX_VALUE_LENGTH,
Btree, BtreeHeader, BtreeMut, CachePriority, FreedPageList, FreedTableKey,
InternalTableDefinition, Page, PageHint, PageNumber, SerializedSavepoint, TableTree,
TableTreeMut, TableType, TransactionalMemory, MAX_PAIR_LENGTH, MAX_VALUE_LENGTH,
};
use crate::types::{Key, Value};
use crate::{
Expand All @@ -20,7 +20,7 @@ use crate::{
use log::{debug, warn};
use std::borrow::Borrow;
use std::cmp::min;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::{Debug, Display, Formatter};
use std::marker::PhantomData;
use std::ops::RangeBounds;
Expand All @@ -30,6 +30,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::{panic, thread};

const MAX_PAGES_PER_COMPACTION: usize = 1_000_000;
const NEXT_SAVEPOINT_TABLE: SystemTableDefinition<(), SavepointId> =
SystemTableDefinition::new("next_savepoint_id");
pub(crate) const SAVEPOINT_TABLE: SystemTableDefinition<SavepointId, SerializedSavepoint> =
Expand Down Expand Up @@ -1160,18 +1161,63 @@ impl WriteTransaction {
progress = true;
}

// Relocate the btree pages
// Find the 1M highest pages
let mut highest_pages = BTreeMap::new();
let mut tables = self.tables.lock().unwrap();
let table_tree = &mut tables.table_tree;
if table_tree.compact_tables()? {
progress = true;
}
table_tree.highest_index_pages(MAX_PAGES_PER_COMPACTION, &mut highest_pages)?;
let mut system_tables = self.system_tables.lock().unwrap();
let system_table_tree = &mut system_tables.table_tree;
if system_table_tree.compact_tables()? {
system_table_tree.highest_index_pages(MAX_PAGES_PER_COMPACTION, &mut highest_pages)?;

// Calculate how many of them can be relocated to lower pages, starting from the last page
let mut relocation_map = HashMap::new();
for path in highest_pages.into_values().rev() {
if relocation_map.contains_key(&path.page_number()) {
continue;
}
let old_page = self.mem.get_page(path.page_number())?;
let mut new_page = self.mem.allocate_lowest(
old_page.memory().len(),
CachePriority::default_btree(old_page.memory()),
)?;
let new_page_number = new_page.get_page_number();
// We have to copy at least the page type into the new page.
// Otherwise its cache priority will be calculated incorrectly
new_page.memory_mut()[0] = old_page.memory()[0];
drop(new_page);
// We're able to move this to a lower page, so insert it and rewrite all its parents
if new_page_number < path.page_number() {
relocation_map.insert(path.page_number(), new_page_number);
for parent in path.parents() {
if relocation_map.contains_key(parent) {
continue;
}
let old_parent = self.mem.get_page(*parent)?;
let mut new_page = self.mem.allocate_lowest(
old_parent.memory().len(),
CachePriority::default_btree(old_parent.memory()),
)?;
let new_page_number = new_page.get_page_number();
// We have to copy at least the page type into the new page.
// Otherwise its cache priority will be calculated incorrectly
new_page.memory_mut()[0] = old_parent.memory()[0];
drop(new_page);
relocation_map.insert(*parent, new_page_number);
}
} else {
self.mem.free(new_page_number);
break;
}
}

if !relocation_map.is_empty() {
progress = true;
}

table_tree.relocate_tables(&relocation_map)?;
system_table_tree.relocate_tables(&relocation_map)?;

Ok(progress)
}

Expand Down
Loading
Loading