Skip to content

Commit

Permalink
Added a skeleton for the borrow tracking addition to serialized
Browse files Browse the repository at this point in the history
  • Loading branch information
OliverKillane committed Jun 24, 2024
1 parent 951c066 commit ade54f7
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 0 deletions.
103 changes: 103 additions & 0 deletions crates/emdb_core/src/backend/serialized/borrow_tracker.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
//! Track table borrows over a dataflow graph, visited in some ordering.
//! - The tracker needs to be accessed in a dataflow ordering over the graph.
//! - Produces the borrows held, and the collections required for them to be valid.
//!
//! By removing unnecessary intermediate collections, we can improve performance
//! for some operator implementations:
//! - Lazy implementations do not need to pull.
use std::collections::{HashMap, HashSet};

use crate::plan::{self, Collect};

/// The kind of borrow recorded against a table.
///
/// Derives the common value-type traits so a kind can be copied out of a
/// borrow map, compared against another kind, and printed while debugging.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BorrowKind {
    /// A read borrow.
    Read,
    /// A write borrow.
    Write,
}

/// Maps each borrowed table (by its plan key) to the kind of borrow recorded for it.
type Borrows<'imm> = HashMap<plan::ImmKey<'imm, plan::Table>, BorrowKind>;

/// State used to track which tables are borrowed while walking a dataflow graph.
pub struct BorrowTracker<'imm> {
    /// Table borrows that are not keyed by any single dataflow.
    // NOTE(review): presumably the borrows left over from dataflows that are no
    // longer live — confirm once the tracker methods are implemented.
    previous: Borrows<'imm>,

    /// Table borrows recorded per dataflow, keyed by the dataflow's plan key.
    current: HashMap<plan::ImmKey<'imm, plan::DataFlow>, Borrows<'imm>>,
}

/// Skeleton of the tracker's operations.
///
/// Every method here is still a stub: the bodies are `todo!()` and will panic
/// if called. The notes on intent below are read from the signatures only.
impl<'imm> BorrowTracker<'imm> {
    /// Not yet implemented.
    ///
    /// Takes a dataflow key and returns a set of table borrows.
    // NOTE(review): presumably removes `key` from the tracker and hands back the
    // borrows that were recorded for it — confirm when implementing.
    fn remove_dataflow(
        &mut self,
        key: plan::ImmKey<'imm, plan::DataFlow>,
    ) -> Borrows<'imm> {
        todo!()
    }

    /// Not yet implemented.
    // NOTE(review): presumably starts tracking `key` with no borrows — confirm
    // when implementing.
    fn add_dataflow(&mut self, key: plan::ImmKey<'imm, plan::DataFlow>) {
        todo!()
    }

    /// Add a new borrow of `table` (of the given `kind`) for the dataflow `key`,
    /// and return the collections that need to occur for that borrow to be valid.
    ///
    /// Not yet implemented.
    fn add_brw(
        &mut self,
        key: plan::ImmKey<'imm, plan::DataFlow>,
        table: plan::ImmKey<'imm, plan::Table>,
        kind: BorrowKind,
    ) -> Collections<'imm> {
        todo!()
    }

    /// Not yet implemented.
    // NOTE(review): presumably re-keys whatever is tracked for `from` so that it
    // is tracked under `to` instead — confirm when implementing.
    fn move_dataflow(
        &mut self,
        from: plan::ImmKey<'imm, plan::DataFlow>,
        to: plan::ImmKey<'imm, plan::DataFlow>,
    ) {
        todo!()
    }

    /// Consumes the tracker and returns a set of table borrows.
    ///
    /// Not yet implemented.
    fn end_context(self) -> Borrows<'imm> {
        todo!()
    }
}

/// A set of dataflows (by plan key); used as the return type for the
/// collections a borrow requires.
type Collections<'imm> = HashSet<plan::ImmKey<'imm, plan::DataFlow>>;

// Implemented by plan operators so their table borrows can be recorded in a
// `BorrowTracker`.
//
// The trait definition is stored under the name `trait_borrow_tracked` via
// `enumtrait::store`. Plain `//` comments are used here so the tokens handed to
// that macro are unchanged.
#[enumtrait::store(trait_borrow_tracked)]
trait BorrowTracked<'imm> {
    // Record this operator's borrows in `tracker`, using `lp` as the plan the
    // operator belongs to, and return a set of dataflows.
    //
    // The default body is a stub: it is `todo!()` and panics if called, so any
    // implementor that does not override it is not usable yet.
    fn track_borrow(
        &self,
        lp: &'imm plan::Plan,
        tracker: &mut BorrowTracker<'imm>,
    ) -> Collections<'imm> {
        todo!()
    }
}

// Implementation for the operator enum itself; the (empty) body is filled in by
// `enumtrait::impl_trait` from the stored `trait_borrow_tracked` definition.
// NOTE(review): presumably the generated method forwards to the implementation
// of whichever operator variant is held — confirm against the enumtrait docs.
#[enumtrait::impl_trait(trait_borrow_tracked for plan::operator_enum)]
impl<'imm> BorrowTracked<'imm> for plan::Operator {}

impl<'imm> BorrowTracked<'imm> for plan::UniqueRef {
    /// Registers a read borrow of this operator's table against its input
    /// dataflow, then calls `move_dataflow` from the input to the output.
    ///
    /// Returns whatever collections `add_brw` reported for the new read borrow.
    fn track_borrow(
        &self,
        lp: &'imm plan::Plan,
        tracker: &mut BorrowTracker<'imm>,
    ) -> Collections<'imm> {
        // Build all plan keys up front (same order as they were first needed).
        let destination = plan::ImmKey::new(self.output, lp);
        let source = plan::ImmKey::new(self.input, lp);
        let table = plan::ImmKey::new(self.table, lp);

        // The borrow must be recorded on the input before it is moved onwards.
        let required = tracker.add_brw(source, table, BorrowKind::Read);
        tracker.move_dataflow(source, destination);

        required
    }
}

// Operators without a dedicated implementation yet.
//
// Each empty impl below inherits the trait's default `track_borrow`, which is
// currently `todo!()` and therefore panics if called. Replace an entry with a
// real implementation as borrow tracking for that operator is written.
impl<'imm> BorrowTracked<'imm> for plan::ScanRefs {}
impl<'imm> BorrowTracked<'imm> for plan::DeRef {}
impl<'imm> BorrowTracked<'imm> for plan::Update {}
impl<'imm> BorrowTracked<'imm> for plan::Insert {}
impl<'imm> BorrowTracked<'imm> for plan::Delete {}
impl<'imm> BorrowTracked<'imm> for plan::Map {}
impl<'imm> BorrowTracked<'imm> for plan::Expand {}
impl<'imm> BorrowTracked<'imm> for plan::Fold {}
impl<'imm> BorrowTracked<'imm> for plan::Filter {}
impl<'imm> BorrowTracked<'imm> for plan::Sort {}
impl<'imm> BorrowTracked<'imm> for plan::Assert {}
impl<'imm> BorrowTracked<'imm> for plan::Combine {}
impl<'imm> BorrowTracked<'imm> for plan::Count {}
impl<'imm> BorrowTracked<'imm> for plan::Take {}
impl<'imm> BorrowTracked<'imm> for plan::Collect {}
impl<'imm> BorrowTracked<'imm> for plan::Join {}
impl<'imm> BorrowTracked<'imm> for plan::Fork {}
impl<'imm> BorrowTracked<'imm> for plan::Union {}
impl<'imm> BorrowTracked<'imm> for plan::Row {}
impl<'imm> BorrowTracked<'imm> for plan::Return {}
impl<'imm> BorrowTracked<'imm> for plan::Discard {}

// Context-containing operators; also still on the `todo!()` default.
impl<'imm> BorrowTracked<'imm> for plan::GroupBy {}
impl<'imm> BorrowTracked<'imm> for plan::Lift {}
15 changes: 15 additions & 0 deletions crates/emdb_core/src/backend/serialized/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@
//! - Generates a table object that uses parallelism internally, but only allows
//! queries to execute in parallel if they are read only (normal borrow checker
//! rules apply)
//!
//! ## Potential Improvements
//! 1. More precise checking of borrows ([`borrow_tracker`]) will allow for a lazy
//! backend that does not collect intermediate results until absolutely necessary
//! 2. Removal of unnecessary transaction log append. When no more errors are possible,
//! commit and successive mutations (that cannot error) do not write to the log.
//! 3. Better parallel operator implementation, needs to be more coarse grained.
//! 4. Borrow operator instead of get for some operations.
//! 5. Separating borrows of values and keys (insert, delete affect set of keys,
//! update affects set of values)
//! 6. More unchecked operations (update, delete), using dataflow analysis.
//! Will need to `assume!(@unreachable)` for invalid error returns.
//! 7. Improve [`super::plan::GroupBy`] and [`super::plan::Join`] performance,
//! include multi-way joins.
use combi::{
core::{choice, mapsuc}, macros::choices, tokens::{
Expand All @@ -31,6 +45,7 @@ mod operators;
mod queries;
mod tables;
mod types;
mod borrow_tracker;

pub struct Serialized {
debug: Option<LitStr>,
Expand Down

0 comments on commit ade54f7

Please sign in to comment.