Skip to content

Commit

Permalink
revset: introduce more performant way to check if a commit is in a re…
Browse files Browse the repository at this point in the history
…vset

Initially we were thinking to have `Revset` return something like
`CachedRevset`:

```
pub trait CachedRevset {
  fn iter(&self) -> Box<dyn Iterator<Item = Commit>>;
  fn contains(&self, &CommitId) -> bool;
}
```

But we weren't sure what use case for `iter` would be, so we dropped the `iter`
method. `CachedRevset` with single `contains` method needed a better name. We
weren't able to come up with one, so we decided instead to have a method on
`Revset` that returns a closure to check if a commit is in a revset.
  • Loading branch information
zummenix committed Mar 10, 2024
1 parent 099f06b commit dd9bf37
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 0 deletions.
137 changes: 137 additions & 0 deletions lib/src/default_index/revset_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@

#![allow(missing_docs)]

use std::cell::RefCell;
use std::cmp::{Ordering, Reverse};
use std::collections::{BTreeSet, BinaryHeap, HashSet};
use std::fmt;
use std::iter::Peekable;
use std::ops::Range;
use std::rc::Rc;
use std::sync::Arc;

use itertools::Itertools;
Expand Down Expand Up @@ -163,6 +165,76 @@ impl<I: AsCompositeIndex> Revset for RevsetImpl<I> {
(count, Some(count))
}
}

fn containing_fn(&self) -> Box<dyn Fn(&CommitId) -> bool + '_> {
let positions = PositionsAccumulator::new(self.index.as_composite(), self.positions());
Box::new(move |commit_id| positions.contains(commit_id))
}
}

/// Incrementally consumes positions iterator of the revset collecting
/// positions.
struct PositionsAccumulator<'revset, 'index> {
index: CompositeIndex<'index>,
inner: Rc<RefCell<PositionsAccumulatorInner<'revset>>>,
}

impl<'revset, 'index> PositionsAccumulator<'revset, 'index> {
fn new(
index: CompositeIndex<'index>,
positions_iter: Box<dyn Iterator<Item = IndexPosition> + 'revset>,
) -> Self {
let inner = Rc::new(RefCell::new(PositionsAccumulatorInner {
positions_iter: Some(positions_iter),
consumed_positions: Vec::new(),
}));
PositionsAccumulator { index, inner }
}

/// Checks whether the commit is in the revset.
fn contains(&self, commit_id: &CommitId) -> bool {
let Some(position) = self.index.commit_id_to_pos(commit_id) else {
return false;
};

let mut inner = self.inner.borrow_mut();
if let Some(last_position) = inner.consumed_positions.last() {
if last_position > &position {
inner.consume_to(position);
}
} else {
inner.consume_to(position);
}

inner
.consumed_positions
.binary_search_by(|p| p.cmp(&position).reverse())
.is_ok()
}

#[cfg(test)]
fn consumed_len(&self) -> usize {
self.inner.borrow().consumed_positions.len()
}
}

/// Helper struct for [`PositionsAccumulator`] to simplify interior mutability.
struct PositionsAccumulatorInner<'revset> {
positions_iter: Option<Box<dyn Iterator<Item = IndexPosition> + 'revset>>,
consumed_positions: Vec<IndexPosition>,
}

impl<'revset> PositionsAccumulatorInner<'revset> {
/// Consumes positions iterator to a desired position but not deeper.
fn consume_to(&mut self, desired_position: IndexPosition) {
while let Some(position) = self.positions_iter.as_mut().and_then(|iter| iter.next()) {
self.consumed_positions.push(position);
if position <= desired_position {
return;
}
}
self.positions_iter = None;
}
}

#[derive(Debug)]
Expand Down Expand Up @@ -1242,4 +1314,69 @@ mod tests {
assert!(!p(&get_entry(&id_1)));
assert!(p(&get_entry(&id_0)));
}

#[test]
fn test_positions_accumulator() {
let mut new_change_id = change_id_generator();
let mut index = DefaultMutableIndex::full(3, 16);
let id_0 = CommitId::from_hex("000000");
let id_1 = CommitId::from_hex("111111");
let id_2 = CommitId::from_hex("222222");
let id_3 = CommitId::from_hex("333333");
let id_4 = CommitId::from_hex("444444");
index.add_commit_data(id_0.clone(), new_change_id(), &[]);
index.add_commit_data(id_1.clone(), new_change_id(), &[id_0.clone()]);
index.add_commit_data(id_2.clone(), new_change_id(), &[id_1.clone()]);
index.add_commit_data(id_3.clone(), new_change_id(), &[id_2.clone()]);
index.add_commit_data(id_4.clone(), new_change_id(), &[id_3.clone()]);

let get_pos = |id: &CommitId| index.as_composite().commit_id_to_pos(id).unwrap();
let make_positions = |ids: &[&CommitId]| ids.iter().copied().map(get_pos).collect_vec();
let make_set = |ids: &[&CommitId]| -> Box<dyn InternalRevset> {
let positions = make_positions(ids);
Box::new(EagerRevset { positions })
};

let full_set = make_set(&[&id_4, &id_3, &id_2, &id_1, &id_0]);

// Consumes entries incrementally
let positions_accum = PositionsAccumulator::new(
index.as_composite(),
full_set.positions(index.as_composite()),
);

assert!(positions_accum.contains(&id_3));
assert_eq!(positions_accum.consumed_len(), 2);

assert!(positions_accum.contains(&id_0));
assert_eq!(positions_accum.consumed_len(), 5);

assert!(positions_accum.contains(&id_3));
assert_eq!(positions_accum.consumed_len(), 5);

// Does not consume positions for unknown commits
let positions_accum = PositionsAccumulator::new(
index.as_composite(),
full_set.positions(index.as_composite()),
);

assert!(!positions_accum.contains(&CommitId::from_hex("999999")));
assert_eq!(positions_accum.consumed_len(), 0);

// Does not consume without necessity
let set = make_set(&[&id_3, &id_2, &id_1]);
let positions_accum =
PositionsAccumulator::new(index.as_composite(), set.positions(index.as_composite()));

assert!(!positions_accum.contains(&id_4));
assert_eq!(positions_accum.consumed_len(), 1);

assert!(positions_accum.contains(&id_3));
assert_eq!(positions_accum.consumed_len(), 1);

assert!(!positions_accum.contains(&id_0));
assert_eq!(positions_accum.consumed_len(), 3);

assert!(positions_accum.contains(&id_1));
}
}
7 changes: 7 additions & 0 deletions lib/src/revset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2415,6 +2415,13 @@ pub trait Revset: fmt::Debug {
/// to how much effort should be put into the estimation, and how accurate
/// the resulting estimate should be.
fn count_estimate(&self) -> (usize, Option<usize>);

/// Returns a closure that checks if a commit is contained within the
/// revset.
///
/// The implementation may construct and maintain any necessary internal
/// context to optimize the performance of the check.
fn containing_fn(&self) -> Box<dyn Fn(&CommitId) -> bool + '_>;
}

pub trait RevsetIteratorExt<'index, I> {
Expand Down
23 changes: 23 additions & 0 deletions lib/tests/test_revset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2797,3 +2797,26 @@ fn test_no_such_revision_suggestion() {
if name == "bax" && candidates == vec!["bar".to_string(), "baz".to_string()]
);
}

#[test]
fn test_revset_containing_fn() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init();
let repo = &test_repo.repo;

let mut tx = repo.start_transaction(&settings);
let mut_repo = tx.mut_repo();
let commit_a = write_random_commit(mut_repo, &settings);
let commit_b = write_random_commit(mut_repo, &settings);
let commit_c = write_random_commit(mut_repo, &settings);
let commit_d = write_random_commit(mut_repo, &settings);
let repo = tx.commit("test");

let revset = revset_for_commits(repo.as_ref(), &[&commit_b, &commit_d]);

let revset_has_commit = revset.containing_fn();
assert!(!revset_has_commit(commit_a.id()));
assert!(revset_has_commit(commit_b.id()));
assert!(!revset_has_commit(commit_c.id()));
assert!(revset_has_commit(commit_d.id()));
}

0 comments on commit dd9bf37

Please sign in to comment.