Skip to content

Commit

Permalink
revset: introduce cached revset allows checking if it contains a given commit
Browse files Browse the repository at this point in the history
  • Loading branch information
zummenix committed Mar 8, 2024
1 parent 7e28e19 commit 9cef357
Show file tree
Hide file tree
Showing 2 changed files with 208 additions and 1 deletion.
201 changes: 200 additions & 1 deletion lib/src/default_index/revset_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@

#![allow(missing_docs)]

use std::cell::RefCell;
use std::cmp::{Ordering, Reverse};
use std::collections::{BTreeSet, BinaryHeap, HashSet};
use std::fmt;
use std::iter::Peekable;
use std::ops::Range;
use std::rc::Rc;
use std::sync::Arc;

use itertools::Itertools;
Expand All @@ -29,7 +31,7 @@ use crate::default_index::{AsCompositeIndex, CompositeIndex, IndexEntry, IndexPo
use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher, Visit};
use crate::repo_path::RepoPath;
use crate::revset::{
ResolvedExpression, ResolvedPredicateExpression, Revset, RevsetEvaluationError,
CachedRevset, ResolvedExpression, ResolvedPredicateExpression, Revset, RevsetEvaluationError,
RevsetFilterPredicate, GENERATION_RANGE_FULL,
};
use crate::revset_graph::RevsetGraphEdge;
Expand Down Expand Up @@ -162,6 +164,109 @@ impl<I: AsCompositeIndex> Revset for RevsetImpl<I> {
(count, Some(count))
}
}

/// Builds a [`CachedRevsetImpl`] over this revset's entries, using the
/// composite view of the revset's index.
fn caching(&self) -> Box<dyn CachedRevset + '_> {
    let composite_index = self.index.as_composite();
    let entries = self.entries();
    let cached = CachedRevsetImpl::new(composite_index, entries);
    Box::new(cached)
}
}

/// Shared mutable state behind a cached revset and the iterators created
/// from it.
struct CachedRevsetInner<'revset, 'index> {
/// Source of entries that have not been pulled into the cache yet.
entries_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
/// Entries pulled from `entries_iter` so far, in the order they were
/// yielded.
consumed_entries: Vec<IndexEntry<'index>>,
/// Commit ids of the entries in `consumed_entries`, for membership checks.
consumed_commit_ids: HashSet<CommitId>,
/// Position of the most recently consumed entry; starts at
/// `IndexPosition::MAX` before anything has been consumed.
last_consumed_position: IndexPosition,
}

/// Revset wrapper that consumes the underlying entries on demand and
/// remembers the ones it has already seen.
struct CachedRevsetImpl<'revset, 'index> {
/// Index used to translate a commit id into an index position.
index: CompositeIndex<'index>,
/// Cache state; also handed (via `Rc`) to every iterator created by `iter()`.
inner: Rc<RefCell<CachedRevsetInner<'revset, 'index>>>,
}

/// Iterator over the commit ids of a cached revset.
///
/// It replays entries already stored in the shared cache and pulls new ones
/// from the underlying iterator once the cache is exhausted.
struct CachedRevsetIterator<'revset, 'index> {
/// Cache state shared with the `CachedRevsetImpl` that created this iterator.
inner: Rc<RefCell<CachedRevsetInner<'revset, 'index>>>,
/// Index into `consumed_entries` of the next entry to yield.
next_index: usize,
}

impl<'revset, 'index> CachedRevsetImpl<'revset, 'index> {
    /// Creates a cached revset over `entries_iter` with an empty cache.
    ///
    /// `index` is used later to map commit ids to index positions.
    fn new(
        index: CompositeIndex<'index>,
        entries_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
    ) -> Self {
        let state = CachedRevsetInner {
            entries_iter,
            consumed_entries: Vec::new(),
            consumed_commit_ids: HashSet::new(),
            // Nothing consumed yet: start at the largest possible position.
            last_consumed_position: IndexPosition::MAX,
        };
        Self {
            index,
            inner: Rc::new(RefCell::new(state)),
        }
    }

    /// Pulls entries from the underlying iterator into the cache, stopping
    /// right after the first entry whose position is at or below `position`
    /// (or when the iterator runs out).
    fn consume_to(&self, position: IndexPosition) {
        let mut guard = self.inner.borrow_mut();
        // Split the state into disjoint field borrows so the source iterator
        // can be driven while the cache fields are updated.
        let CachedRevsetInner {
            entries_iter,
            consumed_entries,
            consumed_commit_ids,
            last_consumed_position,
        } = &mut *guard;

        for entry in entries_iter.by_ref() {
            consumed_commit_ids.insert(entry.commit_id());
            *last_consumed_position = entry.position();
            consumed_entries.push(entry);
            if *last_consumed_position <= position {
                break;
            }
        }
    }

    /// Number of entries pulled into the cache so far (test helper).
    #[cfg(test)]
    fn consumed_len(&self) -> usize {
        let state = self.inner.borrow();
        state.consumed_entries.len()
    }
}

impl<'revset, 'index> CachedRevset for CachedRevsetImpl<'revset, 'index> {
fn iter(&self) -> Box<dyn Iterator<Item = CommitId> + '_> {
Box::new(CachedRevsetIterator {
inner: self.inner.clone(),
next_index: 0,
})
}

fn contains(&self, commit_id: &CommitId) -> bool {
if self.inner.borrow().consumed_commit_ids.contains(commit_id) {
return true;
}

if let Some(position) = self.index.commit_id_to_pos(commit_id) {
if self.inner.borrow().last_consumed_position > position {
self.consume_to(position)
}
}

self.inner.borrow().consumed_commit_ids.contains(commit_id)
}
}

impl<'revset, 'index> Iterator for CachedRevsetIterator<'revset, 'index> {
    type Item = CommitId;

    /// Yields the next commit id of the revset.
    ///
    /// Entries already present in the shared cache are replayed first. Once
    /// the cache is exhausted, the next entry is pulled from the underlying
    /// iterator, recorded in the cache, and returned. Returns `None` when the
    /// underlying iterator has nothing left.
    fn next(&mut self) -> Option<Self::Item> {
        let mut inner = self.inner.borrow_mut();

        let commit_id = if self.next_index < inner.consumed_entries.len() {
            // Replay an entry that was cached earlier (by this iterator,
            // another iterator, or a `contains` lookup).
            inner.consumed_entries[self.next_index].commit_id()
        } else {
            // Cache exhausted: pull a fresh entry. On `None` we return early
            // WITHOUT advancing `next_index`, so the cursor never moves past
            // the end of the cache and cannot skip an entry added later.
            let entry = inner.entries_iter.next()?;
            // `commit_id()` already returns an owned id; clone it once for
            // the lookup set and hand the original to the caller.
            let commit_id = entry.commit_id();
            inner.consumed_commit_ids.insert(commit_id.clone());
            inner.last_consumed_position = entry.position();
            inner.consumed_entries.push(entry);
            commit_id
        };

        self.next_index += 1;
        Some(commit_id)
    }
}

#[derive(Debug)]
Expand Down Expand Up @@ -1098,6 +1203,100 @@ mod tests {
move || iter.next().unwrap()
}

// Exercises `CachedRevsetImpl`: lazy consumption through `contains`,
// iteration through `iter`, and the interaction between the two on a linear
// history of five commits (id_0 <- id_1 <- id_2 <- id_3 <- id_4).
#[test]
fn test_cached_revset() {
let mut new_change_id = change_id_generator();
let mut index = DefaultMutableIndex::full(3, 16);
let id_0 = CommitId::from_hex("000000");
let id_1 = CommitId::from_hex("111111");
let id_2 = CommitId::from_hex("222222");
let id_3 = CommitId::from_hex("333333");
let id_4 = CommitId::from_hex("444444");
index.add_commit_data(id_0.clone(), new_change_id(), &[]);
index.add_commit_data(id_1.clone(), new_change_id(), &[id_0.clone()]);
index.add_commit_data(id_2.clone(), new_change_id(), &[id_1.clone()]);
index.add_commit_data(id_3.clone(), new_change_id(), &[id_2.clone()]);
index.add_commit_data(id_4.clone(), new_change_id(), &[id_3.clone()]);

// Helpers that turn a list of commit ids into an `EagerRevset` of their
// index positions.
let get_pos = |id: &CommitId| index.as_composite().commit_id_to_pos(id).unwrap();
let make_positions = |ids: &[&CommitId]| ids.iter().copied().map(get_pos).collect_vec();
let make_set = |ids: &[&CommitId]| -> Box<dyn InternalRevset> {
let positions = make_positions(ids);
Box::new(EagerRevset { positions })
};

let full_set = make_set(&[&id_4, &id_3, &id_2, &id_1, &id_0]);

// Consumes entries incrementally
let cached_revset =
CachedRevsetImpl::new(index.as_composite(), full_set.entries(index.as_composite()));

// Finding id_3 requires pulling id_4 and id_3 only.
assert!(cached_revset.contains(&id_3));
assert_eq!(cached_revset.consumed_len(), 2);

// Finding id_0 drains the remaining entries.
assert!(cached_revset.contains(&id_0));
assert_eq!(cached_revset.consumed_len(), 5);

// A repeated lookup is answered from the cache.
assert!(cached_revset.contains(&id_3));
assert_eq!(cached_revset.consumed_len(), 5);

// Does not consume entries for unknown commits
let cached_revset =
CachedRevsetImpl::new(index.as_composite(), full_set.entries(index.as_composite()));

assert!(!cached_revset.contains(&CommitId::from_hex("999999")));
assert_eq!(cached_revset.consumed_len(), 0);

// Does not consume without necessity
let set = make_set(&[&id_3, &id_2, &id_1]);
let cached_revset =
CachedRevsetImpl::new(index.as_composite(), set.entries(index.as_composite()));

// Only the first entry (id_3) is pulled before id_4 is reported absent.
assert!(!cached_revset.contains(&id_4));
assert_eq!(cached_revset.consumed_len(), 1);

// id_0 is not in the set either; looking for it drains all three entries.
assert!(!cached_revset.contains(&id_0));
assert_eq!(cached_revset.consumed_len(), 3);

assert!(cached_revset.contains(&id_1));

// Consumes in iter
let commits = [&id_3, &id_2, &id_1];
let set = make_set(&commits);
let cached_revset =
CachedRevsetImpl::new(index.as_composite(), set.entries(index.as_composite()));

assert_eq!(
cached_revset.iter().collect::<Vec<_>>(),
commits
.into_iter()
.map(|c| c.to_owned())
.collect::<Vec<_>>()
);
assert_eq!(cached_revset.consumed_len(), 3);

// `contains` does not affect iter
let commits = [&id_3, &id_2, &id_1];
let set = make_set(&commits);
let cached_revset =
CachedRevsetImpl::new(index.as_composite(), set.entries(index.as_composite()));

// The iterator is created before any lookup and must still yield every
// entry in order, whether it was cached by `contains` or pulled by itself.
let mut cached_revset_iter = cached_revset.iter();

assert!(cached_revset.contains(&id_3));
assert_eq!(cached_revset.consumed_len(), 1);
assert_eq!(cached_revset_iter.next(), Some(id_3.to_owned()));

assert_eq!(cached_revset_iter.next(), Some(id_2.to_owned()));
assert_eq!(cached_revset.consumed_len(), 2);

assert!(cached_revset.contains(&id_1));
assert_eq!(cached_revset.consumed_len(), 3);
assert_eq!(cached_revset_iter.next(), Some(id_1.to_owned()));

assert_eq!(cached_revset_iter.next(), None);
}

#[test]
fn test_revset_combinator() {
let mut new_change_id = change_id_generator();
Expand Down
8 changes: 8 additions & 0 deletions lib/src/revset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2415,6 +2415,14 @@ pub trait Revset: fmt::Debug {
/// to how much effort should be put into the estimation, and how accurate
/// the resulting estimate should be.
fn count_estimate(&self) -> (usize, Option<usize>);

/// Returns a revset that caches visited entries.
///
/// The returned [`CachedRevset`] borrows from `self` and can be both
/// iterated and queried for whether it contains a given commit id.
fn caching(&self) -> Box<dyn CachedRevset + '_>;
}

/// A revset that caches the entries it has visited, as returned by
/// [`Revset::caching`].
pub trait CachedRevset {
/// Returns an iterator over the ids of the commits in the revset.
fn iter(&self) -> Box<dyn Iterator<Item = CommitId> + '_>;
/// Returns `true` if the revset contains the commit with the given id.
fn contains(&self, commit_id: &CommitId) -> bool;
}

pub trait RevsetIteratorExt<'index, I> {
Expand Down

0 comments on commit 9cef357

Please sign in to comment.