From 025b6b25d0e7a4e5c9e5215b603fcdbde8fc9760 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sat, 22 Apr 2023 16:16:51 +0900 Subject: [PATCH 1/4] index: reimplement collect_dag_range() of revset engine as iterator I'm going to remove 'index lifetime from InternalRevset so Revset<'static> can be easily constructed from DefaultReadonlyIndex. As the first step, this series removes some lifetime complexity from EvaluationContext methods. We don't need an descendant iterator API, but it helps to add separate function to collect into HashSet instead of returning a pair of ordered vec and set. --- lib/src/default_index_store.rs | 53 ++++++++++++++++++++++++++++++++ lib/src/default_revset_engine.rs | 20 ++---------- 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/lib/src/default_index_store.rs b/lib/src/default_index_store.rs index 16236534cc..22d1127a73 100644 --- a/lib/src/default_index_store.rs +++ b/lib/src/default_index_store.rs @@ -21,6 +21,7 @@ use std::fmt::{Debug, Formatter}; use std::fs::File; use std::hash::{Hash, Hasher}; use std::io::{Read, Write}; +use std::iter::FusedIterator; use std::ops::Range; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -1381,6 +1382,18 @@ impl<'a> RevWalk<'a> { self.take_while(move |entry| entry.position() >= bottom_position) } + /// Fully consumes the ancestors and walks back from `root_positions`. + /// + /// The returned iterator yields entries in order of ascending index + /// position. + pub fn descendants(self, root_positions: &[IndexPosition]) -> RevWalkDescendants<'a> { + RevWalkDescendants { + candidate_entries: self.take_until_roots(root_positions).collect(), + root_positions: root_positions.iter().copied().collect(), + reachable_positions: HashSet::new(), + } + } + /// Fully consumes the ancestors and walks back from `root_positions` within /// `generation_range`. /// @@ -1589,6 +1602,46 @@ impl RevWalkItemGenerationRange { } } +/// Walks descendants from the roots, in order of ascending index position. +#[derive(Clone)] +pub struct RevWalkDescendants<'a> { + candidate_entries: Vec>, + root_positions: HashSet, + reachable_positions: HashSet, +} + +impl RevWalkDescendants<'_> { + /// Builds a set of index positions reachable from the roots. + /// + /// This is equivalent to `.map(|entry| entry.position()).collect()` on + /// the new iterator, but returns the internal buffer instead. + pub fn collect_positions_set(mut self) -> HashSet { + self.by_ref().for_each(drop); + self.reachable_positions + } +} + +impl<'a> Iterator for RevWalkDescendants<'a> { + type Item = IndexEntry<'a>; + + fn next(&mut self) -> Option { + while let Some(candidate) = self.candidate_entries.pop() { + if self.root_positions.contains(&candidate.position()) + || candidate + .parent_positions() + .iter() + .any(|parent_pos| self.reachable_positions.contains(parent_pos)) + { + self.reachable_positions.insert(candidate.position()); + return Some(candidate); + } + } + None + } +} + +impl FusedIterator for RevWalkDescendants<'_> {} + /// Removes the greatest items (including duplicates) from the heap, returns /// one. fn dedup_pop(heap: &mut BinaryHeap) -> Option { diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index fceb5bda4e..9a359c0ad1 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -725,23 +725,9 @@ impl<'index> EvaluationContext<'index> { T: InternalRevset<'b> + ?Sized, { let root_positions = root_set.iter().map(|entry| entry.position()).collect_vec(); - let walk = self - .walk_ancestors(head_set) - .take_until_roots(&root_positions); - let root_positions: HashSet<_> = root_positions.into_iter().collect(); - let mut reachable_positions = HashSet::new(); - let mut index_entries = vec![]; - for candidate in walk.collect_vec().into_iter().rev() { - if root_positions.contains(&candidate.position()) - || candidate - .parent_positions() - .iter() - .any(|parent_pos| reachable_positions.contains(parent_pos)) - { - reachable_positions.insert(candidate.position()); - index_entries.push(candidate); - } - } + let mut walk = self.walk_ancestors(head_set).descendants(&root_positions); + let mut index_entries = walk.by_ref().collect_vec(); + let reachable_positions = walk.collect_positions_set(); index_entries.reverse(); (EagerRevset { index_entries }, reachable_positions) } From 221a50096aac054a3e5acab1398b5d69980e1ff1 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sat, 22 Apr 2023 16:16:51 +0900 Subject: [PATCH 2/4] revset: inline collect_dag_range() --- lib/src/default_revset_engine.rs | 37 ++++++++++++++------------------ 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index 9a359c0ad1..2d6cf972a8 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -573,8 +573,14 @@ impl<'index> EvaluationContext<'index> { if generation_from_roots == &(1..2) { Ok(Box::new(self.walk_children(&*root_set, &*head_set))) } else if generation_from_roots == &GENERATION_RANGE_FULL { - let (dag_range_set, _) = self.collect_dag_range(&*root_set, &*head_set); - Ok(Box::new(dag_range_set)) + let root_positions = + root_set.iter().map(|entry| entry.position()).collect_vec(); + let mut index_entries = self + .walk_ancestors(&*head_set) + .descendants(&root_positions) + .collect_vec(); + index_entries.reverse(); + Ok(Box::new(EagerRevset { index_entries })) } else { // For small generation range, it might be better to build a reachable map // with generation bit set, which can be calculated incrementally from roots: @@ -608,7 +614,14 @@ impl<'index> EvaluationContext<'index> { let candidate_set = EagerRevset { index_entries: self.evaluate(candidates)?.iter().collect(), }; - let (_, filled) = self.collect_dag_range(&candidate_set, &candidate_set); + let candidate_positions = candidate_set + .iter() + .map(|entry| entry.position()) + .collect_vec(); + let filled = self + .walk_ancestors(&candidate_set) + .descendants(&candidate_positions) + .collect_positions_set(); let mut index_entries = vec![]; for candidate in candidate_set.iter() { if !candidate @@ -714,24 +727,6 @@ impl<'index> EvaluationContext<'index> { } } - /// Calculates `root_set::head_set`. - fn collect_dag_range<'a, 'b, S, T>( - &self, - root_set: &S, - head_set: &T, - ) -> (EagerRevset<'index>, HashSet) - where - S: InternalRevset<'a> + ?Sized, - T: InternalRevset<'b> + ?Sized, - { - let root_positions = root_set.iter().map(|entry| entry.position()).collect_vec(); - let mut walk = self.walk_ancestors(head_set).descendants(&root_positions); - let mut index_entries = walk.by_ref().collect_vec(); - let reachable_positions = walk.collect_positions_set(); - index_entries.reverse(); - (EagerRevset { index_entries }, reachable_positions) - } - fn revset_for_commit_ids(&self, commit_ids: &[CommitId]) -> EagerRevset<'index> { let mut index_entries = vec![]; for id in commit_ids { From c2c0c095912c76b938c5604fc9abb213dbc1d561 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sun, 28 May 2023 21:21:21 +0900 Subject: [PATCH 3/4] revset: inline walk_children() There's only one caller, and we have common code at the call site. --- lib/src/default_revset_engine.rs | 52 +++++++++++--------------------- 1 file changed, 18 insertions(+), 34 deletions(-) diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index 2d6cf972a8..0e4be2110a 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -569,12 +569,27 @@ impl<'index> EvaluationContext<'index> { generation_from_roots, } => { let root_set = self.evaluate(roots)?; + let root_positions = root_set.iter().map(|entry| entry.position()).collect_vec(); let head_set = self.evaluate(heads)?; if generation_from_roots == &(1..2) { - Ok(Box::new(self.walk_children(&*root_set, &*head_set))) + let walk = self + .walk_ancestors(&*head_set) + .take_until_roots(&root_positions); + let root_positions_set: HashSet<_> = root_positions.into_iter().collect(); + let candidates = Box::new(RevWalkRevset { walk }); + let predicate = PurePredicateFn(move |entry: &IndexEntry| { + entry + .parent_positions() + .iter() + .any(|parent_pos| root_positions_set.contains(parent_pos)) + }); + // TODO: Suppose heads include all visible heads, ToPredicateFn version can be + // optimized to only test the predicate() + Ok(Box::new(FilterRevset { + candidates, + predicate, + })) } else if generation_from_roots == &GENERATION_RANGE_FULL { - let root_positions = - root_set.iter().map(|entry| entry.position()).collect_vec(); let mut index_entries = self .walk_ancestors(&*head_set) .descendants(&root_positions) @@ -585,8 +600,6 @@ impl<'index> EvaluationContext<'index> { // For small generation range, it might be better to build a reachable map // with generation bit set, which can be calculated incrementally from roots: // reachable[pos] = (reachable[parent_pos] | ...) << 1 - let root_positions = - root_set.iter().map(|entry| entry.position()).collect_vec(); let walk = self .walk_ancestors(&*head_set) .descendants_filtered_by_generation( @@ -698,35 +711,6 @@ impl<'index> EvaluationContext<'index> { self.index.walk_revs(&head_positions, &[]) } - fn walk_children<'a, 'b, S, T>( - &self, - root_set: &S, - head_set: &T, - ) -> impl InternalRevset<'index> + 'index - where - S: InternalRevset<'a> + ?Sized, - T: InternalRevset<'b> + ?Sized, - { - let root_positions = root_set.iter().map(|entry| entry.position()).collect_vec(); - let walk = self - .walk_ancestors(head_set) - .take_until_roots(&root_positions); - let root_positions: HashSet<_> = root_positions.into_iter().collect(); - let candidates = Box::new(RevWalkRevset { walk }); - let predicate = PurePredicateFn(move |entry: &IndexEntry| { - entry - .parent_positions() - .iter() - .any(|parent_pos| root_positions.contains(parent_pos)) - }); - // TODO: Suppose heads include all visible heads, ToPredicateFn version can be - // optimized to only test the predicate() - FilterRevset { - candidates, - predicate, - } - } - fn revset_for_commit_ids(&self, commit_ids: &[CommitId]) -> EagerRevset<'index> { let mut index_entries = vec![]; for id in commit_ids { From d0e483bf551629f3c3451e0a07e3474ea011e68a Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sun, 28 May 2023 21:31:02 +0900 Subject: [PATCH 4/4] revset: inline walk_ancestors() --- lib/src/default_revset_engine.rs | 44 +++++++++++++------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index 0e4be2110a..6b5644149a 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -24,9 +24,7 @@ use std::sync::Arc; use itertools::Itertools; use crate::backend::{ChangeId, CommitId, MillisSinceEpoch}; -use crate::default_index_store::{ - CompositeIndex, IndexEntry, IndexEntryByPosition, IndexPosition, RevWalk, -}; +use crate::default_index_store::{CompositeIndex, IndexEntry, IndexEntryByPosition, IndexPosition}; use crate::default_revset_graph_iterator::RevsetGraphIterator; use crate::id_prefix::{IdIndex, IdIndexSource, IdIndexSourceEntry}; use crate::index::{HexPrefix, PrefixResolution}; @@ -538,7 +536,8 @@ impl<'index> EvaluationContext<'index> { } ResolvedExpression::Ancestors { heads, generation } => { let head_set = self.evaluate(heads)?; - let walk = self.walk_ancestors(&*head_set); + let head_positions = head_set.iter().map(|entry| entry.position()).collect_vec(); + let walk = self.index.walk_revs(&head_positions, &[]); if generation == &GENERATION_RANGE_FULL { Ok(Box::new(RevWalkRevset { walk })) } else { @@ -568,12 +567,14 @@ impl<'index> EvaluationContext<'index> { heads, generation_from_roots, } => { + let index = self.index; let root_set = self.evaluate(roots)?; let root_positions = root_set.iter().map(|entry| entry.position()).collect_vec(); let head_set = self.evaluate(heads)?; + let head_positions = head_set.iter().map(|entry| entry.position()).collect_vec(); if generation_from_roots == &(1..2) { - let walk = self - .walk_ancestors(&*head_set) + let walk = index + .walk_revs(&head_positions, &[]) .take_until_roots(&root_positions); let root_positions_set: HashSet<_> = root_positions.into_iter().collect(); let candidates = Box::new(RevWalkRevset { walk }); @@ -590,8 +591,8 @@ impl<'index> EvaluationContext<'index> { predicate, })) } else if generation_from_roots == &GENERATION_RANGE_FULL { - let mut index_entries = self - .walk_ancestors(&*head_set) + let mut index_entries = index + .walk_revs(&head_positions, &[]) .descendants(&root_positions) .collect_vec(); index_entries.reverse(); @@ -600,13 +601,13 @@ impl<'index> EvaluationContext<'index> { // For small generation range, it might be better to build a reachable map // with generation bit set, which can be calculated incrementally from roots: // reachable[pos] = (reachable[parent_pos] | ...) << 1 - let walk = self - .walk_ancestors(&*head_set) + let mut index_entries = index + .walk_revs(&head_positions, &[]) .descendants_filtered_by_generation( &root_positions, to_u32_generation_range(generation_from_roots)?, - ); - let mut index_entries = walk.collect_vec(); + ) + .collect_vec(); index_entries.reverse(); Ok(Box::new(EagerRevset { index_entries })) } @@ -624,19 +625,18 @@ impl<'index> EvaluationContext<'index> { Ok(Box::new(EagerRevset { index_entries })) } ResolvedExpression::Roots(candidates) => { - let candidate_set = EagerRevset { - index_entries: self.evaluate(candidates)?.iter().collect(), - }; - let candidate_positions = candidate_set + let candidate_entries = self.evaluate(candidates)?.iter().collect_vec(); + let candidate_positions = candidate_entries .iter() .map(|entry| entry.position()) .collect_vec(); let filled = self - .walk_ancestors(&candidate_set) + .index + .walk_revs(&candidate_positions, &[]) .descendants(&candidate_positions) .collect_positions_set(); let mut index_entries = vec![]; - for candidate in candidate_set.iter() { + for candidate in candidate_entries { if !candidate .parent_positions() .iter() @@ -703,14 +703,6 @@ impl<'index> EvaluationContext<'index> { } } - fn walk_ancestors<'a, S>(&self, head_set: &S) -> RevWalk<'index> - where - S: InternalRevset<'a> + ?Sized, - { - let head_positions = head_set.iter().map(|entry| entry.position()).collect_vec(); - self.index.walk_revs(&head_positions, &[]) - } - fn revset_for_commit_ids(&self, commit_ids: &[CommitId]) -> EagerRevset<'index> { let mut index_entries = vec![]; for id in commit_ids {