From 2e70c4e45fb80b22bd3a1f4444a1885bbd817ec1 Mon Sep 17 00:00:00 2001 From: dploch Date: Tue, 14 May 2024 17:01:28 -0400 Subject: [PATCH] revset: implement a 'reachable()' expression This revset correctly implements "reachability" from a set of source commits following both parent and child edges as far as they can go, outside of a defined 'boundary' set. This type of 'bfs' query is currently impossible to express with existing revset functions. --- CHANGELOG.md | 3 + cli/tests/test_revset_output.rs | 4 +- docs/revsets.md | 4 + lib/src/default_index/revset_engine.rs | 122 ++++++++++++++++++++- lib/src/revset.rs | 48 +++++++++ lib/tests/test_revset.rs | 141 +++++++++++++++++++++++++ 6 files changed, 319 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d10496145c2..0bec70f2f30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -102,6 +102,9 @@ to avoid letting the user edit the immutable one. * `jj rebase -r` now accepts `--insert-after` and `--insert-before` options to customize the location of the rebased revisions. +* A new revset `reachable(srcs, boundary_heads)` will return all commits that are + reachable from `srcs` without traversing ancestors of `boundary_heads`. + ### Fixed bugs * Revsets now support `\`-escapes in string literal. diff --git a/cli/tests/test_revset_output.rs b/cli/tests/test_revset_output.rs index 29a4f881175..86de7504306 100644 --- a/cli/tests/test_revset_output.rs +++ b/cli/tests/test_revset_output.rs @@ -278,7 +278,7 @@ fn test_function_name_hint() { | ^----^ | = Function "branch" doesn't exist - Hint: Did you mean "branches"? + Hint: Did you mean "branches", "reachable"? "###); // Both builtin function and function alias should be suggested @@ -308,7 +308,7 @@ fn test_function_name_hint() { | ^----^ | = Function "branch" doesn't exist - Hint: Did you mean "branches"? + Hint: Did you mean "branches", "reachable"? "###); } diff --git a/docs/revsets.md b/docs/revsets.md index 59f0614113b..8a33806c12e 100644 --- a/docs/revsets.md +++ b/docs/revsets.md @@ -87,6 +87,10 @@ revsets (expressions) as arguments. * `descendants(x)`: Same as `x::`. +* `reachable(s, b)`: All commits reachable from `s` without entering `::b`. + Equivalent to `(b..s)::` for simple graph structures, but distinct when more + complex merge commit graphs are involved. + * `connected(x)`: Same as `x::x`. Useful when `x` includes several commits. * `all()`: All visible commits in the repo. diff --git a/lib/src/default_index/revset_engine.rs b/lib/src/default_index/revset_engine.rs index 41140ad8ca8..9ca01448db9 100644 --- a/lib/src/default_index/revset_engine.rs +++ b/lib/src/default_index/revset_engine.rs @@ -16,7 +16,7 @@ use std::cell::RefCell; use std::cmp::{Ordering, Reverse}; -use std::collections::{BTreeSet, BinaryHeap, HashSet}; +use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; use std::ops::Range; use std::rc::Rc; use std::sync::Arc; @@ -709,6 +709,96 @@ pub fn evaluate( Ok(RevsetImpl::new(internal_revset, index)) } +#[derive(Clone, Eq, PartialEq)] +struct UnionFindNode { + root: IndexPosition, + size: u32, +} + +// A disjoint-set representation of connected index positions. +struct UnionFind { + map: HashMap, +} + +impl UnionFind { + fn new(index: &CompositeIndex, positions: &[IndexPosition]) -> Self { + let mut union_find = Self { + map: HashMap::new(), + }; + let domain: HashSet<_> = positions.iter().collect(); + for pos in positions { + for parent in index.entry_by_pos(*pos).parent_positions() { + if domain.contains(&parent) { + union_find.union(*pos, parent); + } + } + } + + // Make sure all the roots are stable + for pos in positions { + union_find.find_mut(*pos); + } + + union_find + } + + fn find(&self, pos: IndexPosition) -> &UnionFindNode { + self.map.get(&pos).unwrap() + } + + fn find_mut(&mut self, pos: IndexPosition) -> UnionFindNode { + match self.map.get(&pos) { + Some(node) => { + if node.root != pos { + let new_root = self.find_mut(node.root); + self.map.insert(pos, new_root.clone()); + new_root + } else { + node.clone() + } + } + None => { + let node = UnionFindNode { root: pos, size: 1 }; + self.map.insert(pos, node.clone()); + node + } + } + } + + fn union(&mut self, a: IndexPosition, b: IndexPosition) { + let a = self.find_mut(a); + let b = self.find_mut(b); + if a.root == b.root { + return; + } + + // Merge the roots. + let new_root = UnionFindNode { + root: if a.size < b.size { b.root } else { a.root }, + size: a.size + b.size, + }; + self.map.insert(a.root, new_root.clone()); + self.map.insert(b.root, new_root); + } + + fn all_with_roots(self, roots: &HashSet) -> Vec { + let mut vec = self + .map + .into_iter() + .filter_map(|(pos, node)| { + if roots.contains(&node.root) { + Some(pos) + } else { + None + } + }) + .sorted() + .collect_vec(); + vec.reverse(); + vec + } +} + struct EvaluationContext<'index> { store: Arc, index: &'index CompositeIndex, @@ -829,6 +919,36 @@ impl<'index> EvaluationContext<'index> { Ok(Box::new(EagerRevset { positions })) } } + ResolvedExpression::Reachable { + sources, + boundary_heads, + visible_heads, + } => { + let domain = ResolvedExpression::Range { + roots: boundary_heads.clone(), + heads: visible_heads.clone(), + generation: GENERATION_RANGE_FULL, + }; + let candidates = self + .evaluate(&domain)? + .positions() + .attach(index) + .collect_vec(); + let union_find = UnionFind::new(index, &candidates); + + let source_roots: HashSet<_> = self + .evaluate(&ResolvedExpression::Intersection( + sources.clone(), + domain.into(), + ))? + .positions() + .attach(index) + .map(|pos| union_find.find(pos).root) + .collect(); + Ok(Box::new(EagerRevset { + positions: union_find.all_with_roots(&source_roots), + })) + } ResolvedExpression::Heads(candidates) => { let candidate_set = self.evaluate(candidates)?; let head_positions: BTreeSet<_> = diff --git a/lib/src/revset.rs b/lib/src/revset.rs index ff5d86a9974..7a1dcedc3d0 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -190,6 +190,11 @@ pub enum RevsetExpression { heads: Rc, // TODO: maybe add generation_from_roots/heads? }, + // Commits reachable from "sources" without traversing ancestors of "boundary_heads" + Reachable { + sources: Rc, + boundary_heads: Rc, + }, Heads(Rc), Roots(Rc), Latest { @@ -374,6 +379,18 @@ impl RevsetExpression { self.dag_range_to(self) } + /// Commits connected to `sources` without traversing ancestors of + /// `boundary_heads`. + pub fn reachable( + sources: Rc, + boundary_heads: Rc, + ) -> Rc { + Rc::new(RevsetExpression::Reachable { + sources, + boundary_heads, + }) + } + /// Commits reachable from `heads` but not from `self`. pub fn range( self: &Rc, @@ -502,6 +519,13 @@ pub enum ResolvedExpression { heads: Box, generation_from_roots: Range, }, + /// Commits reachable from `sources` without traversing ancestors of + /// `boundary_heads`. + Reachable { + sources: Box, + boundary_heads: Box, + visible_heads: Box, + }, Heads(Box), Roots(Box), Latest { @@ -630,6 +654,12 @@ static BUILTIN_FUNCTION_MAP: Lazy> = Lazy: let candidates = parse_expression_rule(arg.into_inner(), state)?; Ok(candidates.connected()) }); + map.insert("reachable", |name, arguments_pair, state| { + let ([source_arg, boundary_heads_arg], []) = expect_arguments(name, arguments_pair)?; + let sources = parse_expression_rule(source_arg.into_inner(), state)?; + let boundary_heads = parse_expression_rule(boundary_heads_arg.into_inner(), state)?; + Ok(RevsetExpression::reachable(sources, boundary_heads)) + }); map.insert("none", |name, arguments_pair, _state| { expect_no_arguments(name, arguments_pair)?; Ok(RevsetExpression::none()) @@ -958,6 +988,15 @@ fn try_transform_expression( transform_rec_pair((roots, heads), pre, post)? .map(|(roots, heads)| RevsetExpression::DagRange { roots, heads }) } + RevsetExpression::Reachable { + sources, + boundary_heads, + } => transform_rec_pair((sources, boundary_heads), pre, post)?.map( + |(sources, boundary_heads)| RevsetExpression::Reachable { + sources, + boundary_heads, + }, + ), RevsetExpression::Heads(candidates) => { transform_rec(candidates, pre, post)?.map(RevsetExpression::Heads) } @@ -1746,6 +1785,14 @@ impl VisibilityResolutionContext<'_> { heads: self.resolve(heads).into(), generation_from_roots: GENERATION_RANGE_FULL, }, + RevsetExpression::Reachable { + sources, + boundary_heads, + } => ResolvedExpression::Reachable { + sources: self.resolve(sources).into(), + boundary_heads: self.resolve(boundary_heads).into(), + visible_heads: self.resolve_visible_heads().into(), + }, RevsetExpression::Heads(candidates) => { ResolvedExpression::Heads(self.resolve(candidates).into()) } @@ -1831,6 +1878,7 @@ impl VisibilityResolutionContext<'_> { | RevsetExpression::Descendants { .. } | RevsetExpression::Range { .. } | RevsetExpression::DagRange { .. } + | RevsetExpression::Reachable { .. } | RevsetExpression::Heads(_) | RevsetExpression::Roots(_) | RevsetExpression::Latest { .. } => { diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index bb372f3c7b2..117d7135f6a 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -1556,6 +1556,147 @@ fn test_evaluate_expression_connected() { ); } +#[test] +fn test_evaluate_expression_reachable() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings); + let mut_repo = tx.mut_repo(); + + // Construct 3 separate subgraphs off the root commit. + // 1 is a chain, 2 is a merge, 3 is a pyramidal monstrosity + let graph1commit1 = write_random_commit(mut_repo, &settings); + let graph1commit2 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph1commit1.id().clone()]) + .write() + .unwrap(); + let graph1commit3 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph1commit2.id().clone()]) + .write() + .unwrap(); + let graph2commit1 = write_random_commit(mut_repo, &settings); + let graph2commit2 = write_random_commit(mut_repo, &settings); + let graph2commit3 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph2commit1.id().clone(), graph2commit2.id().clone()]) + .write() + .unwrap(); + let graph3commit1 = write_random_commit(mut_repo, &settings); + let graph3commit2 = write_random_commit(mut_repo, &settings); + let graph3commit3 = write_random_commit(mut_repo, &settings); + let graph3commit4 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph3commit1.id().clone(), graph3commit2.id().clone()]) + .write() + .unwrap(); + let graph3commit5 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph3commit2.id().clone(), graph3commit3.id().clone()]) + .write() + .unwrap(); + let graph3commit6 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph3commit3.id().clone()]) + .write() + .unwrap(); + let graph3commit7 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph3commit4.id().clone(), graph3commit5.id().clone()]) + .write() + .unwrap(); + + // Boundary heads are respected. + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!( + "reachable({}, {})", + graph1commit2.id().hex(), + graph1commit1.id().hex() + ) + ), + vec![graph1commit3.id().clone(), graph1commit2.id().clone(),] + ); + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!( + "reachable({}, {})", + graph1commit2.id().hex(), + graph1commit3.id().hex() + ) + ), + vec![] + ); + + // Each graph is identifiable from any node in it. + for (i, commit) in [&graph1commit1, &graph1commit2, &graph1commit3] + .iter() + .enumerate() + { + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!("reachable({}, root())", commit.id().hex()) + ), + vec![ + graph1commit3.id().clone(), + graph1commit2.id().clone(), + graph1commit1.id().clone(), + ], + "commit {}", + i + 1 + ); + } + + for (i, commit) in [&graph2commit1, &graph2commit2, &graph2commit3] + .iter() + .enumerate() + { + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!("reachable({}, root())", commit.id().hex()) + ), + vec![ + graph2commit3.id().clone(), + graph2commit2.id().clone(), + graph2commit1.id().clone(), + ], + "commit {}", + i + 1 + ); + } + + for (i, commit) in [ + &graph3commit1, + &graph3commit2, + &graph3commit3, + &graph3commit4, + &graph3commit5, + &graph3commit6, + &graph3commit7, + ] + .iter() + .enumerate() + { + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!("reachable({}, root())", commit.id().hex()) + ), + vec![ + graph3commit7.id().clone(), + graph3commit6.id().clone(), + graph3commit5.id().clone(), + graph3commit4.id().clone(), + graph3commit3.id().clone(), + graph3commit2.id().clone(), + graph3commit1.id().clone(), + ], + "commit {}", + i + 1 + ); + } +} + #[test] fn test_evaluate_expression_descendants() { let settings = testutils::user_settings();