Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revset: implement a "reachable()" expression #3691

Merged
merged 2 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ to avoid letting the user edit the immutable one.
* `jj rebase -r` now accepts `--insert-after` and `--insert-before` options to
customize the location of the rebased revisions.

* A new revset `reachable(srcs, domain)` will return all commits that are
reachable from `srcs` within `domain`.

thoughtpolice marked this conversation as resolved.
Show resolved Hide resolved
### Fixed bugs

* Revsets now support `\`-escapes in string literal.
Expand Down
4 changes: 2 additions & 2 deletions cli/tests/test_revset_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ fn test_function_name_hint() {
| ^----^
|
= Function "branch" doesn't exist
Hint: Did you mean "branches"?
Hint: Did you mean "branches", "reachable"?
"###);

// Both builtin function and function alias should be suggested
Expand Down Expand Up @@ -308,7 +308,7 @@ fn test_function_name_hint() {
| ^----^
|
= Function "branch" doesn't exist
Hint: Did you mean "branches"?
Hint: Did you mean "branches", "reachable"?
"###);
}

Expand Down
3 changes: 3 additions & 0 deletions docs/revsets.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ revsets (expressions) as arguments.

* `descendants(x)`: Same as `x::`.

* `reachable(srcs, domain)`: All commits reachable from `srcs` within
`domain`, traversing all parent and child edges.

* `connected(x)`: Same as `x::x`. Useful when `x` includes several commits.

* `all()`: All visible commits in the repo.
Expand Down
33 changes: 32 additions & 1 deletion lib/src/default_index/revset_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ use crate::revset::{
RevsetFilterExtensionWrapper, RevsetFilterPredicate, GENERATION_RANGE_FULL,
};
use crate::revset_graph::RevsetGraphEdge;
use crate::rewrite;
use crate::store::Store;
use crate::{rewrite, union_find};

type BoxedPredicateFn<'a> = Box<dyn FnMut(&CompositeIndex, IndexPosition) -> bool + 'a>;
pub(super) type BoxedRevWalk<'a> = Box<dyn RevWalk<CompositeIndex, Item = IndexPosition> + 'a>;
Expand Down Expand Up @@ -829,6 +829,37 @@ impl<'index> EvaluationContext<'index> {
Ok(Box::new(EagerRevset { positions }))
}
}
ResolvedExpression::Reachable { sources, domain } => {
let mut sets = union_find::UnionFind::<IndexPosition>::new();

// Compute all reachable subgraphs.
let domain_revset = self.evaluate(domain)?;
let domain_vec = domain_revset.positions().attach(index).collect_vec();
let domain_set: HashSet<_> = domain_vec.iter().copied().collect();
for pos in &domain_set {
for parent_pos in index.entry_by_pos(*pos).parent_positions() {
if domain_set.contains(&parent_pos) {
sets.union(*pos, parent_pos);
}
}
}

// Identify disjoint sets reachable from sources.
let set_reps: HashSet<_> = intersection_by(
self.evaluate(sources)?.positions(),
EagerRevWalk::new(domain_vec.iter().copied()),
|pos1, pos2| pos1.cmp(pos2).reverse(),
)
.attach(index)
.map(|pos| sets.find(pos))
.collect();

let positions = domain_vec
.into_iter()
.filter(|pos| set_reps.contains(&sets.find(*pos)))
.collect_vec();
Ok(Box::new(EagerRevset { positions }))
}
ResolvedExpression::Heads(candidates) => {
let candidate_set = self.evaluate(candidates)?;
let head_positions: BTreeSet<_> =
Expand Down
1 change: 1 addition & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ pub mod submodule_store;
pub mod transaction;
pub mod tree;
pub mod tree_builder;
pub mod union_find;
pub mod view;
pub mod working_copy;
pub mod workspace;
37 changes: 37 additions & 0 deletions lib/src/revset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ pub enum RevsetExpression {
heads: Rc<RevsetExpression>,
// TODO: maybe add generation_from_roots/heads?
},
// Commits reachable from "sources" within "domain"
Reachable {
sources: Rc<RevsetExpression>,
domain: Rc<RevsetExpression>,
},
Heads(Rc<RevsetExpression>),
Roots(Rc<RevsetExpression>),
Latest {
Expand Down Expand Up @@ -379,6 +384,18 @@ impl RevsetExpression {
self.dag_range_to(self)
}

/// Builds a `Reachable` expression with this set of commits as the sources.
///
/// The resulting expression stands for the commits inside `domain` that can
/// be reached from the sources by following parent or child edges.
pub fn reachable(
    self: &Rc<RevsetExpression>,
    domain: &Rc<RevsetExpression>,
) -> Rc<RevsetExpression> {
    // Both operands are shared, so only the reference counts are bumped.
    let sources = Rc::clone(self);
    let domain = Rc::clone(domain);
    Rc::new(RevsetExpression::Reachable { sources, domain })
}

/// Commits reachable from `heads` but not from `self`.
pub fn range(
self: &Rc<RevsetExpression>,
Expand Down Expand Up @@ -507,6 +524,11 @@ pub enum ResolvedExpression {
heads: Box<ResolvedExpression>,
generation_from_roots: Range<u64>,
},
/// Commits reachable from `sources` within `domain`.
Reachable {
sources: Box<ResolvedExpression>,
domain: Box<ResolvedExpression>,
},
Heads(Box<ResolvedExpression>),
Roots(Box<ResolvedExpression>),
Latest {
Expand Down Expand Up @@ -635,6 +657,12 @@ static BUILTIN_FUNCTION_MAP: Lazy<HashMap<&'static str, RevsetFunction>> = Lazy:
let candidates = parse_expression_rule(arg.into_inner(), state)?;
Ok(candidates.connected())
});
map.insert("reachable", |name, arguments_pair, state| {
let ([source_arg, domain_arg], []) = expect_arguments(name, arguments_pair)?;
let sources = parse_expression_rule(source_arg.into_inner(), state)?;
let domain = parse_expression_rule(domain_arg.into_inner(), state)?;
Ok(sources.reachable(&domain))
});
map.insert("none", |name, arguments_pair, _state| {
expect_no_arguments(name, arguments_pair)?;
Ok(RevsetExpression::none())
Expand Down Expand Up @@ -960,6 +988,10 @@ fn try_transform_expression<E>(
transform_rec_pair((roots, heads), pre, post)?
.map(|(roots, heads)| RevsetExpression::DagRange { roots, heads })
}
RevsetExpression::Reachable { sources, domain } => {
transform_rec_pair((sources, domain), pre, post)?
.map(|(sources, domain)| RevsetExpression::Reachable { sources, domain })
}
RevsetExpression::Heads(candidates) => {
transform_rec(candidates, pre, post)?.map(RevsetExpression::Heads)
}
Expand Down Expand Up @@ -1748,6 +1780,10 @@ impl VisibilityResolutionContext<'_> {
heads: self.resolve(heads).into(),
generation_from_roots: GENERATION_RANGE_FULL,
},
RevsetExpression::Reachable { sources, domain } => ResolvedExpression::Reachable {
sources: self.resolve(sources).into(),
domain: self.resolve(domain).into(),
},
RevsetExpression::Heads(candidates) => {
ResolvedExpression::Heads(self.resolve(candidates).into())
}
Expand Down Expand Up @@ -1833,6 +1869,7 @@ impl VisibilityResolutionContext<'_> {
| RevsetExpression::Descendants { .. }
| RevsetExpression::Range { .. }
| RevsetExpression::DagRange { .. }
| RevsetExpression::Reachable { .. }
| RevsetExpression::Heads(_)
| RevsetExpression::Roots(_)
| RevsetExpression::Latest { .. } => {
Expand Down
157 changes: 157 additions & 0 deletions lib/src/union_find.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// Copyright 2024 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This module implements a UnionFind<T> type which can be used to
//! efficiently calculate disjoint sets for any data type.

use std::collections::HashMap;
use std::hash::Hash;

/// One entry of the disjoint-set forest.
///
/// `root` is the item this entry currently points at. `size` is the number of
/// items in the set; it is only kept up to date on the entry that belongs to
/// the set's representative.
#[derive(Clone, Copy)]
struct Node<T> {
    root: T,
    size: u32,
}

/// Disjoint-set ("union-find") data structure:
/// <https://en.wikipedia.org/wiki/Disjoint-set_data_structure>
///
/// Items are registered lazily as singleton sets the first time they are
/// looked up. Sets are joined by size to amortize cost.
#[derive(Clone)]
pub struct UnionFind<T> {
    roots: HashMap<T, Node<T>>,
}

impl<T> Default for UnionFind<T>
where
    T: Copy + Eq + Hash,
{
    /// Returns a structure that does not know about any item yet.
    fn default() -> Self {
        Self {
            roots: HashMap::default(),
        }
    }
}

impl<T> UnionFind<T>
where
    T: Copy + Eq + Hash,
{
    /// Creates a new empty UnionFind data structure.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the representative of the set that `item` belongs to.
    ///
    /// An item that has not been seen before is recorded as a singleton set
    /// and is therefore its own representative.
    pub fn find(&mut self, item: T) -> T {
        let Node { root, .. } = self.find_node(item);
        root
    }

    /// Looks up the representative's entry for `item`, registering `item` as
    /// a singleton if it is unknown.
    ///
    /// Every entry visited on the way to the representative is overwritten
    /// with the representative's entry, so later lookups take a single hop.
    fn find_node(&mut self, item: T) -> Node<T> {
        match self.roots.get(&item).copied() {
            // First sighting: the item forms a set of its own.
            None => {
                let singleton = Node {
                    root: item,
                    size: 1,
                };
                self.roots.insert(item, singleton);
                singleton
            }
            // The item is the representative of its set.
            Some(entry) if entry.root == item => entry,
            // Follow the link upwards, then compress the path.
            Some(entry) => {
                let representative = self.find_node(entry.root);
                self.roots.insert(item, representative);
                representative
            }
        }
    }

    /// Unions the disjoint sets connected to `a` and `b`.
    ///
    /// The representative of the larger set is kept; when both sets have the
    /// same size, the representative of `a`'s set is kept. Nothing changes if
    /// both items are already in the same set.
    pub fn union(&mut self, a: T, b: T) {
        let left = self.find_node(a);
        let right = self.find_node(b);
        if left.root != right.root {
            let winner = if left.size < right.size {
                right.root
            } else {
                left.root
            };
            let merged = Node {
                root: winner,
                size: left.size + right.size,
            };
            // Both former representatives now describe the merged set.
            for former_root in [left.root, right.root] {
                self.roots.insert(former_root, merged);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use itertools::Itertools;

    use super::*;

    /// Singletons, pairwise unions, and a final merge into a single set.
    #[test]
    fn test_basic() {
        let mut forest = UnionFind::<i32>::new();

        // A freshly seen item is its own representative.
        for item in 1..=3 {
            assert_eq!(forest.find(item), item);
        }

        // Build two pairs; item 4 is registered on demand by `union`.
        forest.union(1, 2);
        forest.union(3, 4);
        let first_pair = forest.find(1);
        assert_eq!(first_pair, forest.find(2));
        let second_pair = forest.find(3);
        assert_eq!(second_pair, forest.find(4));
        assert_ne!(forest.find(1), forest.find(3));

        // Joining the pairs leaves a single representative for all four.
        forest.union(1, 3);
        assert!((1..=4).map(|item| forest.find(item)).all_equal());
    }

    /// The representative of the larger set survives a merge, regardless of
    /// argument order.
    #[test]
    fn test_union_by_size() {
        let mut forest = UnionFind::<i32>::new();

        // One set with three members and one with two.
        forest.union(1, 2);
        forest.union(2, 3);
        forest.union(4, 5);
        let set3 = forest.find(1);
        let set2 = forest.find(4);
        assert_ne!(set3, set2);

        // Larger set passed as the first argument.
        let mut large_first = forest.clone();
        large_first.union(1, 4);
        assert_eq!(large_first.find(1), set3);
        assert_eq!(large_first.find(4), set3);

        // Larger set passed as the second argument.
        let mut small_first = forest.clone();
        small_first.union(4, 1);
        assert_eq!(small_first.find(1), set3);
        assert_eq!(small_first.find(4), set3);
    }
}
Loading