index: build reachable change ids set lazily
Instead of abstracting RevWalk over borrowed/Arc-ed index types, I decided to
implement bitset-based ancestor traversal. It's simpler and probably faster so
long as the set isn't sparse.

"jj log" without working copy snapshot:
```
% hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2 \
  -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \
  "target/release-with-debug/{bin} -R ~/mirrors/linux \
   --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=\"\"'"
Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""'
  Time (mean ± σ):     271.3 ms ±   9.9 ms    [User: 183.8 ms, System: 87.7 ms]
  Range (min … max):   250.5 ms … 282.7 ms    20 runs

Benchmark 3: target/release-with-debug/jj-2 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""'
  Time (mean ± σ):     177.5 ms ±  12.6 ms    [User: 94.6 ms, System: 82.9 ms]
  Range (min … max):   154.4 ms … 188.7 ms    20 runs

Relative speed comparison
        1.53 ±  0.12  target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""'
        1.00          target/release-with-debug/jj-2 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""'
```

"jj status" with working copy snapshot (watchman enabled):
```
% hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2 \
  -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \
  "target/release-with-debug/{bin} -R ~/mirrors/linux \
   status --config-toml='revsets.short-prefixes=\"\"'"
Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""'
  Time (mean ± σ):     318.6 ms ±  12.6 ms    [User: 219.1 ms, System: 94.1 ms]
  Range (min … max):   294.2 ms … 333.0 ms    20 runs

Benchmark 3: target/release-with-debug/jj-2 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""'
  Time (mean ± σ):     214.7 ms ±  15.0 ms    [User: 117.4 ms, System: 96.1 ms]
  Range (min … max):   198.4 ms … 243.3 ms    20 runs

Relative speed comparison
        1.48 ±  0.12  target/release-with-debug/jj-1 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""'
        1.00          target/release-with-debug/jj-2 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""'
```
yuja committed Feb 18, 2024
1 parent adcb01e commit a1b16c5
Showing 2 changed files with 220 additions and 22 deletions.
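Throughout both files, the reachable set is a dense bitmap over index positions: position `pos` maps to bit `pos % 64` of word `pos / 64`, so a membership test is one mask-and-compare. A minimal standalone sketch of that indexing scheme (the `DenseBitSet` below is a hypothetical illustration, not the committed `AncestorsBitSet`):

```rust
// Minimal sketch of the dense-bitset indexing used throughout this commit.
// `DenseBitSet` is a hypothetical stand-in, not the committed type.
struct DenseBitSet {
    words: Vec<u64>,
}

impl DenseBitSet {
    fn with_capacity(len: u32) -> Self {
        // One 64-bit word per 64 positions, rounded up.
        let words = vec![0; usize::try_from(len.div_ceil(u64::BITS)).unwrap()];
        DenseBitSet { words }
    }

    fn insert(&mut self, pos: u32) {
        self.words[usize::try_from(pos / u64::BITS).unwrap()] |= 1_u64 << (pos % u64::BITS);
    }

    fn contains(&self, pos: u32) -> bool {
        self.words[usize::try_from(pos / u64::BITS).unwrap()] & (1_u64 << (pos % u64::BITS)) != 0
    }
}

fn main() {
    let mut set = DenseBitSet::with_capacity(257); // ceil(257/64) = 5 words
    set.insert(256);
    assert!(set.contains(256));
    assert!(!set.contains(0));
}
```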
33 changes: 11 additions & 22 deletions lib/src/default_index/composite.rs
```diff
@@ -17,7 +17,7 @@
 use std::cmp::{max, min, Ordering};
 use std::collections::{BTreeSet, BinaryHeap, HashSet};
 use std::iter;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use itertools::Itertools;
 
@@ -26,7 +26,7 @@ use super::entry::{
     SmallLocalPositionsVec,
 };
 use super::readonly::ReadonlyIndexSegment;
-use super::rev_walk::RevWalk;
+use super::rev_walk::{AncestorsBitSet, RevWalk};
 use super::revset_engine;
 use crate::backend::{ChangeId, CommitId};
 use crate::hex_util;
@@ -496,28 +496,19 @@ impl Index for CompositeIndex<'_> {
 
 pub(super) struct ChangeIdIndexImpl<I> {
     index: I,
-    reachable_bitset: Vec<u64>,
+    reachable_set: Mutex<AncestorsBitSet>,
 }
 
 impl<I: AsCompositeIndex> ChangeIdIndexImpl<I> {
     pub fn new(index: I, heads: &mut dyn Iterator<Item = &CommitId>) -> ChangeIdIndexImpl<I> {
-        // TODO: Calculate reachable bitset lazily.
         let composite = index.as_composite();
-        let bitset_len =
-            usize::try_from(u32::div_ceil(composite.num_commits(), u64::BITS)).unwrap();
-        let mut reachable_bitset = vec![0; bitset_len]; // request zeroed page
-        let head_positions = heads
-            .map(|id| composite.commit_id_to_pos(id).unwrap())
-            .collect_vec();
-        for entry in composite.walk_revs(&head_positions, &[]) {
-            let IndexPosition(pos) = entry.position();
-            let bitset_pos = pos / u64::BITS;
-            let bit = 1_u64 << (pos % u64::BITS);
-            reachable_bitset[usize::try_from(bitset_pos).unwrap()] |= bit;
+        let mut reachable_set = AncestorsBitSet::with_capacity(composite.num_commits());
+        for id in heads {
+            reachable_set.add_head(composite.commit_id_to_pos(id).unwrap());
         }
         ChangeIdIndexImpl {
             index,
-            reachable_bitset,
+            reachable_set: Mutex::new(reachable_set),
         }
     }
 }
```
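Why the set is wrapped in a `Mutex`: the lookup method in the next hunk takes `&self` on a type bounded by `Send + Sync`, yet the first lookup must mutate the lazily filled set. A `Mutex` supplies that interior mutability while keeping the containing type `Sync`. A minimal standalone sketch of the same pattern, with a hypothetical `SharedCache` standing in for `ChangeIdIndexImpl`:

```rust
use std::sync::Mutex;

// Hypothetical stand-in for ChangeIdIndexImpl: a lookup that takes &self
// but must extend a lazily computed set on first use.
struct SharedCache {
    visited: Mutex<Vec<u32>>, // stand-in for Mutex<AncestorsBitSet>
}

impl SharedCache {
    fn lookup(&self, pos: u32) -> bool {
        let mut visited = self.visited.lock().unwrap();
        if !visited.contains(&pos) {
            visited.push(pos); // lazily "visit" on first query
        }
        visited.contains(&pos)
    }
}

fn main() {
    let cache = SharedCache { visited: Mutex::new(Vec::new()) };
    assert!(cache.lookup(42)); // first call populates the cache
    assert!(cache.lookup(42)); // second call hits it
}
```

With that in place, the lookup path only pays for traversal on demand: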
```diff
@@ -534,14 +525,12 @@ impl<I: AsCompositeIndex + Send + Sync> ChangeIdIndex for ChangeIdIndexImpl<I> {
         match index.resolve_change_id_prefix(prefix) {
             PrefixResolution::NoMatch => PrefixResolution::NoMatch,
             PrefixResolution::SingleMatch((_change_id, positions)) => {
+                debug_assert!(positions.iter().tuple_windows().all(|(a, b)| a < b));
+                let mut reachable_set = self.reachable_set.lock().unwrap();
+                reachable_set.visit_until(index, *positions.first().unwrap());
                 let reachable_commit_ids = positions
                     .iter()
-                    .filter(|IndexPosition(pos)| {
-                        let bitset_pos = pos / u64::BITS;
-                        let bit = 1_u64 << (pos % u64::BITS);
-                        let bits = self.reachable_bitset[usize::try_from(bitset_pos).unwrap()];
-                        bits & bit != 0
-                    })
+                    .filter(|&&pos| reachable_set.contains(pos))
                     .map(|&pos| index.entry_by_pos(pos).commit_id())
                     .collect_vec();
                 if reachable_commit_ids.is_empty() {
```
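One subtlety in the hunk above: `visit_until` only guarantees answers for positions at or above the visited frontier, and `positions` is sorted in ascending index order (which is exactly what the new `debug_assert` checks), so visiting down to `*positions.first().unwrap()` makes every candidate's membership final. A standalone sketch of that ordering check, assuming the `itertools` crate the file already imports:

```rust
use itertools::Itertools as _;

// Same check as the committed debug_assert: every adjacent pair ascends.
fn is_strictly_ascending(positions: &[u32]) -> bool {
    positions.iter().tuple_windows().all(|(a, b)| a < b)
}

fn main() {
    assert!(is_strictly_ascending(&[3, 17, 200]));
    assert!(!is_strictly_ascending(&[17, 3, 200]));
    // Because the slice ascends, visiting ancestors down to positions[0]
    // finalizes membership for every later (higher) position as well.
}
```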
209 changes: 209 additions & 0 deletions lib/src/default_index/rev_walk.rs
```diff
@@ -503,6 +503,78 @@ impl<'a> Iterator for RevWalkDescendants<'a> {
 
 impl FusedIterator for RevWalkDescendants<'_> {}
 
+/// Computes the ancestors set lazily.
+///
+/// This is similar to `RevWalk` functionality-wise, but implemented with
+/// different design goals:
+///
+/// * lazy updates with no lifetimed fields
+/// * optimized for a dense ancestors set
+/// * optimized for testing set membership
+/// * no iterator API (which could be implemented on top)
+#[derive(Clone, Debug)]
+pub(super) struct AncestorsBitSet {
+    bitset: Vec<u64>,
+    last_visited_bitset_pos: u32,
+}
+
+impl AncestorsBitSet {
+    /// Creates a bit set of the specified capacity.
+    pub fn with_capacity(len: u32) -> Self {
+        let bitset_len = usize::try_from(u32::div_ceil(len, u64::BITS)).unwrap();
+        AncestorsBitSet {
+            bitset: vec![0; bitset_len], // request zeroed page
+            last_visited_bitset_pos: 0,
+        }
+    }
+
+    /// Adds head `pos` to the set.
+    ///
+    /// Panics if `pos` exceeds the capacity.
+    pub fn add_head(&mut self, pos: IndexPosition) {
+        let bitset_pos = pos.0 / u64::BITS;
+        let bit = 1_u64 << (pos.0 % u64::BITS);
+        self.bitset[usize::try_from(bitset_pos).unwrap()] |= bit;
+        self.last_visited_bitset_pos = max(self.last_visited_bitset_pos, bitset_pos + 1);
+    }
+
+    /// Returns `true` if the given `pos` is an ancestor of the heads.
+    ///
+    /// Panics if `pos` exceeds the capacity or has not been visited yet.
+    pub fn contains(&self, pos: IndexPosition) -> bool {
+        let bitset_pos = pos.0 / u64::BITS;
+        let bit = 1_u64 << (pos.0 % u64::BITS);
+        assert!(bitset_pos >= self.last_visited_bitset_pos);
+        self.bitset[usize::try_from(bitset_pos).unwrap()] & bit != 0
+    }
+
+    /// Updates the set by visiting ancestors down to the given `to_visit_pos`.
+    pub fn visit_until(&mut self, index: CompositeIndex, to_visit_pos: IndexPosition) {
+        let to_visit_bitset_pos = to_visit_pos.0 / u64::BITS;
+        if to_visit_bitset_pos >= self.last_visited_bitset_pos {
+            return; // already visited down to that word
+        }
+        // Scan words from high index to low; within each word, take bits
+        // from the MSB down. Parents always sit at lower positions, so a
+        // parent in the current word is OR-ed back into `unvisited_bits`,
+        // and parents in lower words are picked up by later iterations.
+        for visiting_bitset_pos in (to_visit_bitset_pos..self.last_visited_bitset_pos).rev() {
+            let mut unvisited_bits = self.bitset[usize::try_from(visiting_bitset_pos).unwrap()];
+            while unvisited_bits != 0 {
+                let bit_pos = u64::BITS - unvisited_bits.leading_zeros() - 1; // from MSB
+                unvisited_bits ^= 1_u64 << bit_pos;
+                let current_pos = IndexPosition(visiting_bitset_pos * u64::BITS + bit_pos);
+                for parent_pos in index.entry_by_pos(current_pos).parent_positions() {
+                    assert!(parent_pos < current_pos);
+                    let parent_bitset_pos = parent_pos.0 / u64::BITS;
+                    let bit = 1_u64 << (parent_pos.0 % u64::BITS);
+                    self.bitset[usize::try_from(parent_bitset_pos).unwrap()] |= bit;
+                    if visiting_bitset_pos == parent_bitset_pos {
+                        unvisited_bits |= bit;
+                    }
+                }
+            }
+        }
+        self.last_visited_bitset_pos = to_visit_bitset_pos;
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use itertools::Itertools as _;
```
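A note on the traversal order in `visit_until` above: words are scanned from high index to low, and within each word bits are peeled off from the most significant end, which matches reverse topological order (children have higher index positions than their parents). A self-contained sketch of just that bit-scanning step:

```rust
// Standalone demonstration of the MSB-first bit scan used by visit_until:
// repeatedly take the highest set bit of a word and clear it.
fn scan_msb_first(mut word: u64) -> Vec<u32> {
    let mut bit_positions = Vec::new();
    while word != 0 {
        let bit_pos = u64::BITS - word.leading_zeros() - 1; // highest set bit
        word ^= 1_u64 << bit_pos; // clear it so the loop terminates
        bit_positions.push(bit_pos);
    }
    bit_positions
}

fn main() {
    // 0b1010_0100 has bits 7, 5, and 2 set; MSB-first order is [7, 5, 2].
    assert_eq!(scan_msb_first(0b1010_0100), vec![7, 5, 2]);
}
```

Because `assert!(parent_pos < current_pos)` holds, a parent bit OR-ed into the word currently being scanned always lands below `bit_pos`, so the inner `while` loop still terminates.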
```diff
@@ -512,6 +584,12 @@ mod tests {
     use super::*;
     use crate::backend::{ChangeId, CommitId};
 
+    /// Generator of unique 16-byte CommitId excluding root id
+    fn commit_id_generator() -> impl FnMut() -> CommitId {
+        let mut iter = (1_u128..).map(|n| CommitId::new(n.to_le_bytes().into()));
+        move || iter.next().unwrap()
+    }
+
     /// Generator of unique 16-byte ChangeId excluding root id
     fn change_id_generator() -> impl FnMut() -> ChangeId {
         let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into()));
@@ -864,4 +942,135 @@ mod tests {
             [&id_2, &id_4, &id_7].map(Clone::clone)
         );
     }
+
+    #[test]
+    fn test_ancestors_bit_set() {
+        let mut new_commit_id = commit_id_generator();
+        let mut new_change_id = change_id_generator();
+        let mut mutable_index = DefaultMutableIndex::full(16, 16);
+
+        // F        F = 256
+        // |\       E = 193,194,195,..,254
+        // E | D    D = 192,255
+        // | |/     C = 66,68,70,..,190
+        // B C      B = 65,67,69,..,189,191
+        // |/       A = 0,1,2,..,64
+        // A
+        let id_a0 = new_commit_id();
+        mutable_index.add_commit_data(id_a0.clone(), new_change_id(), &[]);
+        let id_a64 = (1..=64).fold(id_a0.clone(), |parent_id, i| {
+            assert_eq!(mutable_index.as_composite().num_commits(), i);
+            let id = new_commit_id();
+            mutable_index.add_commit_data(id.clone(), new_change_id(), &[parent_id]);
+            id
+        });
+        let (id_b189, id_c190) = (65..=190).step_by(2).fold(
+            (id_a64.clone(), id_a64.clone()),
+            |(parent_id_b, parent_id_c), i| {
+                assert_eq!(mutable_index.as_composite().num_commits(), i);
+                let id_b = new_commit_id();
+                let id_c = new_commit_id();
+                mutable_index.add_commit_data(id_b.clone(), new_change_id(), &[parent_id_b]);
+                mutable_index.add_commit_data(id_c.clone(), new_change_id(), &[parent_id_c]);
+                (id_b, id_c)
+            },
+        );
+        let id_b191 = new_commit_id();
+        mutable_index.add_commit_data(id_b191.clone(), new_change_id(), &[id_b189]);
+        let id_d192 = new_commit_id();
+        mutable_index.add_commit_data(id_d192.clone(), new_change_id(), &[id_c190.clone()]);
+        let id_e254 = (193..=254).fold(id_b191.clone(), |parent_id, i| {
+            assert_eq!(mutable_index.as_composite().num_commits(), i);
+            let id = new_commit_id();
+            mutable_index.add_commit_data(id.clone(), new_change_id(), &[parent_id]);
+            id
+        });
+        let id_d255 = new_commit_id();
+        mutable_index.add_commit_data(id_d255.clone(), new_change_id(), &[id_d192.clone()]);
+        let id_f256 = new_commit_id();
+        mutable_index.add_commit_data(
+            id_f256.clone(),
+            new_change_id(),
+            &[id_c190.clone(), id_e254.clone()],
+        );
+        assert_eq!(mutable_index.as_composite().num_commits(), 257);
+
+        let index = mutable_index.as_composite();
+        let to_pos = |id: &CommitId| index.commit_id_to_pos(id).unwrap();
+        let new_ancestors_set = |heads: &[&CommitId]| {
+            let mut set = AncestorsBitSet::with_capacity(index.num_commits());
+            for &id in heads {
+                set.add_head(to_pos(id));
+            }
+            set
+        };
+
+        // Nothing reachable
+        let set = new_ancestors_set(&[]);
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        for pos in (0..=256).map(IndexPosition) {
+            assert!(!set.contains(pos), "{pos:?} should be unreachable");
+        }
+
+        // All reachable
+        let mut set = new_ancestors_set(&[&id_f256, &id_d255]);
+        assert_eq!(set.last_visited_bitset_pos, 5);
+        set.visit_until(index, to_pos(&id_f256));
+        assert_eq!(set.last_visited_bitset_pos, 4);
+        assert!(set.contains(to_pos(&id_f256)));
+        set.visit_until(index, to_pos(&id_d192));
+        assert_eq!(set.last_visited_bitset_pos, 3);
+        assert!(set.contains(to_pos(&id_e254)));
+        assert!(set.contains(to_pos(&id_d255)));
+        assert!(set.contains(to_pos(&id_d192)));
+        set.visit_until(index, to_pos(&id_a0));
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        set.visit_until(index, to_pos(&id_f256)); // should be noop
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        for pos in (0..=256).map(IndexPosition) {
+            assert!(set.contains(pos), "{pos:?} should be reachable");
+        }
+
+        // A, B, C, E, F are reachable
+        let mut set = new_ancestors_set(&[&id_f256]);
+        assert_eq!(set.last_visited_bitset_pos, 5);
+        set.visit_until(index, to_pos(&id_f256));
+        assert_eq!(set.last_visited_bitset_pos, 4);
+        assert!(set.contains(to_pos(&id_f256)));
+        set.visit_until(index, to_pos(&id_d192));
+        assert_eq!(set.last_visited_bitset_pos, 3);
+        assert!(!set.contains(to_pos(&id_d255)));
+        assert!(!set.contains(to_pos(&id_d192)));
+        set.visit_until(index, to_pos(&id_c190));
+        assert_eq!(set.last_visited_bitset_pos, 2);
+        assert!(set.contains(to_pos(&id_c190)));
+        set.visit_until(index, to_pos(&id_a64));
+        assert_eq!(set.last_visited_bitset_pos, 1);
+        assert!(set.contains(to_pos(&id_b191)));
+        assert!(set.contains(to_pos(&id_a64)));
+        set.visit_until(index, to_pos(&id_a0));
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        assert!(set.contains(to_pos(&id_a0)));
+
+        // A, C, D are reachable
+        let mut set = new_ancestors_set(&[&id_d255]);
+        assert_eq!(set.last_visited_bitset_pos, 4);
+        assert!(!set.contains(to_pos(&id_f256)));
+        set.visit_until(index, to_pos(&id_e254));
+        assert_eq!(set.last_visited_bitset_pos, 3);
+        assert!(!set.contains(to_pos(&id_e254)));
+        set.visit_until(index, to_pos(&id_d255));
+        assert_eq!(set.last_visited_bitset_pos, 3);
+        assert!(set.contains(to_pos(&id_d255)));
+        set.visit_until(index, to_pos(&id_b191));
+        assert_eq!(set.last_visited_bitset_pos, 2);
+        assert!(!set.contains(to_pos(&id_b191)));
+        set.visit_until(index, to_pos(&id_c190));
+        assert_eq!(set.last_visited_bitset_pos, 2);
+        assert!(set.contains(to_pos(&id_c190)));
+        set.visit_until(index, to_pos(&id_a0));
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        assert!(set.contains(to_pos(&id_a64)));
+        assert!(set.contains(to_pos(&id_a0)));
+    }
 }
```
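The test graph is sized so its 257 positions span five 64-bit words, with the letter groups straddling word boundaries, because `visit_until` tracks progress per word rather than per commit. A quick standalone check of the word arithmetic behind the `last_visited_bitset_pos` assertions (`word_of` is a hypothetical helper mirroring the internal `pos / u64::BITS`):

```rust
// Hypothetical helper: map an index position to its 64-bit word, as
// AncestorsBitSet does internally.
fn word_of(pos: u32) -> u32 {
    pos / u64::BITS
}

fn main() {
    // 257 commits span ceil(257/64) = 5 words, so a set with head F = 256
    // starts with last_visited_bitset_pos = word_of(256) + 1 = 5.
    assert_eq!(257_u32.div_ceil(u64::BITS), 5);
    assert_eq!(word_of(256) + 1, 5);
    // Visiting down to D = 192 stops at word 3, which is why the test
    // expects last_visited_bitset_pos == 3 at that point.
    assert_eq!(word_of(192), 3);
    assert_eq!(word_of(64), 1); // A's top commit sits right at a boundary
}
```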
