From 3c7aa75b9b807509624364209dad2330ffa7e8f6 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Fri, 22 Dec 2023 17:32:49 +0900 Subject: [PATCH] index: switch to persistent change id index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shortest change id prefix will become a few digits longer, but I think that's acceptable. Entries included in the "revsets.short-prefixes" set are unaffected. The reachable set is calculated eagerly, but this is still faster as we no longer need to sort the reachable entries by change id. The lazy version will save another ~100ms in mid-size repos. "jj log" without working copy snapshot: ``` % hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2 \ -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \ "target/release-with-debug/{bin} -R ~/mirrors/linux \ --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=\"\"'" Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 353.6 ms ± 11.9 ms [User: 266.7 ms, System: 87.0 ms] Range (min … max): 329.0 ms … 365.6 ms 20 runs Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 271.3 ms ± 9.9 ms [User: 183.8 ms, System: 87.7 ms] Range (min … max): 250.5 ms … 282.7 ms 20 runs Relative speed comparison 1.99 ± 0.16 target/release-with-debug/jj-0 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' 1.53 ± 0.12 target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' ``` "jj status" with working copy snapshot (watchman enabled): ``` % hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2 \ -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \ "target/release-with-debug/{bin} -R ~/mirrors/linux \ status --config-toml='revsets.short-prefixes=\"\"'" Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 396.6 ms ± 10.1 ms [User: 300.7 ms, System: 94.0 ms] Range (min … max): 373.6 ms … 408.0 ms 20 runs Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 318.6 ms ± 12.6 ms [User: 219.1 ms, System: 94.1 ms] Range (min … max): 294.2 ms … 333.0 ms 20 runs Relative speed comparison 1.85 ± 0.14 target/release-with-debug/jj-0 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""' 1.48 ± 0.12 target/release-with-debug/jj-1 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""' ``` --- cli/tests/test_log_command.rs | 8 ++-- lib/src/default_index/composite.rs | 75 +++++++++++++++++++----------- lib/src/default_index/readonly.rs | 1 - lib/tests/test_index.rs | 17 ++----- 4 files changed, 56 insertions(+), 45 deletions(-) diff --git a/cli/tests/test_log_command.rs b/cli/tests/test_log_command.rs index 6721d29efb..1dd598e9d3 100644 --- a/cli/tests/test_log_command.rs +++ b/cli/tests/test_log_command.rs @@ -606,10 +606,10 @@ fn test_log_prefix_highlight_counts_hidden_commits() { insta::assert_snapshot!( test_env.jj_cmd_success(&repo_path, &["log", "-T", prefix_format]), @r###" - @ Change w[qnwkozpkust] 44[4c3c5066d3] - │ ◉ Change q[pvuntsmwlqt] initial ba[1a30916d29] original + @ Change wq[nwkozpkust] 44[4c3c5066d3] + │ ◉ Change qpv[untsmwlqt] initial ba[1a30916d29] original ├─╯ - ◉ Change z[zzzzzzzzzzz] 00[0000000000] + ◉ Change zzz[zzzzzzzzz] 00[0000000000] "### ); insta::assert_snapshot!( @@ -621,7 +621,7 @@ fn test_log_prefix_highlight_counts_hidden_commits() { insta::assert_snapshot!( test_env.jj_cmd_success(&repo_path, &["log", "-r", "44", "-T", prefix_format]), @r###" - @ Change w[qnwkozpkust] 44[4c3c5066d3] + @ Change wq[nwkozpkust] 44[4c3c5066d3] │ ~ "### diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs index 3e8eac08b9..4cc773ee3c 100644 --- a/lib/src/default_index/composite.rs +++ b/lib/src/default_index/composite.rs @@ -30,7 +30,6 @@ use super::rev_walk::RevWalk; use super::revset_engine; use crate::backend::{ChangeId, CommitId}; use crate::hex_util; -use crate::id_prefix::{IdIndex, IdIndexSource, IdIndexSourceEntry}; use crate::index::{AllHeadsForGcUnsupported, ChangeIdIndex, Index}; use crate::object_id::{HexPrefix, ObjectId, PrefixResolution}; use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; @@ -202,7 +201,6 @@ impl<'a> CompositeIndex<'a> { /// Suppose the given `change_id` exists, returns the minimum prefix length /// to disambiguate it within all the indexed ids including hidden ones. - #[cfg(test)] // TODO pub(super) fn shortest_unique_change_id_prefix_len(&self, change_id: &ChangeId) -> usize { let (prev_id, next_id) = self.resolve_neighbor_change_ids(change_id); itertools::chain(prev_id, next_id) @@ -214,7 +212,6 @@ impl<'a> CompositeIndex<'a> { /// Suppose the given `change_id` exists, returns the previous and next /// change ids in lexicographical order. The returned change ids may be /// hidden. - #[cfg(test)] // TODO pub(super) fn resolve_neighbor_change_ids( &self, change_id: &ChangeId, @@ -234,7 +231,6 @@ impl<'a> CompositeIndex<'a> { /// returned entries may be hidden. /// /// The returned index positions are sorted in ascending order. - #[cfg(test)] // TODO pub(super) fn resolve_change_id_prefix( &self, prefix: &HexPrefix, @@ -500,50 +496,73 @@ impl Index for CompositeIndex<'_> { pub(super) struct ChangeIdIndexImpl { index: I, - pos_by_change: IdIndex, + reachable_bitset: Vec, } impl ChangeIdIndexImpl { pub fn new(index: I, heads: &mut dyn Iterator) -> ChangeIdIndexImpl { - let mut pos_by_change = IdIndex::builder(); + // TODO: Calculate reachable bitset lazily. let composite = index.as_composite(); + let bitset_len = + usize::try_from(u32::div_ceil(composite.num_commits(), u64::BITS)).unwrap(); + let mut reachable_bitset = vec![0; bitset_len]; // request zeroed page let head_positions = heads .map(|id| composite.commit_id_to_pos(id).unwrap()) .collect_vec(); for entry in composite.walk_revs(&head_positions, &[]) { - pos_by_change.insert(&entry.change_id(), entry.position()); + let IndexPosition(pos) = entry.position(); + let bitset_pos = pos / u64::BITS; + let bit = 1_u64 << (pos % u64::BITS); + reachable_bitset[usize::try_from(bitset_pos).unwrap()] |= bit; } - Self { + ChangeIdIndexImpl { index, - pos_by_change: pos_by_change.build(), + reachable_bitset, } } } impl ChangeIdIndex for ChangeIdIndexImpl { + /// Resolves change id prefix among all ids, then filters out hidden + /// entries. + /// + /// If `SingleMatch` is returned, the commits including in the set are all + /// visible. `AmbiguousMatch` may be returned even if the prefix is unique + /// within the visible entries. fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution> { - self.pos_by_change - .resolve_prefix_with(self.index.as_composite(), prefix, |entry| entry.commit_id()) - .map(|(_, commit_ids)| commit_ids) + let index = self.index.as_composite(); + match index.resolve_change_id_prefix(prefix) { + PrefixResolution::NoMatch => PrefixResolution::NoMatch, + PrefixResolution::SingleMatch((_change_id, positions)) => { + let reachable_commit_ids = positions + .iter() + .filter(|IndexPosition(pos)| { + let bitset_pos = pos / u64::BITS; + let bit = 1_u64 << (pos % u64::BITS); + let bits = self.reachable_bitset[usize::try_from(bitset_pos).unwrap()]; + bits & bit != 0 + }) + .map(|&pos| index.entry_by_pos(pos).commit_id()) + .collect_vec(); + if reachable_commit_ids.is_empty() { + PrefixResolution::NoMatch + } else { + PrefixResolution::SingleMatch(reachable_commit_ids) + } + } + PrefixResolution::AmbiguousMatch => PrefixResolution::AmbiguousMatch, + } } + /// Calculates the shortest prefix length of the given `change_id` among all + /// ids including hidden entries. + /// + /// The returned length is usually a few digits longer than the minimum + /// length to disambiguate within the visible entries. fn shortest_unique_prefix_len(&self, change_id: &ChangeId) -> usize { - self.pos_by_change - .shortest_unique_prefix_len(self.index.as_composite(), change_id) - } -} - -impl<'index> IdIndexSource for CompositeIndex<'index> { - type Entry = IndexEntry<'index>; - - fn entry_at(&self, pointer: &IndexPosition) -> Self::Entry { - self.entry_by_pos(*pointer) - } -} - -impl IdIndexSourceEntry for IndexEntry<'_> { - fn to_key(&self) -> ChangeId { - self.change_id() + self.index + .as_composite() + .shortest_unique_change_id_prefix_len(change_id) } } diff --git a/lib/src/default_index/readonly.rs b/lib/src/default_index/readonly.rs index 688729ff15..7b7157c4ae 100644 --- a/lib/src/default_index/readonly.rs +++ b/lib/src/default_index/readonly.rs @@ -652,7 +652,6 @@ impl ReadonlyIndex for DefaultReadonlyIndex { self } - // TODO: Create a persistent lookup from change id to commit ids. fn change_id_index( &self, heads: &mut dyn Iterator, diff --git a/lib/tests/test_index.rs b/lib/tests/test_index.rs index f08b420959..16a7f9ce39 100644 --- a/lib/tests/test_index.rs +++ b/lib/tests/test_index.rs @@ -821,14 +821,10 @@ fn test_change_id_index() { // No match assert_eq!(resolve_prefix("ba"), PrefixResolution::NoMatch); - // Test with a revset containing only some of the commits. We should get shorter - // prefixes and be able to resolve shorter prefixes. + // Test with an index containing only some of the commits. The shortest + // length doesn't have to be minimized further, but unreachable commits + // should never be included in the resolved set. let change_id_index = index_for_heads(&[&commit_1, &commit_2]); - let prefix_len = - |commit: &Commit| change_id_index.shortest_unique_prefix_len(commit.change_id()); - assert_eq!(prefix_len(&commit_1), 2); - assert_eq!(prefix_len(&commit_2), 2); - assert_eq!(prefix_len(&commit_3), 6); let resolve_prefix = |prefix: &str| { change_id_index .resolve_prefix(&HexPrefix::new(prefix).unwrap()) @@ -839,13 +835,10 @@ fn test_change_id_index() { PrefixResolution::SingleMatch(hashset! {root_commit.id().clone()}) ); assert_eq!( - resolve_prefix("aa"), + resolve_prefix("aaaaab"), PrefixResolution::SingleMatch(hashset! {commit_2.id().clone()}) ); - assert_eq!( - resolve_prefix("ab"), - PrefixResolution::SingleMatch(hashset! {commit_1.id().clone()}) - ); + assert_eq!(resolve_prefix("aaaaaa"), PrefixResolution::NoMatch); assert_eq!(resolve_prefix("a"), PrefixResolution::AmbiguousMatch); assert_eq!(resolve_prefix("b"), PrefixResolution::NoMatch); }