From 0b99b379e1db68c24cda963d2d81a5a4a2383d1e Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk <martinvonz@google.com> Date: Fri, 5 May 2023 17:12:08 -0700 Subject: [PATCH] prefixes: allow resolving shorter ids within a revset In large repos, the unique prefixes can get somewhat long (~6 hex digits seems typical in the Linux repo), which makes them less useful for manually entering on the CLI. The user typically cares most about a small set of commits, so it would be nice to give shorter unique ids to those. That's what Mercurial enables with its `experimental.revisions.disambiguatewithin` config. This commit provides an implementation of that feature in `IdPrefixContext`. In very large repos, it can also be slow to calculate the unique prefixes, especially if it involves a request to a server. This feature becomes much more important in such repos. --- lib/src/id_prefix.rs | 116 +++++++++++++++++++-- lib/tests/test_id_prefix.rs | 201 ++++++++++++++++++++++++++++++++++++ 2 files changed, 311 insertions(+), 6 deletions(-) create mode 100644 lib/tests/test_id_prefix.rs diff --git a/lib/src/id_prefix.rs b/lib/src/id_prefix.rs index a93dfdeabe..6598177060 100644 --- a/lib/src/id_prefix.rs +++ b/lib/src/id_prefix.rs @@ -12,27 +12,114 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::rc::Rc; + +use once_cell::unsync::OnceCell; + use crate::backend::{self, ChangeId, CommitId, ObjectId}; use crate::index::{HexPrefix, PrefixResolution}; +use crate::op_store::WorkspaceId; use crate::repo::Repo; +use crate::revset::{DefaultSymbolResolver, RevsetExpression, RevsetIteratorExt}; + +struct PrefixDisambiguationError; + +struct DisambiguationData<'a> { + expression: Rc<RevsetExpression>, + workspace_id: Option<&'a WorkspaceId>, + // TODO: We shouldn't have to duplicate the CommitId as value + indexes: OnceCell<Indexes>, +} + +struct Indexes { + commit_index: IdIndex<CommitId, CommitId>, + change_index: IdIndex<ChangeId, CommitId>, +} -pub struct IdPrefixContext<'repo> { - repo: &'repo dyn Repo, +impl<'a> DisambiguationData<'a> { + fn indexes(&self, repo: &'a dyn Repo) -> Result<&Indexes, PrefixDisambiguationError> { + self.indexes.get_or_try_init(|| { + let symbol_resolver = DefaultSymbolResolver::new(repo, self.workspace_id); + let resolved_expression = self + .expression + .clone() + .resolve_user_expression(repo, &symbol_resolver) + .map_err(|_| PrefixDisambiguationError)?; + let revset = resolved_expression + .evaluate(repo) + .map_err(|_| PrefixDisambiguationError)?; + + // TODO: We should be able to get the change IDs from the revset, without having + // to read the whole commit objects + let mut commit_id_vec = vec![]; + let mut change_id_vec = vec![]; + for commit in revset.iter().commits(repo.store()) { + let commit = commit.map_err(|_| PrefixDisambiguationError)?; + commit_id_vec.push((commit.id().clone(), commit.id().clone())); + change_id_vec.push((commit.change_id().clone(), commit.id().clone())); + } + Ok(Indexes { + commit_index: IdIndex::from_vec(commit_id_vec), + change_index: IdIndex::from_vec(change_id_vec), + }) + }) + } } -impl<'repo> IdPrefixContext<'repo> { - pub fn new(repo: &'repo dyn Repo) -> IdPrefixContext<'repo> { - IdPrefixContext { repo } +pub struct IdPrefixContext<'a> { + repo: &'a dyn Repo, + disambiguation: Option<DisambiguationData<'a>>, +} + +impl<'a> IdPrefixContext<'a> { + pub fn new(repo: &'a dyn Repo) -> IdPrefixContext<'a> { + IdPrefixContext { + repo, + disambiguation: None, + } + } + + pub fn disambiguate_within( + mut self, + expression: Rc<RevsetExpression>, + workspace_id: Option<&'a WorkspaceId>, + ) -> Self { + self.disambiguation = Some(DisambiguationData { + workspace_id, + expression, + indexes: OnceCell::new(), + }); + self + } + + fn disambiguation_indexes(&self) -> Option<&Indexes> { + // TODO: propagate errors instead of treating them as if no revset was specified + self.disambiguation + .as_ref() + .and_then(|disambiguation| disambiguation.indexes(self.repo).ok()) } /// Resolve an unambiguous commit ID prefix. pub fn resolve_commit_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> { + if let Some(indexes) = self.disambiguation_indexes() { + let resolution = indexes.commit_index.resolve_prefix(prefix); + if let PrefixResolution::SingleMatch(mut ids) = resolution { + assert_eq!(ids.len(), 1); + return PrefixResolution::SingleMatch(ids.pop().unwrap()); + } + } self.repo.index().resolve_prefix(prefix) } /// Returns the shortest length of a prefix of `commit_id` that /// can still be resolved by `resolve_commit_prefix()`. pub fn shortest_commit_prefix_len(&self, commit_id: &CommitId) -> usize { + if let Some(indexes) = self.disambiguation_indexes() { + // TODO: Avoid the double lookup here (has_key() + shortest_unique_prefix_len()) + if indexes.commit_index.has_key(commit_id) { + return indexes.commit_index.shortest_unique_prefix_len(commit_id); + } + } self.repo .index() .shortest_unique_commit_id_prefix_len(commit_id) @@ -40,12 +127,23 @@ impl<'repo> IdPrefixContext<'repo> { /// Resolve an unambiguous change ID prefix to the commit IDs in the revset. pub fn resolve_change_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<Vec<CommitId>> { + if let Some(indexes) = self.disambiguation_indexes() { + let resolution = indexes.change_index.resolve_prefix(prefix); + if let PrefixResolution::SingleMatch(ids) = resolution { + return PrefixResolution::SingleMatch(ids); + } + } self.repo.resolve_change_id_prefix(prefix) } /// Returns the shortest length of a prefix of `change_id` that /// can still be resolved by `resolve_change_prefix()`. pub fn shortest_change_prefix_len(&self, change_id: &ChangeId) -> usize { + if let Some(indexes) = self.disambiguation_indexes() { + if indexes.change_index.has_key(change_id) { + return indexes.change_index.shortest_unique_prefix_len(change_id); + } + } self.repo.shortest_unique_change_id_prefix_len(change_id) } } @@ -71,6 +169,10 @@ where prefix: &HexPrefix, mut value_mapper: impl FnMut(&V) -> U, ) -> PrefixResolution<Vec<U>> { + if prefix.min_prefix_bytes().is_empty() { + // We consider an empty prefix ambiguous even if the index has a single entry. + return PrefixResolution::AmbiguousMatch; + } let mut range = self.resolve_prefix_range(prefix).peekable(); if let Some((first_key, _)) = range.peek().copied() { let maybe_entries: Option<Vec<_>> = range @@ -136,6 +238,8 @@ where }) .max() .unwrap_or(0) + // Even if the key is the only one in the index, we require at least one digit. + .max(1) } } @@ -214,7 +318,7 @@ mod tests { let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>); assert_eq!( id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")), - 0 + 1 ); let id_index = IdIndex::from_vec(vec![ diff --git a/lib/tests/test_id_prefix.rs b/lib/tests/test_id_prefix.rs new file mode 100644 index 0000000000..40b9320780 --- /dev/null +++ b/lib/tests/test_id_prefix.rs @@ -0,0 +1,201 @@ +// Copyright 2023 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use itertools::Itertools; +use jujutsu_lib::backend::{CommitId, MillisSinceEpoch, ObjectId, Signature, Timestamp}; +use jujutsu_lib::id_prefix::IdPrefixContext; +use jujutsu_lib::index::HexPrefix; +use jujutsu_lib::index::PrefixResolution::{AmbiguousMatch, NoMatch, SingleMatch}; +use jujutsu_lib::repo::Repo; +use jujutsu_lib::revset::RevsetExpression; +use testutils::TestRepo; + +#[test] +fn test_id_prefix() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(true); + let repo = &test_repo.repo; + let root_commit_id = repo.store().root_commit_id(); + let root_change_id = repo.store().root_change_id(); + + let mut tx = repo.start_transaction(&settings, "test"); + let mut create_commit = |parent_id: &CommitId| { + let signature = Signature { + name: "Some One".to_string(), + email: "some.one@example.com".to_string(), + timestamp: Timestamp { + timestamp: MillisSinceEpoch(0), + tz_offset: 0, + }, + }; + tx.mut_repo() + .new_commit( + &settings, + vec![parent_id.clone()], + repo.store().empty_tree_id().clone(), + ) + .set_author(signature.clone()) + .set_committer(signature) + .write() + .unwrap() + }; + let mut commits = vec![create_commit(root_commit_id)]; + for _ in 0..25 { + commits.push(create_commit(commits.last().unwrap().id())); + } + let repo = tx.commit(); + + // Print the commit IDs and change IDs for reference + let commit_prefixes = commits + .iter() + .enumerate() + .map(|(i, commit)| format!("{} {}", &commit.id().hex()[..3], i)) + .sorted() + .join("\n"); + insta::assert_snapshot!(commit_prefixes, @r###" + 11a 5 + 214 24 + 2a6 2 + 33e 14 + 3be 16 + 3ea 18 + 593 20 + 5d3 1 + 5f6 13 + 676 3 + 7b6 25 + 7da 9 + 81c 10 + 87e 12 + 997 21 + 9f7 22 + a0e 4 + a55 19 + ac4 23 + c18 17 + ce9 0 + d42 6 + d9d 8 + eec 15 + efe 7 + fa3 11 + "###); + let change_prefixes = commits + .iter() + .enumerate() + .map(|(i, commit)| format!("{} {}", &commit.change_id().hex()[..3], i)) + .sorted() + .join("\n"); + insta::assert_snapshot!(change_prefixes, @r###" + 026 9 + 030 13 + 1b5 6 + 26b 3 + 26c 8 + 271 10 + 439 2 + 44a 17 + 4e9 16 + 5b2 23 + 6c2 19 + 781 0 + 79f 14 + 7d5 24 + 86b 20 + 871 7 + 896 5 + 9e4 18 + a2c 1 + a63 22 + b19 11 + b93 4 + bf5 21 + c24 15 + d64 12 + fee 25 + "###); + + let prefix = |x| HexPrefix::new(x).unwrap(); + + // Without a disambiguation revset + // --------------------------------------------------------------------------------------------- + let c = IdPrefixContext::new(repo.as_ref()); + assert_eq!(c.shortest_commit_prefix_len(commits[2].id()), 2); + assert_eq!(c.shortest_commit_prefix_len(commits[5].id()), 1); + assert_eq!(c.resolve_commit_prefix(&prefix("2")), AmbiguousMatch); + assert_eq!( + c.resolve_commit_prefix(&prefix("2a")), + SingleMatch(commits[2].id().clone()) + ); + assert_eq!(c.resolve_commit_prefix(&prefix("20")), NoMatch); + assert_eq!(c.resolve_commit_prefix(&prefix("2a0")), NoMatch); + assert_eq!(c.shortest_change_prefix_len(commits[0].change_id()), 2); + assert_eq!(c.shortest_change_prefix_len(commits[6].change_id()), 1); + assert_eq!(c.resolve_change_prefix(&prefix("7")), AmbiguousMatch); + assert_eq!( + c.resolve_change_prefix(&prefix("78")), + SingleMatch(vec![commits[0].id().clone()]) + ); + assert_eq!(c.resolve_change_prefix(&prefix("70")), NoMatch); + assert_eq!(c.resolve_change_prefix(&prefix("780")), NoMatch); + + // Disambiguate within a revset + // --------------------------------------------------------------------------------------------- + let expression = + RevsetExpression::commits(vec![commits[0].id().clone(), commits[2].id().clone()]); + let c = c.disambiguate_within(expression, None); + // The prefix is now shorter + assert_eq!(c.shortest_commit_prefix_len(commits[2].id()), 1); + // Shorter prefix within the set can be used + assert_eq!( + c.resolve_commit_prefix(&prefix("2")), + SingleMatch(commits[2].id().clone()) + ); + // Can still resolve commits outside the set + assert_eq!( + c.resolve_commit_prefix(&prefix("21")), + SingleMatch(commits[24].id().clone()) + ); + assert_eq!(c.shortest_change_prefix_len(commits[0].change_id()), 1); + assert_eq!( + c.resolve_change_prefix(&prefix("7")), + SingleMatch(vec![commits[0].id().clone()]) + ); + + // Single commit in revset. Length 0 is unambiguous, but we pretend 1 digit is + // needed. + // --------------------------------------------------------------------------------------------- + let expression = RevsetExpression::commit(root_commit_id.clone()); + let c = c.disambiguate_within(expression, None); + assert_eq!(c.shortest_commit_prefix_len(root_commit_id), 1); + assert_eq!(c.resolve_commit_prefix(&prefix("")), AmbiguousMatch); + assert_eq!( + c.resolve_commit_prefix(&prefix("0")), + SingleMatch(root_commit_id.clone()) + ); + assert_eq!(c.shortest_change_prefix_len(root_change_id), 1); + assert_eq!(c.resolve_change_prefix(&prefix("")), AmbiguousMatch); + assert_eq!( + c.resolve_change_prefix(&prefix("0")), + SingleMatch(vec![root_commit_id.clone()]) + ); + + // Disambiguate within revset that fails to evaluate + // --------------------------------------------------------------------------------------------- + // TODO: Should be an error + let expression = RevsetExpression::symbol("nonexistent".to_string()); + let context = c.disambiguate_within(expression, None); + assert_eq!(context.shortest_commit_prefix_len(commits[2].id()), 2); + assert_eq!(context.resolve_commit_prefix(&prefix("2")), AmbiguousMatch); +}