From 0b99b379e1db68c24cda963d2d81a5a4a2383d1e Mon Sep 17 00:00:00 2001
From: Martin von Zweigbergk <martinvonz@google.com>
Date: Fri, 5 May 2023 17:12:08 -0700
Subject: [PATCH] prefixes: allow resolving shorter ids within a revset

In large repos, the unique prefixes can get somewhat long (~6 hex
digits seems typical in the Linux repo), which makes them less useful
for manually entering on the CLI. The user typically cares most about
a small set of commits, so it would be nice to give shorter unique ids
to those. That's what Mercurial enables with its
`experimental.revisions.disambiguatewithin` config. This commit
provides an implementation of that feature in `IdPrefixContext`.

In very large repos, it can also be slow to calculate the unique
prefixes, especially if it involves a request to a server. This
feature becomes much more important in such repos.
---
 lib/src/id_prefix.rs        | 116 +++++++++++++++++++--
 lib/tests/test_id_prefix.rs | 201 ++++++++++++++++++++++++++++++++++++
 2 files changed, 311 insertions(+), 6 deletions(-)
 create mode 100644 lib/tests/test_id_prefix.rs

diff --git a/lib/src/id_prefix.rs b/lib/src/id_prefix.rs
index a93dfdeabe..6598177060 100644
--- a/lib/src/id_prefix.rs
+++ b/lib/src/id_prefix.rs
@@ -12,27 +12,114 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::rc::Rc;
+
+use once_cell::unsync::OnceCell;
+
 use crate::backend::{self, ChangeId, CommitId, ObjectId};
 use crate::index::{HexPrefix, PrefixResolution};
+use crate::op_store::WorkspaceId;
 use crate::repo::Repo;
+use crate::revset::{DefaultSymbolResolver, RevsetExpression, RevsetIteratorExt};
+
+struct PrefixDisambiguationError;
+
+struct DisambiguationData<'a> {
+    expression: Rc<RevsetExpression>,
+    workspace_id: Option<&'a WorkspaceId>,
+    // TODO: We shouldn't have to duplicate the CommitId as value
+    indexes: OnceCell<Indexes>,
+}
+
+struct Indexes {
+    commit_index: IdIndex<CommitId, CommitId>,
+    change_index: IdIndex<ChangeId, CommitId>,
+}
 
-pub struct IdPrefixContext<'repo> {
-    repo: &'repo dyn Repo,
+impl<'a> DisambiguationData<'a> {
+    fn indexes(&self, repo: &'a dyn Repo) -> Result<&Indexes, PrefixDisambiguationError> {
+        self.indexes.get_or_try_init(|| {
+            let symbol_resolver = DefaultSymbolResolver::new(repo, self.workspace_id);
+            let resolved_expression = self
+                .expression
+                .clone()
+                .resolve_user_expression(repo, &symbol_resolver)
+                .map_err(|_| PrefixDisambiguationError)?;
+            let revset = resolved_expression
+                .evaluate(repo)
+                .map_err(|_| PrefixDisambiguationError)?;
+
+            // TODO: We should be able to get the change IDs from the revset, without having
+            // to read the whole commit objects
+            let mut commit_id_vec = vec![];
+            let mut change_id_vec = vec![];
+            for commit in revset.iter().commits(repo.store()) {
+                let commit = commit.map_err(|_| PrefixDisambiguationError)?;
+                commit_id_vec.push((commit.id().clone(), commit.id().clone()));
+                change_id_vec.push((commit.change_id().clone(), commit.id().clone()));
+            }
+            Ok(Indexes {
+                commit_index: IdIndex::from_vec(commit_id_vec),
+                change_index: IdIndex::from_vec(change_id_vec),
+            })
+        })
+    }
 }
 
-impl<'repo> IdPrefixContext<'repo> {
-    pub fn new(repo: &'repo dyn Repo) -> IdPrefixContext<'repo> {
-        IdPrefixContext { repo }
+pub struct IdPrefixContext<'a> {
+    repo: &'a dyn Repo,
+    disambiguation: Option<DisambiguationData<'a>>,
+}
+
+impl<'a> IdPrefixContext<'a> {
+    pub fn new(repo: &'a dyn Repo) -> IdPrefixContext<'a> {
+        IdPrefixContext {
+            repo,
+            disambiguation: None,
+        }
+    }
+
+    pub fn disambiguate_within(
+        mut self,
+        expression: Rc<RevsetExpression>,
+        workspace_id: Option<&'a WorkspaceId>,
+    ) -> Self {
+        self.disambiguation = Some(DisambiguationData {
+            workspace_id,
+            expression,
+            indexes: OnceCell::new(),
+        });
+        self
+    }
+
+    fn disambiguation_indexes(&self) -> Option<&Indexes> {
+        // TODO: propagate errors instead of treating them as if no revset was specified
+        self.disambiguation
+            .as_ref()
+            .and_then(|disambiguation| disambiguation.indexes(self.repo).ok())
     }
 
     /// Resolve an unambiguous commit ID prefix.
     pub fn resolve_commit_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
+        if let Some(indexes) = self.disambiguation_indexes() {
+            let resolution = indexes.commit_index.resolve_prefix(prefix);
+            if let PrefixResolution::SingleMatch(mut ids) = resolution {
+                assert_eq!(ids.len(), 1);
+                return PrefixResolution::SingleMatch(ids.pop().unwrap());
+            }
+        }
         self.repo.index().resolve_prefix(prefix)
     }
 
     /// Returns the shortest length of a prefix of `commit_id` that
     /// can still be resolved by `resolve_commit_prefix()`.
     pub fn shortest_commit_prefix_len(&self, commit_id: &CommitId) -> usize {
+        if let Some(indexes) = self.disambiguation_indexes() {
+            // TODO: Avoid the double lookup here (has_key() + shortest_unique_prefix_len())
+            if indexes.commit_index.has_key(commit_id) {
+                return indexes.commit_index.shortest_unique_prefix_len(commit_id);
+            }
+        }
         self.repo
             .index()
             .shortest_unique_commit_id_prefix_len(commit_id)
@@ -40,12 +127,23 @@ impl<'repo> IdPrefixContext<'repo> {
 
     /// Resolve an unambiguous change ID prefix to the commit IDs in the revset.
     pub fn resolve_change_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<Vec<CommitId>> {
+        if let Some(indexes) = self.disambiguation_indexes() {
+            let resolution = indexes.change_index.resolve_prefix(prefix);
+            if let PrefixResolution::SingleMatch(ids) = resolution {
+                return PrefixResolution::SingleMatch(ids);
+            }
+        }
         self.repo.resolve_change_id_prefix(prefix)
     }
 
     /// Returns the shortest length of a prefix of `change_id` that
     /// can still be resolved by `resolve_change_prefix()`.
     pub fn shortest_change_prefix_len(&self, change_id: &ChangeId) -> usize {
+        if let Some(indexes) = self.disambiguation_indexes() {
+            if indexes.change_index.has_key(change_id) {
+                return indexes.change_index.shortest_unique_prefix_len(change_id);
+            }
+        }
         self.repo.shortest_unique_change_id_prefix_len(change_id)
     }
 }
@@ -71,6 +169,10 @@ where
         prefix: &HexPrefix,
         mut value_mapper: impl FnMut(&V) -> U,
     ) -> PrefixResolution<Vec<U>> {
+        if prefix.min_prefix_bytes().is_empty() {
+            // We consider an empty prefix ambiguous even if the index has a single entry.
+            return PrefixResolution::AmbiguousMatch;
+        }
         let mut range = self.resolve_prefix_range(prefix).peekable();
         if let Some((first_key, _)) = range.peek().copied() {
             let maybe_entries: Option<Vec<_>> = range
@@ -136,6 +238,8 @@ where
             })
             .max()
             .unwrap_or(0)
+            // Even if the key is the only one in the index, we require at least one digit.
+            .max(1)
     }
 }
 
@@ -214,7 +318,7 @@ mod tests {
         let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>);
         assert_eq!(
             id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
-            0
+            1
         );
 
         let id_index = IdIndex::from_vec(vec![
diff --git a/lib/tests/test_id_prefix.rs b/lib/tests/test_id_prefix.rs
new file mode 100644
index 0000000000..40b9320780
--- /dev/null
+++ b/lib/tests/test_id_prefix.rs
@@ -0,0 +1,201 @@
+// Copyright 2023 The Jujutsu Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use itertools::Itertools;
+use jujutsu_lib::backend::{CommitId, MillisSinceEpoch, ObjectId, Signature, Timestamp};
+use jujutsu_lib::id_prefix::IdPrefixContext;
+use jujutsu_lib::index::HexPrefix;
+use jujutsu_lib::index::PrefixResolution::{AmbiguousMatch, NoMatch, SingleMatch};
+use jujutsu_lib::repo::Repo;
+use jujutsu_lib::revset::RevsetExpression;
+use testutils::TestRepo;
+
+#[test]
+fn test_id_prefix() {
+    let settings = testutils::user_settings();
+    let test_repo = TestRepo::init(true);
+    let repo = &test_repo.repo;
+    let root_commit_id = repo.store().root_commit_id();
+    let root_change_id = repo.store().root_change_id();
+
+    let mut tx = repo.start_transaction(&settings, "test");
+    let mut create_commit = |parent_id: &CommitId| {
+        let signature = Signature {
+            name: "Some One".to_string(),
+            email: "some.one@example.com".to_string(),
+            timestamp: Timestamp {
+                timestamp: MillisSinceEpoch(0),
+                tz_offset: 0,
+            },
+        };
+        tx.mut_repo()
+            .new_commit(
+                &settings,
+                vec![parent_id.clone()],
+                repo.store().empty_tree_id().clone(),
+            )
+            .set_author(signature.clone())
+            .set_committer(signature)
+            .write()
+            .unwrap()
+    };
+    let mut commits = vec![create_commit(root_commit_id)];
+    for _ in 0..25 {
+        commits.push(create_commit(commits.last().unwrap().id()));
+    }
+    let repo = tx.commit();
+
+    // Print the commit IDs and change IDs for reference
+    let commit_prefixes = commits
+        .iter()
+        .enumerate()
+        .map(|(i, commit)| format!("{} {}", &commit.id().hex()[..3], i))
+        .sorted()
+        .join("\n");
+    insta::assert_snapshot!(commit_prefixes, @r###"
+    11a 5
+    214 24
+    2a6 2
+    33e 14
+    3be 16
+    3ea 18
+    593 20
+    5d3 1
+    5f6 13
+    676 3
+    7b6 25
+    7da 9
+    81c 10
+    87e 12
+    997 21
+    9f7 22
+    a0e 4
+    a55 19
+    ac4 23
+    c18 17
+    ce9 0
+    d42 6
+    d9d 8
+    eec 15
+    efe 7
+    fa3 11
+    "###);
+    let change_prefixes = commits
+        .iter()
+        .enumerate()
+        .map(|(i, commit)| format!("{} {}", &commit.change_id().hex()[..3], i))
+        .sorted()
+        .join("\n");
+    insta::assert_snapshot!(change_prefixes, @r###"
+    026 9
+    030 13
+    1b5 6
+    26b 3
+    26c 8
+    271 10
+    439 2
+    44a 17
+    4e9 16
+    5b2 23
+    6c2 19
+    781 0
+    79f 14
+    7d5 24
+    86b 20
+    871 7
+    896 5
+    9e4 18
+    a2c 1
+    a63 22
+    b19 11
+    b93 4
+    bf5 21
+    c24 15
+    d64 12
+    fee 25
+    "###);
+
+    let prefix = |x| HexPrefix::new(x).unwrap();
+
+    // Without a disambiguation revset
+    // ---------------------------------------------------------------------------------------------
+    let c = IdPrefixContext::new(repo.as_ref());
+    assert_eq!(c.shortest_commit_prefix_len(commits[2].id()), 2);
+    assert_eq!(c.shortest_commit_prefix_len(commits[5].id()), 1);
+    assert_eq!(c.resolve_commit_prefix(&prefix("2")), AmbiguousMatch);
+    assert_eq!(
+        c.resolve_commit_prefix(&prefix("2a")),
+        SingleMatch(commits[2].id().clone())
+    );
+    assert_eq!(c.resolve_commit_prefix(&prefix("20")), NoMatch);
+    assert_eq!(c.resolve_commit_prefix(&prefix("2a0")), NoMatch);
+    assert_eq!(c.shortest_change_prefix_len(commits[0].change_id()), 2);
+    assert_eq!(c.shortest_change_prefix_len(commits[6].change_id()), 1);
+    assert_eq!(c.resolve_change_prefix(&prefix("7")), AmbiguousMatch);
+    assert_eq!(
+        c.resolve_change_prefix(&prefix("78")),
+        SingleMatch(vec![commits[0].id().clone()])
+    );
+    assert_eq!(c.resolve_change_prefix(&prefix("70")), NoMatch);
+    assert_eq!(c.resolve_change_prefix(&prefix("780")), NoMatch);
+
+    // Disambiguate within a revset
+    // ---------------------------------------------------------------------------------------------
+    let expression =
+        RevsetExpression::commits(vec![commits[0].id().clone(), commits[2].id().clone()]);
+    let c = c.disambiguate_within(expression, None);
+    // The prefix is now shorter
+    assert_eq!(c.shortest_commit_prefix_len(commits[2].id()), 1);
+    // Shorter prefix within the set can be used
+    assert_eq!(
+        c.resolve_commit_prefix(&prefix("2")),
+        SingleMatch(commits[2].id().clone())
+    );
+    // Can still resolve commits outside the set
+    assert_eq!(
+        c.resolve_commit_prefix(&prefix("21")),
+        SingleMatch(commits[24].id().clone())
+    );
+    assert_eq!(c.shortest_change_prefix_len(commits[0].change_id()), 1);
+    assert_eq!(
+        c.resolve_change_prefix(&prefix("7")),
+        SingleMatch(vec![commits[0].id().clone()])
+    );
+
+    // Single commit in revset. Length 0 is unambiguous, but we pretend 1 digit is
+    // needed.
+    // ---------------------------------------------------------------------------------------------
+    let expression = RevsetExpression::commit(root_commit_id.clone());
+    let c = c.disambiguate_within(expression, None);
+    assert_eq!(c.shortest_commit_prefix_len(root_commit_id), 1);
+    assert_eq!(c.resolve_commit_prefix(&prefix("")), AmbiguousMatch);
+    assert_eq!(
+        c.resolve_commit_prefix(&prefix("0")),
+        SingleMatch(root_commit_id.clone())
+    );
+    assert_eq!(c.shortest_change_prefix_len(root_change_id), 1);
+    assert_eq!(c.resolve_change_prefix(&prefix("")), AmbiguousMatch);
+    assert_eq!(
+        c.resolve_change_prefix(&prefix("0")),
+        SingleMatch(vec![root_commit_id.clone()])
+    );
+
+    // Disambiguate within revset that fails to evaluate
+    // ---------------------------------------------------------------------------------------------
+    // TODO: Should be an error
+    let expression = RevsetExpression::symbol("nonexistent".to_string());
+    let context = c.disambiguate_within(expression, None);
+    assert_eq!(context.shortest_commit_prefix_len(commits[2].id()), 2);
+    assert_eq!(context.resolve_commit_prefix(&prefix("2")), AmbiguousMatch);
+}