From 5f3a31300bb5033d9ee0a3503b92ca2530510899 Mon Sep 17 00:00:00 2001
From: Yuya Nishihara
Date: Fri, 22 Dec 2023 16:57:21 +0900
Subject: [PATCH] index: implement index-level change id lookup methods

These methods are basically the same as the commit_id versions, but
resolve_change_id_prefix() is a bit more involved as we need to gather
matches from multiple segments.
---
 lib/src/default_index/composite.rs |  67 +++++++++++
 lib/src/default_index/mod.rs       | 181 +++++++++++++++++++++++++++++
 2 files changed, 248 insertions(+)

diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs
index 79f2aa6fcc..3e8eac08b9 100644
--- a/lib/src/default_index/composite.rs
+++ b/lib/src/default_index/composite.rs
@@ -200,6 +200,73 @@ impl<'a> CompositeIndex<'a> {
             .unwrap()
     }

+    /// Assuming the given `change_id` exists, returns the minimum prefix length
+    /// that disambiguates it among all indexed ids, including hidden ones.
+    #[cfg(test)] // TODO
+    pub(super) fn shortest_unique_change_id_prefix_len(&self, change_id: &ChangeId) -> usize {
+        let (prev_id, next_id) = self.resolve_neighbor_change_ids(change_id);
+        itertools::chain(prev_id, next_id)
+            .map(|id| hex_util::common_hex_len(change_id.as_bytes(), id.as_bytes()) + 1)
+            .max()
+            .unwrap_or(0)
+    }
+
+    /// Assuming the given `change_id` exists, returns the previous and next
+    /// change ids in lexicographical order. The returned change ids may be
+    /// hidden. Either side is `None` if there is no such neighbor.
+    #[cfg(test)] // TODO
+    pub(super) fn resolve_neighbor_change_ids(
+        &self,
+        change_id: &ChangeId,
+    ) -> (Option<ChangeId>, Option<ChangeId>) {
+        self.ancestor_index_segments()
+            .map(|segment| segment.resolve_neighbor_change_ids(change_id))
+            .reduce(|(acc_prev_id, acc_next_id), (prev_id, next_id)| {
+                (
+                    acc_prev_id.into_iter().chain(prev_id).max(),
+                    acc_next_id.into_iter().chain(next_id).min(),
+                )
+            })
+            .unwrap()
+    }
+
+    /// Resolves the given change id `prefix` to the associated entries. The
+    /// returned entries may be hidden.
+    ///
+    /// The returned index positions are sorted in ascending order.
+    #[cfg(test)] // TODO
+    pub(super) fn resolve_change_id_prefix(
+        &self,
+        prefix: &HexPrefix,
+    ) -> PrefixResolution<(ChangeId, SmallIndexPositionsVec)> {
+        use PrefixResolution::*;
+        self.ancestor_index_segments()
+            .fold(NoMatch, |acc_match, segment| {
+                if acc_match == AmbiguousMatch {
+                    return acc_match; // avoid checking the parent file(s)
+                }
+                let to_global_pos = {
+                    let num_parent_commits = segment.num_parent_commits();
+                    move |LocalPosition(pos)| IndexPosition(pos + num_parent_commits)
+                };
+                // Similar to PrefixResolution::plus(), but merges matches of the same id.
+                match (acc_match, segment.resolve_change_id_prefix(prefix)) {
+                    (NoMatch, local_match) => local_match.map(|(id, positions)| {
+                        (id, positions.into_iter().map(to_global_pos).collect())
+                    }),
+                    (acc_match, NoMatch) => acc_match,
+                    (AmbiguousMatch, _) => AmbiguousMatch,
+                    (_, AmbiguousMatch) => AmbiguousMatch,
+                    (SingleMatch((id1, _)), SingleMatch((id2, _))) if id1 != id2 => AmbiguousMatch,
+                    (SingleMatch((id, mut acc_positions)), SingleMatch((_, local_positions))) => {
+                        acc_positions
+                            .insert_many(0, local_positions.into_iter().map(to_global_pos));
+                        SingleMatch((id, acc_positions))
+                    }
+                }
+            })
+    }
+
     pub(super) fn is_ancestor_pos(
         &self,
         ancestor_pos: IndexPosition,
diff --git a/lib/src/default_index/mod.rs b/lib/src/default_index/mod.rs
index a11c710cf0..b559794943 100644
--- a/lib/src/default_index/mod.rs
+++ b/lib/src/default_index/mod.rs
@@ -595,6 +595,9 @@ mod tests {
         let local_positions_vec = |positions: &[u32]| -> SmallLocalPositionsVec {
             positions.iter().copied().map(LocalPosition).collect()
         };
+        let index_positions_vec = |positions: &[u32]| -> SmallIndexPositionsVec {
+            positions.iter().copied().map(IndexPosition).collect()
+        };

         let id_0 = ChangeId::from_hex("00000001");
         let id_1 = ChangeId::from_hex("00999999");
@@ -683,6 +686,76 @@ mod tests {
             mutable_segment.resolve_change_id_prefix(&HexPrefix::new("05555").unwrap()),
             PrefixResolution::AmbiguousMatch
         );
+
+        let index = mutable_segment.as_composite();
+
+        // Global lookup with the full hex digits
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new(&id_0.hex()).unwrap()),
+            PrefixResolution::SingleMatch((id_0.clone(), index_positions_vec(&[0])))
+        );
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new(&id_1.hex()).unwrap()),
+            PrefixResolution::SingleMatch((id_1.clone(), index_positions_vec(&[1, 3, 9])))
+        );
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new(&id_2.hex()).unwrap()),
+            PrefixResolution::SingleMatch((id_2.clone(), index_positions_vec(&[2, 4, 5])))
+        );
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new(&id_3.hex()).unwrap()),
+            PrefixResolution::SingleMatch((id_3.clone(), index_positions_vec(&[6, 7])))
+        );
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new(&id_4.hex()).unwrap()),
+            PrefixResolution::SingleMatch((id_4.clone(), index_positions_vec(&[8])))
+        );
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new(&id_5.hex()).unwrap()),
+            PrefixResolution::SingleMatch((id_5.clone(), index_positions_vec(&[10])))
+        );
+
+        // Global lookup with unknown prefix
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new("ffffffff").unwrap()),
+            PrefixResolution::NoMatch
+        );
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new("00000002").unwrap()),
+            PrefixResolution::NoMatch
+        );
+
+        // Global lookup with globally unique prefix
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new("000").unwrap()),
+            PrefixResolution::SingleMatch((id_0.clone(), index_positions_vec(&[0])))
+        );
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new("055553").unwrap()),
+            PrefixResolution::SingleMatch((id_5.clone(), index_positions_vec(&[10])))
+        );
+
+        // Global lookup with globally unique prefix stored in both parts
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new("009").unwrap()),
+            PrefixResolution::SingleMatch((id_1.clone(), index_positions_vec(&[1, 3, 9])))
+        );
+
+        // Global lookup with locally ambiguous prefix
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new("00").unwrap()),
+            PrefixResolution::AmbiguousMatch
+        );
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new("05555").unwrap()),
+            PrefixResolution::AmbiguousMatch
+        );
+
+        // Global lookup with locally unique but globally ambiguous prefix
+        assert_eq!(
+            index.resolve_change_id_prefix(&HexPrefix::new("0554").unwrap()),
+            PrefixResolution::AmbiguousMatch
+        );
     }

     #[test]
@@ -781,6 +854,114 @@ mod tests {
             mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("ffffffff")),
             (Some(id_4.clone()), None),
         );
+
+        let index = mutable_segment.as_composite();
+
+        // Global lookup, change_id exists.
+        // id_0 < id_1 < id_3 < id_2 < id_5 < id_4
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&id_0),
+            (None, Some(id_1.clone())),
+        );
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&id_1),
+            (Some(id_0.clone()), Some(id_3.clone())),
+        );
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&id_3),
+            (Some(id_1.clone()), Some(id_2.clone())),
+        );
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&id_2),
+            (Some(id_3.clone()), Some(id_5.clone())),
+        );
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&id_5),
+            (Some(id_2.clone()), Some(id_4.clone())),
+        );
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&id_4),
+            (Some(id_5.clone()), None),
+        );
+
+        // Global lookup, change_id doesn't exist.
+        // id_0 < id_1 < id_3 < id_2 < id_5 < id_4
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&ChangeId::from_hex("00000000")),
+            (None, Some(id_0.clone())),
+        );
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&ChangeId::from_hex("01000000")),
+            (Some(id_1.clone()), Some(id_3.clone())),
+        );
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&ChangeId::from_hex("05544555")),
+            (Some(id_3.clone()), Some(id_2.clone())),
+        );
+        assert_eq!(
+            index.resolve_neighbor_change_ids(&ChangeId::from_hex("ffffffff")),
+            (Some(id_4.clone()), None),
+        );
+    }
+
+    #[test]
+    fn shortest_unique_change_id_prefix() {
+        let temp_dir = testutils::new_temp_dir();
+        let mut new_commit_id = commit_id_generator();
+
+        let id_0 = ChangeId::from_hex("00000001");
+        let id_1 = ChangeId::from_hex("00999999");
+        let id_2 = ChangeId::from_hex("05548888");
+        let id_3 = ChangeId::from_hex("05544444");
+        let id_4 = ChangeId::from_hex("05555555");
+        let id_5 = ChangeId::from_hex("05555333");
+
+        // Create some commits whose change ids share various common prefixes.
+        let mut mutable_segment = MutableIndexSegment::full(16, 4);
+        mutable_segment.add_commit_data(new_commit_id(), id_0.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
+
+        // Write these commits to one file and build the remainder on top.
+        let initial_file = mutable_segment.save_in(temp_dir.path()).unwrap();
+        mutable_segment = MutableIndexSegment::incremental(initial_file.clone());
+
+        mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_4.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
+        mutable_segment.add_commit_data(new_commit_id(), id_5.clone(), &[]);
+
+        let index = mutable_segment.as_composite();
+
+        // Calculate the shortest unique prefix length for known change ids
+        assert_eq!(index.shortest_unique_change_id_prefix_len(&id_0), 3);
+        assert_eq!(index.shortest_unique_change_id_prefix_len(&id_1), 3);
+        assert_eq!(index.shortest_unique_change_id_prefix_len(&id_2), 5);
+        assert_eq!(index.shortest_unique_change_id_prefix_len(&id_3), 5);
+        assert_eq!(index.shortest_unique_change_id_prefix_len(&id_4), 6);
+        assert_eq!(index.shortest_unique_change_id_prefix_len(&id_5), 6);
+
+        // Calculate the shortest unique prefix length for unknown change ids
+        assert_eq!(
+            index.shortest_unique_change_id_prefix_len(&ChangeId::from_hex("00000002")),
+            8
+        );
+        assert_eq!(
+            index.shortest_unique_change_id_prefix_len(&ChangeId::from_hex("01000000")),
+            2
+        );
+        assert_eq!(
+            index.shortest_unique_change_id_prefix_len(&ChangeId::from_hex("05555344")),
+            7
+        );
+        assert_eq!(
+            index.shortest_unique_change_id_prefix_len(&ChangeId::from_hex("ffffffff")),
+            1
+        );
     }

     #[test]