Skip to content

Commit

Permalink
index: implement index-level change id lookup methods
Browse files Browse the repository at this point in the history
These methods are basically the same as the commit_id versions, but
resolve_change_id_prefix() is a bit more involved as we need to gather matches
from multiple segments.
  • Loading branch information
yuja committed Feb 18, 2024
1 parent f73e590 commit 5f3a313
Show file tree
Hide file tree
Showing 2 changed files with 248 additions and 0 deletions.
67 changes: 67 additions & 0 deletions lib/src/default_index/composite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,73 @@ impl<'a> CompositeIndex<'a> {
.unwrap()
}

/// Returns the shortest hex prefix length that tells `change_id` apart from
/// every other indexed change id, hidden ones included.
///
/// The given `change_id` is expected to exist in the index. The result is
/// one digit more than the longest hex prefix shared with either
/// lexicographical neighbor, or 0 when no neighbor is found at all.
#[cfg(test)] // TODO
pub(super) fn shortest_unique_change_id_prefix_len(&self, change_id: &ChangeId) -> usize {
    let target = change_id.as_bytes();
    let (before, after) = self.resolve_neighbor_change_ids(change_id);
    let mut prefix_len: usize = 0;
    for neighbor in before.iter().chain(after.iter()) {
        // One digit past the shared prefix is the first digit that differs.
        let needed = hex_util::common_hex_len(target, neighbor.as_bytes()) + 1;
        prefix_len = std::cmp::max(prefix_len, needed);
    }
    prefix_len
}

/// Looks up the change ids that come immediately before and after
/// `change_id` in lexicographical order, considering every ancestor index
/// segment.
///
/// The given `change_id` is expected to exist. Either side is `None` when
/// no segment reports a neighbor on that side. The returned change ids may
/// be hidden.
#[cfg(test)] // TODO
pub(super) fn resolve_neighbor_change_ids(
    &self,
    change_id: &ChangeId,
) -> (Option<ChangeId>, Option<ChangeId>) {
    type Neighbors = (Option<ChangeId>, Option<ChangeId>);

    // Keeps the closest candidate on each side: the greatest predecessor and
    // the smallest successor seen so far.
    let closest = |(best_prev, best_next): Neighbors, (prev, next): Neighbors| -> Neighbors {
        // `None` orders below every `Some`, so a plain max already prefers
        // whichever predecessor exists and is greatest.
        let merged_prev = std::cmp::max(best_prev, prev);
        // A plain min would let `None` win, so only compare when both exist.
        let merged_next = match (best_next, next) {
            (Some(lhs), Some(rhs)) => Some(std::cmp::min(lhs, rhs)),
            (lhs, rhs) => lhs.or(rhs),
        };
        (merged_prev, merged_next)
    };

    let mut per_segment = self
        .ancestor_index_segments()
        .map(|segment| segment.resolve_neighbor_change_ids(change_id));
    // NOTE(review): panics if there are no segments at all; presumably at
    // least one segment is always present -- confirm.
    let first = per_segment.next().unwrap();
    per_segment.fold(first, closest)
}

/// Resolves the change id `prefix` against each ancestor index segment and
/// merges the per-segment results into a single resolution.
///
/// A single match carries the resolved change id together with the global
/// index positions of its entries, sorted in ascending order. The returned
/// entries may be hidden.
#[cfg(test)] // TODO
pub(super) fn resolve_change_id_prefix(
    &self,
    prefix: &HexPrefix,
) -> PrefixResolution<(ChangeId, SmallIndexPositionsVec)> {
    use PrefixResolution::*;
    let mut resolution: PrefixResolution<(ChangeId, SmallIndexPositionsVec)> = NoMatch;
    for segment in self.ancestor_index_segments() {
        // Segment-local positions become global ones by adding the number of
        // commits stored in the parent segment(s).
        let offset = segment.num_parent_commits();
        let globalize = move |LocalPosition(local)| IndexPosition(local + offset);
        let found = segment.resolve_change_id_prefix(prefix);
        // Similar to PrefixResolution::plus(), but merges matches of the same id.
        resolution = match (resolution, found) {
            (NoMatch, found) => found.map(|(id, locals)| {
                let globals = locals.into_iter().map(globalize).collect();
                (id, globals)
            }),
            (kept, NoMatch) => kept,
            (AmbiguousMatch, _) | (_, AmbiguousMatch) => AmbiguousMatch,
            (SingleMatch((lhs, _)), SingleMatch((rhs, _))) if lhs != rhs => AmbiguousMatch,
            (SingleMatch((id, mut globals)), SingleMatch((_, locals))) => {
                // Prepend so the merged list stays ascending; presumably
                // segments are visited from newest to oldest -- confirm.
                globals.insert_many(0, locals.into_iter().map(globalize));
                SingleMatch((id, globals))
            }
        };
        if resolution == AmbiguousMatch {
            break; // avoid checking the parent file(s)
        }
    }
    resolution
}

pub(super) fn is_ancestor_pos(
&self,
ancestor_pos: IndexPosition,
Expand Down
181 changes: 181 additions & 0 deletions lib/src/default_index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,9 @@ mod tests {
let local_positions_vec = |positions: &[u32]| -> SmallLocalPositionsVec {
positions.iter().copied().map(LocalPosition).collect()
};
let index_positions_vec = |positions: &[u32]| -> SmallIndexPositionsVec {
positions.iter().copied().map(IndexPosition).collect()
};

let id_0 = ChangeId::from_hex("00000001");
let id_1 = ChangeId::from_hex("00999999");
Expand Down Expand Up @@ -683,6 +686,76 @@ mod tests {
mutable_segment.resolve_change_id_prefix(&HexPrefix::new("05555").unwrap()),
PrefixResolution::AmbiguousMatch
);

let index = mutable_segment.as_composite();

// Global lookup with the full hex digits
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new(&id_0.hex()).unwrap()),
PrefixResolution::SingleMatch((id_0.clone(), index_positions_vec(&[0])))
);
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new(&id_1.hex()).unwrap()),
PrefixResolution::SingleMatch((id_1.clone(), index_positions_vec(&[1, 3, 9])))
);
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new(&id_2.hex()).unwrap()),
PrefixResolution::SingleMatch((id_2.clone(), index_positions_vec(&[2, 4, 5])))
);
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new(&id_3.hex()).unwrap()),
PrefixResolution::SingleMatch((id_3.clone(), index_positions_vec(&[6, 7])))
);
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new(&id_4.hex()).unwrap()),
PrefixResolution::SingleMatch((id_4.clone(), index_positions_vec(&[8])))
);
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new(&id_5.hex()).unwrap()),
PrefixResolution::SingleMatch((id_5.clone(), index_positions_vec(&[10])))
);

// Global lookup with unknown prefix
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new("ffffffff").unwrap()),
PrefixResolution::NoMatch
);
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new("00000002").unwrap()),
PrefixResolution::NoMatch
);

// Global lookup with globally unique prefix
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new("000").unwrap()),
PrefixResolution::SingleMatch((id_0.clone(), index_positions_vec(&[0])))
);
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new("055553").unwrap()),
PrefixResolution::SingleMatch((id_5.clone(), index_positions_vec(&[10])))
);

// Global lookup with globally unique prefix stored in both parts
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new("009").unwrap()),
PrefixResolution::SingleMatch((id_1.clone(), index_positions_vec(&[1, 3, 9])))
);

// Global lookup with locally ambiguous prefix
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new("00").unwrap()),
PrefixResolution::AmbiguousMatch
);
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new("05555").unwrap()),
PrefixResolution::AmbiguousMatch
);

// Global lookup with locally unique but globally ambiguous prefix
assert_eq!(
index.resolve_change_id_prefix(&HexPrefix::new("0554").unwrap()),
PrefixResolution::AmbiguousMatch
);
}

#[test]
Expand Down Expand Up @@ -781,6 +854,114 @@ mod tests {
mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("ffffffff")),
(Some(id_4.clone()), None),
);

let index = mutable_segment.as_composite();

// Global lookup, change_id exists.
// id_0 < id_1 < id_3 < id_2 < id_5 < id_4
assert_eq!(
index.resolve_neighbor_change_ids(&id_0),
(None, Some(id_1.clone())),
);
assert_eq!(
index.resolve_neighbor_change_ids(&id_1),
(Some(id_0.clone()), Some(id_3.clone())),
);
assert_eq!(
index.resolve_neighbor_change_ids(&id_3),
(Some(id_1.clone()), Some(id_2.clone())),
);
assert_eq!(
index.resolve_neighbor_change_ids(&id_2),
(Some(id_3.clone()), Some(id_5.clone())),
);
assert_eq!(
index.resolve_neighbor_change_ids(&id_5),
(Some(id_2.clone()), Some(id_4.clone())),
);
assert_eq!(
index.resolve_neighbor_change_ids(&id_4),
(Some(id_5.clone()), None),
);

// Global lookup, change_id doesn't exist.
// id_0 < id_1 < id_3 < id_2 < id_5 < id_4
assert_eq!(
index.resolve_neighbor_change_ids(&ChangeId::from_hex("00000000")),
(None, Some(id_0.clone())),
);
assert_eq!(
index.resolve_neighbor_change_ids(&ChangeId::from_hex("01000000")),
(Some(id_1.clone()), Some(id_3.clone())),
);
assert_eq!(
index.resolve_neighbor_change_ids(&ChangeId::from_hex("05544555")),
(Some(id_3.clone()), Some(id_2.clone())),
);
assert_eq!(
index.resolve_neighbor_change_ids(&ChangeId::from_hex("ffffffff")),
(Some(id_4.clone()), None),
);
}

#[test]
fn shortest_unique_change_id_prefix() {
    let temp_dir = testutils::new_temp_dir();
    let mut new_commit_id = commit_id_generator();

    let id_0 = ChangeId::from_hex("00000001");
    let id_1 = ChangeId::from_hex("00999999");
    let id_2 = ChangeId::from_hex("05548888");
    let id_3 = ChangeId::from_hex("05544444");
    let id_4 = ChangeId::from_hex("05555555");
    let id_5 = ChangeId::from_hex("05555333");

    // Create some commits whose change ids share common prefixes of various
    // lengths; some change ids are used by more than one commit.
    let mut mutable_segment = MutableIndexSegment::full(16, 4);
    for change_id in [&id_0, &id_1, &id_2, &id_1, &id_2, &id_2] {
        mutable_segment.add_commit_data(new_commit_id(), change_id.clone(), &[]);
    }

    // Write these commits to one file and build the remainder on top.
    let initial_file = mutable_segment.save_in(temp_dir.path()).unwrap();
    mutable_segment = MutableIndexSegment::incremental(initial_file.clone());
    for change_id in [&id_3, &id_3, &id_4, &id_1, &id_5] {
        mutable_segment.add_commit_data(new_commit_id(), change_id.clone(), &[]);
    }

    let index = mutable_segment.as_composite();

    // Calculate shortest unique prefix len with known change_id
    let known_cases = [
        (&id_0, 3),
        (&id_1, 3),
        (&id_2, 5),
        (&id_3, 5),
        (&id_4, 6),
        (&id_5, 6),
    ];
    for (change_id, expected_len) in known_cases {
        assert_eq!(
            index.shortest_unique_change_id_prefix_len(change_id),
            expected_len,
            "change id {}",
            change_id.hex()
        );
    }

    // Calculate shortest unique prefix len with unknown change_id
    let unknown_cases = [
        ("00000002", 8),
        ("01000000", 2),
        ("05555344", 7),
        ("ffffffff", 1),
    ];
    for (hex, expected_len) in unknown_cases {
        assert_eq!(
            index.shortest_unique_change_id_prefix_len(&ChangeId::from_hex(hex)),
            expected_len,
            "change id {hex}"
        );
    }
}

#[test]
Expand Down

0 comments on commit 5f3a313

Please sign in to comment.