Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

index: extract id lookup helpers #3057

Merged
merged 3 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 35 additions & 22 deletions lib/src/default_index/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,32 +334,13 @@ impl IndexSegment for MutableIndexSegment {
&self,
commit_id: &CommitId,
) -> (Option<CommitId>, Option<CommitId>) {
let prev_id = self
.commit_lookup
.range((Bound::Unbounded, Bound::Excluded(commit_id)))
.next_back()
.map(|(id, _)| id.clone());
let next_id = self
.commit_lookup
.range((Bound::Excluded(commit_id), Bound::Unbounded))
.next()
.map(|(id, _)| id.clone());
(prev_id, next_id)
let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id);
(prev_id.cloned(), next_id.cloned())
}

fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes());
let mut matches = self
.commit_lookup
.range((Bound::Included(&min_bytes_prefix), Bound::Unbounded))
.map(|(id, _pos)| id)
.take_while(|&id| prefix.matches(id))
.fuse();
match (matches.next(), matches.next()) {
(Some(id), None) => PrefixResolution::SingleMatch(id.clone()),
(Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
(None, _) => PrefixResolution::NoMatch,
}
resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|id| id.clone())
}

fn generation_number(&self, local_pos: LocalPosition) -> u32 {
Expand Down Expand Up @@ -499,3 +480,35 @@ impl MutableIndex for DefaultMutableIndex {
self.0.merge_in(other.as_segment().clone());
}
}

/// Finds the keys sitting directly before and after `id` in `lookup_table`.
///
/// Returns `(previous, next)`: `previous` is the greatest key strictly less
/// than `id`, and `next` is the smallest key strictly greater than `id`.
/// Either side is `None` when no such key exists. `id` itself is never
/// returned, whether or not it is present in the table.
fn resolve_neighbor_ids<'a, K: Ord, V>(
    lookup_table: &'a BTreeMap<K, V>,
    id: &K,
) -> (Option<&'a K>, Option<&'a K>) {
    // Everything strictly below `id`; the last entry is the predecessor.
    let mut below = lookup_table.range::<K, _>(..id);
    // Everything strictly above `id`; the first entry is the successor.
    let mut above = lookup_table.range::<K, _>((Bound::Excluded(id), Bound::Unbounded));
    let predecessor = below.next_back().map(|(key, _)| key);
    let successor = above.next().map(|(key, _)| key);
    (predecessor, successor)
}

/// Resolves `prefix` against the sorted keys of `lookup_table`.
///
/// Keys are scanned in ascending order starting at `min_bytes_prefix`
/// (inclusive) for as long as `prefix.matches()` accepts them. The result is
/// `SingleMatch` with the only accepted key, `AmbiguousMatch` if at least two
/// keys are accepted, and `NoMatch` if the first key in range is rejected or
/// the range is empty.
fn resolve_id_prefix<'a, K: ObjectId + Ord, V>(
    lookup_table: &'a BTreeMap<K, V>,
    prefix: &HexPrefix,
    min_bytes_prefix: &K,
) -> PrefixResolution<&'a K> {
    let mut candidates = lookup_table
        .range::<K, _>(min_bytes_prefix..)
        .map(|(id, _pos)| id)
        .take_while(|&id| prefix.matches(id));
    // At most two candidates are ever pulled: the second one is only needed
    // to tell a unique match from an ambiguous one.
    match candidates.next() {
        None => PrefixResolution::NoMatch,
        Some(_) if candidates.next().is_some() => PrefixResolution::AmbiguousMatch,
        Some(id) => PrefixResolution::SingleMatch(id),
    }
}
121 changes: 80 additions & 41 deletions lib/src/default_index/readonly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,27 +367,12 @@ impl ReadonlyIndexSegment {
.collect()
}

/// Binary searches commit id by `prefix`.
///
/// If the `prefix` matches exactly, returns `Ok` with the lookup position.
/// Otherwise, returns `Err` containing the position where the id could be
/// inserted.
fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> Result<u32, u32> {
let mut low = 0;
let mut high = self.num_local_commits;
while low < high {
let mid = (low + high) / 2;
let entry = self.commit_lookup_entry(mid);
let cmp = entry.commit_id_bytes().cmp(prefix);
// According to Rust std lib, this produces cmov instructions.
// https://github.com/rust-lang/rust/blob/1.76.0/library/core/src/slice/mod.rs#L2845-L2855
low = if cmp == Ordering::Less { mid + 1 } else { low };
high = if cmp == Ordering::Greater { mid } else { high };
if cmp == Ordering::Equal {
return Ok(mid);
}
}
Err(low)
/// Binary searches commit id by `prefix`. Returns the lookup position.
fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
binary_search_pos_by(self.num_local_commits, |pos| {
let entry = self.commit_lookup_entry(pos);
entry.commit_id_bytes().cmp(prefix)
})
}
}

Expand Down Expand Up @@ -420,29 +405,13 @@ impl IndexSegment for ReadonlyIndexSegment {
&self,
commit_id: &CommitId,
) -> (Option<CommitId>, Option<CommitId>) {
let (prev_lookup_pos, next_lookup_pos) =
match self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes()) {
Ok(pos) => (pos.checked_sub(1), (pos + 1..self.num_local_commits).next()),
Err(pos) => (pos.checked_sub(1), (pos..self.num_local_commits).next()),
};
let prev_id = prev_lookup_pos.map(|p| self.commit_lookup_entry(p).commit_id());
let next_id = next_lookup_pos.map(|p| self.commit_lookup_entry(p).commit_id());
(prev_id, next_id)
self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
.map_neighbors(|pos| self.commit_lookup_entry(pos).commit_id())
}

fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
let lookup_pos = self
.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
.unwrap_or_else(|pos| pos);
let mut matches = (lookup_pos..self.num_local_commits)
.map(|pos| self.commit_lookup_entry(pos).commit_id())
.take_while(|id| prefix.matches(id))
.fuse();
match (matches.next(), matches.next()) {
(Some(id), None) => PrefixResolution::SingleMatch(id),
(Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
(None, _) => PrefixResolution::NoMatch,
}
self.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
.prefix_matches(prefix, |pos| self.commit_lookup_entry(pos).commit_id())
}

fn generation_number(&self, local_pos: LocalPosition) -> u32 {
Expand Down Expand Up @@ -571,3 +540,73 @@ impl ReadonlyIndex for DefaultReadonlyIndex {
Box::new(DefaultMutableIndex::incremental(self.0.clone()))
}
}

/// Binary search result in a sorted lookup table.
///
/// Pairs the outcome of the search with the size of the table that was
/// searched, so neighbor and prefix lookups can be bounded without access to
/// the table itself.
#[derive(Clone, Copy, Debug)]
struct PositionLookupResult {
/// `Ok` means the element is found at the position. `Err` contains the
/// position where the element could be inserted.
result: Result<u32, u32>,
/// Exclusive upper bound of valid positions. `binary_search_pos_by()`
/// stores the `size` it was asked to search over here.
size: u32,
}

impl PositionLookupResult {
    /// Returns the position of the element if the search matched exactly,
    /// `None` otherwise.
    fn ok(self) -> Option<u32> {
        match self.result {
            Ok(pos) => Some(pos),
            Err(_) => None,
        }
    }

    /// Returns `(previous, next)` positions around the matching element, or
    /// around the insertion point if nothing matched.
    ///
    /// `previous` is `None` at position 0; `next` is `None` when it would be
    /// at or past `size`.
    fn neighbors(self) -> (Option<u32>, Option<u32>) {
        // On an exact hit the successor is the following slot; on a miss the
        // insertion point itself already holds the next greater element.
        let (before, after_candidate) = match self.result {
            Ok(pos) => (pos.checked_sub(1), pos + 1),
            Err(pos) => (pos.checked_sub(1), pos),
        };
        let after = Some(after_candidate).filter(|&pos| pos < self.size);
        (before, after)
    }

    /// Resolves the `(previous, next)` positions to elements by calling
    /// `lookup` on each position that exists, previous first.
    fn map_neighbors<T>(self, mut lookup: impl FnMut(u32) -> T) -> (Option<T>, Option<T>) {
        let (prev_pos, next_pos) = self.neighbors();
        let prev = prev_pos.map(|pos| lookup(pos));
        let next = next_pos.map(|pos| lookup(pos));
        (prev, next)
    }

    /// Scans elements from the found (or insertion) position up to `size`,
    /// and returns the element if `prefix` matches exactly one of them.
    ///
    /// Yields `NoMatch` if the first element is rejected or there is none,
    /// and `AmbiguousMatch` if the first two elements are both accepted.
    fn prefix_matches<T: ObjectId>(
        self,
        prefix: &HexPrefix,
        lookup: impl FnMut(u32) -> T,
    ) -> PrefixResolution<T> {
        let start = match self.result {
            Ok(pos) | Err(pos) => pos,
        };
        let mut candidates = (start..self.size)
            .map(lookup)
            .take_while(|id| prefix.matches(id));
        // The second candidate is fetched only to distinguish a unique match
        // from an ambiguous one.
        match candidates.next() {
            None => PrefixResolution::NoMatch,
            Some(_) if candidates.next().is_some() => PrefixResolution::AmbiguousMatch,
            Some(id) => PrefixResolution::SingleMatch(id),
        }
    }
}

/// Binary searches u32 position with the given comparison function.
///
/// `f(pos)` is called with positions in `0..size` and must return how the
/// element at `pos` orders relative to the target (`Less` if the element is
/// smaller). The elements are expected to be sorted consistently with `f`.
///
/// The returned `result` is `Ok(pos)` if `f` reported `Equal` at `pos`, or
/// `Err(pos)` with the position where the target could be inserted. `size` is
/// carried along unchanged.
fn binary_search_pos_by(size: u32, mut f: impl FnMut(u32) -> Ordering) -> PositionLookupResult {
    let mut low = 0;
    let mut high = size;
    while low < high {
        // Not `(low + high) / 2`: that sum overflows u32 once the search
        // window lies above 2^31. This form yields the same midpoint.
        let mid = low + (high - low) / 2;
        let cmp = f(mid);
        // According to Rust std lib, this produces cmov instructions.
        // https://github.com/rust-lang/rust/blob/1.76.0/library/core/src/slice/mod.rs#L2845-L2855
        low = if cmp == Ordering::Less { mid + 1 } else { low };
        high = if cmp == Ordering::Greater { mid } else { high };
        if cmp == Ordering::Equal {
            let result = Ok(mid);
            return PositionLookupResult { result, size };
        }
    }
    let result = Err(low);
    PositionLookupResult { result, size }
}