From b83f53db6cd4d6841453947422c99f40a6bc47df Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sun, 11 Feb 2024 22:35:44 +0900 Subject: [PATCH 1/3] index: extract inner binary search function The callback returns Ordering instead of &[u8] due to lifetime difficulty. --- lib/src/default_index/readonly.rs | 40 +++++++++++++++++++------------ 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/lib/src/default_index/readonly.rs b/lib/src/default_index/readonly.rs index acb4632a83..cfe2daaed2 100644 --- a/lib/src/default_index/readonly.rs +++ b/lib/src/default_index/readonly.rs @@ -373,21 +373,10 @@ impl ReadonlyIndexSegment { /// Otherwise, returns `Err` containing the position where the id could be /// inserted. fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> Result { - let mut low = 0; - let mut high = self.num_local_commits; - while low < high { - let mid = (low + high) / 2; - let entry = self.commit_lookup_entry(mid); - let cmp = entry.commit_id_bytes().cmp(prefix); - // According to Rust std lib, this produces cmov instructions. - // https://github.com/rust-lang/rust/blob/1.76.0/library/core/src/slice/mod.rs#L2845-L2855 - low = if cmp == Ordering::Less { mid + 1 } else { low }; - high = if cmp == Ordering::Greater { mid } else { high }; - if cmp == Ordering::Equal { - return Ok(mid); - } - } - Err(low) + binary_search_pos_by(self.num_local_commits, |pos| { + let entry = self.commit_lookup_entry(pos); + entry.commit_id_bytes().cmp(prefix) + }) } } @@ -571,3 +560,24 @@ impl ReadonlyIndex for DefaultReadonlyIndex { Box::new(DefaultMutableIndex::incremental(self.0.clone())) } } + +/// Binary searches u32 position with the given comparison function. +/// +/// If matched exactly, returns `Ok` with the position. Otherwise, returns `Err` +/// containing the position where the element could be inserted. +fn binary_search_pos_by(size: u32, mut f: impl FnMut(u32) -> Ordering) -> Result { + let mut low = 0; + let mut high = size; + while low < high { + let mid = (low + high) / 2; + let cmp = f(mid); + // According to Rust std lib, this produces cmov instructions. + // https://github.com/rust-lang/rust/blob/1.76.0/library/core/src/slice/mod.rs#L2845-L2855 + low = if cmp == Ordering::Less { mid + 1 } else { low }; + high = if cmp == Ordering::Greater { mid } else { high }; + if cmp == Ordering::Equal { + return Ok(mid); + } + } + Err(low) +} From 2f931ce7f47b3b3f469a04f84577e595234f947f Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sun, 11 Feb 2024 23:01:26 +0900 Subject: [PATCH 2/3] index: extract helper struct for post processing binary search result This code will be shared among commit id and change id lookup functions. --- lib/src/default_index/readonly.rs | 93 ++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 32 deletions(-) diff --git a/lib/src/default_index/readonly.rs b/lib/src/default_index/readonly.rs index cfe2daaed2..bee735a934 100644 --- a/lib/src/default_index/readonly.rs +++ b/lib/src/default_index/readonly.rs @@ -367,12 +367,8 @@ impl ReadonlyIndexSegment { .collect() } - /// Binary searches commit id by `prefix`. - /// - /// If the `prefix` matches exactly, returns `Ok` with the lookup position. - /// Otherwise, returns `Err` containing the position where the id could be - /// inserted. - fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> Result { + /// Binary searches commit id by `prefix`. Returns the lookup position. + fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult { binary_search_pos_by(self.num_local_commits, |pos| { let entry = self.commit_lookup_entry(pos); entry.commit_id_bytes().cmp(prefix) @@ -409,29 +405,13 @@ impl IndexSegment for ReadonlyIndexSegment { &self, commit_id: &CommitId, ) -> (Option, Option) { - let (prev_lookup_pos, next_lookup_pos) = - match self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes()) { - Ok(pos) => (pos.checked_sub(1), (pos + 1..self.num_local_commits).next()), - Err(pos) => (pos.checked_sub(1), (pos..self.num_local_commits).next()), - }; - let prev_id = prev_lookup_pos.map(|p| self.commit_lookup_entry(p).commit_id()); - let next_id = next_lookup_pos.map(|p| self.commit_lookup_entry(p).commit_id()); - (prev_id, next_id) + self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes()) + .map_neighbors(|pos| self.commit_lookup_entry(pos).commit_id()) } fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { - let lookup_pos = self - .commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes()) - .unwrap_or_else(|pos| pos); - let mut matches = (lookup_pos..self.num_local_commits) - .map(|pos| self.commit_lookup_entry(pos).commit_id()) - .take_while(|id| prefix.matches(id)) - .fuse(); - match (matches.next(), matches.next()) { - (Some(id), None) => PrefixResolution::SingleMatch(id), - (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, - (None, _) => PrefixResolution::NoMatch, - } + self.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes()) + .prefix_matches(prefix, |pos| self.commit_lookup_entry(pos).commit_id()) } fn generation_number(&self, local_pos: LocalPosition) -> u32 { @@ -561,11 +541,58 @@ impl ReadonlyIndex for DefaultReadonlyIndex { } } +/// Binary search result in a sorted lookup table. +#[derive(Clone, Copy, Debug)] +struct PositionLookupResult { + /// `Ok` means the element is found at the position. `Err` contains the + /// position where the element could be inserted. + result: Result, + size: u32, +} + +impl PositionLookupResult { + /// Returns position of the element if exactly matched. + fn ok(self) -> Option { + self.result.ok() + } + + /// Returns `(previous, next)` positions of the matching element or + /// boundary. + fn neighbors(self) -> (Option, Option) { + match self.result { + Ok(pos) => (pos.checked_sub(1), (pos + 1..self.size).next()), + Err(pos) => (pos.checked_sub(1), (pos..self.size).next()), + } + } + + /// Looks up `(previous, next)` elements by the given function. + fn map_neighbors(self, mut lookup: impl FnMut(u32) -> T) -> (Option, Option) { + let (prev_pos, next_pos) = self.neighbors(); + (prev_pos.map(&mut lookup), next_pos.map(&mut lookup)) + } + + /// Looks up matching elements from the current position, returns one if + /// the given `prefix` unambiguously matches. + fn prefix_matches( + self, + prefix: &HexPrefix, + lookup: impl FnMut(u32) -> T, + ) -> PrefixResolution { + let lookup_pos = self.result.unwrap_or_else(|pos| pos); + let mut matches = (lookup_pos..self.size) + .map(lookup) + .take_while(|id| prefix.matches(id)) + .fuse(); + match (matches.next(), matches.next()) { + (Some(id), None) => PrefixResolution::SingleMatch(id), + (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, + (None, _) => PrefixResolution::NoMatch, + } + } +} + /// Binary searches u32 position with the given comparison function. -/// -/// If matched exactly, returns `Ok` with the position. Otherwise, returns `Err` -/// containing the position where the element could be inserted. -fn binary_search_pos_by(size: u32, mut f: impl FnMut(u32) -> Ordering) -> Result { +fn binary_search_pos_by(size: u32, mut f: impl FnMut(u32) -> Ordering) -> PositionLookupResult { let mut low = 0; let mut high = size; while low < high { @@ -576,8 +603,10 @@ fn binary_search_pos_by(size: u32, mut f: impl FnMut(u32) -> Ordering) -> Result low = if cmp == Ordering::Less { mid + 1 } else { low }; high = if cmp == Ordering::Greater { mid } else { high }; if cmp == Ordering::Equal { - return Ok(mid); + let result = Ok(mid); + return PositionLookupResult { result, size }; } } - Err(low) + let result = Err(low); + PositionLookupResult { result, size } } From c78cf2a1fa646f0ddb24271ccfd72eb003ce5001 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 12 Feb 2024 11:39:55 +0900 Subject: [PATCH 3/3] index: extract helper functions for id lookup in mutable table Similar to the previous commit, these functions will be reused by the change id lookup methods. The return value isn't cloned because resolve_id_prefix() will return (key, value) pair, and the current caller doesn't need a cloned value. --- lib/src/default_index/mutable.rs | 57 ++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 26847fde59..d5e6a7987f 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -334,32 +334,13 @@ impl IndexSegment for MutableIndexSegment { &self, commit_id: &CommitId, ) -> (Option, Option) { - let prev_id = self - .commit_lookup - .range((Bound::Unbounded, Bound::Excluded(commit_id))) - .next_back() - .map(|(id, _)| id.clone()); - let next_id = self - .commit_lookup - .range((Bound::Excluded(commit_id), Bound::Unbounded)) - .next() - .map(|(id, _)| id.clone()); - (prev_id, next_id) + let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id); + (prev_id.cloned(), next_id.cloned()) } fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes()); - let mut matches = self - .commit_lookup - .range((Bound::Included(&min_bytes_prefix), Bound::Unbounded)) - .map(|(id, _pos)| id) - .take_while(|&id| prefix.matches(id)) - .fuse(); - match (matches.next(), matches.next()) { - (Some(id), None) => PrefixResolution::SingleMatch(id.clone()), - (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, - (None, _) => PrefixResolution::NoMatch, - } + resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|id| id.clone()) } fn generation_number(&self, local_pos: LocalPosition) -> u32 { @@ -499,3 +480,35 @@ impl MutableIndex for DefaultMutableIndex { self.0.merge_in(other.as_segment().clone()); } } + +fn resolve_neighbor_ids<'a, K: Ord, V>( + lookup_table: &'a BTreeMap, + id: &K, +) -> (Option<&'a K>, Option<&'a K>) { + let prev_id = lookup_table + .range((Bound::Unbounded, Bound::Excluded(id))) + .next_back() + .map(|(id, _)| id); + let next_id = lookup_table + .range((Bound::Excluded(id), Bound::Unbounded)) + .next() + .map(|(id, _)| id); + (prev_id, next_id) +} + +fn resolve_id_prefix<'a, K: ObjectId + Ord, V>( + lookup_table: &'a BTreeMap, + prefix: &HexPrefix, + min_bytes_prefix: &K, +) -> PrefixResolution<&'a K> { + let mut matches = lookup_table + .range((Bound::Included(min_bytes_prefix), Bound::Unbounded)) + .map(|(id, _pos)| id) + .take_while(|&id| prefix.matches(id)) + .fuse(); + match (matches.next(), matches.next()) { + (Some(id), None) => PrefixResolution::SingleMatch(id), + (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, + (None, _) => PrefixResolution::NoMatch, + } +}