diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs index 97c277590c..13c3a2f21c 100644 --- a/lib/src/default_index/composite.rs +++ b/lib/src/default_index/composite.rs @@ -21,10 +21,9 @@ use std::sync::Arc; use itertools::Itertools; +use super::readonly::ReadonlyIndexSegment; use super::rev_walk::RevWalk; -use super::{ - IndexEntry, IndexPosition, IndexPositionByGeneration, IndexSegment, ReadonlyIndexSegment, -}; +use super::{IndexEntry, IndexPosition, IndexPositionByGeneration, IndexSegment}; use crate::backend::{CommitId, ObjectId}; use crate::index::{HexPrefix, Index, PrefixResolution}; use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; diff --git a/lib/src/default_index/mod.rs b/lib/src/default_index/mod.rs index eafb7f5861..f8a4de6ae2 100644 --- a/lib/src/default_index/mod.rs +++ b/lib/src/default_index/mod.rs @@ -15,38 +15,28 @@ #![allow(missing_docs)] mod composite; +mod mutable; +mod readonly; mod rev_walk; mod store; -use std::any::Any; -use std::cmp::{max, Ordering}; -use std::collections::{BTreeMap, Bound}; +use std::cmp::Ordering; use std::fmt::{Debug, Formatter}; -use std::fs::File; use std::hash::{Hash, Hasher}; -use std::io; -use std::io::{Read, Write}; -use std::path::PathBuf; use std::sync::Arc; -use blake2::Blake2b512; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use digest::Digest; -use itertools::Itertools; use smallvec::SmallVec; -use tempfile::NamedTempFile; pub use self::composite::{CompositeIndex, IndexLevelStats, IndexStats}; +pub use self::mutable::DefaultMutableIndex; +pub use self::readonly::DefaultReadonlyIndex; +use self::readonly::ReadonlyIndexSegment; pub use self::rev_walk::{ RevWalk, RevWalkDescendants, RevWalkDescendantsGenerationRange, RevWalkGenerationRange, }; pub use self::store::{DefaultIndexStore, DefaultIndexStoreError, IndexLoadError}; use crate::backend::{ChangeId, CommitId, ObjectId}; -use crate::commit::Commit; -use crate::file_util::persist_content_addressed_temp_file; -use crate::index::{HexPrefix, Index, MutableIndex, PrefixResolution, ReadonlyIndex}; -use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; -use crate::store::Store; +use crate::index::{HexPrefix, PrefixResolution}; #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] pub struct IndexPosition(u32); @@ -59,502 +49,6 @@ impl IndexPosition { // inline up to 16 bytes (on 64-bit platform) for free. type SmallIndexPositionsVec = SmallVec<[IndexPosition; 4]>; -struct CommitGraphEntry<'a> { - data: &'a [u8], - commit_id_length: usize, - change_id_length: usize, -} - -// TODO: Add pointers to ancestors further back, like a skip list. Clear the -// lowest set bit to determine which generation number the pointers point to. -impl CommitGraphEntry<'_> { - fn size(commit_id_length: usize, change_id_length: usize) -> usize { - 20 + commit_id_length + change_id_length - } - - fn generation_number(&self) -> u32 { - (&self.data[4..]).read_u32::().unwrap() - } - - fn num_parents(&self) -> u32 { - (&self.data[8..]).read_u32::().unwrap() - } - - fn parent1_pos(&self) -> IndexPosition { - IndexPosition((&self.data[12..]).read_u32::().unwrap()) - } - - fn parent2_overflow_pos(&self) -> u32 { - (&self.data[16..]).read_u32::().unwrap() - } - - // TODO: Consider storing the change ids in a separate table. That table could - // be sorted by change id and have the end index into a list as value. That list - // would be the concatenation of all index positions associated with the change. - // Possible advantages: avoids duplicating change ids; smaller main graph leads - // to better cache locality when walking it; ability to quickly find all - // commits associated with a change id. - fn change_id(&self) -> ChangeId { - ChangeId::new(self.data[20..][..self.change_id_length].to_vec()) - } - - fn commit_id(&self) -> CommitId { - CommitId::from_bytes(&self.data[20 + self.change_id_length..][..self.commit_id_length]) - } -} - -struct CommitLookupEntry<'a> { - data: &'a [u8], - commit_id_length: usize, -} - -impl CommitLookupEntry<'_> { - fn size(commit_id_length: usize) -> usize { - commit_id_length + 4 - } - - fn commit_id(&self) -> CommitId { - CommitId::from_bytes(self.commit_id_bytes()) - } - - // might be better to add borrowed version of CommitId - fn commit_id_bytes(&self) -> &[u8] { - &self.data[0..self.commit_id_length] - } - - fn pos(&self) -> IndexPosition { - IndexPosition( - (&self.data[self.commit_id_length..][..4]) - .read_u32::() - .unwrap(), - ) - } -} - -// File format: -// u32: number of entries -// u32: number of parent overflow entries -// for each entry, in some topological order with parents first: -// u32: generation number -// u32: number of parents -// u32: position in this table for parent 1 -// u32: position in the overflow table of parent 2 -// : commit id -// for each entry, sorted by commit id: -// : commit id -// u32: position in the entry table above -// TODO: add a version number -// TODO: replace the table by a trie so we don't have to repeat the full commit -// ids -// TODO: add a fanout table like git's commit graph has? -struct ReadonlyIndexSegment { - parent_file: Option>, - num_parent_commits: u32, - name: String, - commit_id_length: usize, - change_id_length: usize, - commit_graph_entry_size: usize, - commit_lookup_entry_size: usize, - // Number of commits not counting the parent file - num_local_commits: u32, - graph: Vec, - lookup: Vec, - overflow_parent: Vec, -} - -/// Commit index backend which stores data on local disk. -#[derive(Debug)] -pub struct DefaultReadonlyIndex(Arc); - -impl ReadonlyIndex for DefaultReadonlyIndex { - fn as_any(&self) -> &dyn Any { - self - } - - fn as_index(&self) -> &dyn Index { - self - } - - fn start_modification(&self) -> Box { - let mutable_segment = MutableIndexSegment::incremental(self.0.clone()); - Box::new(DefaultMutableIndex(mutable_segment)) - } -} - -impl Debug for ReadonlyIndexSegment { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - f.debug_struct("ReadonlyIndexSegment") - .field("name", &self.name) - .field("parent_file", &self.parent_file) - .finish() - } -} - -impl DefaultReadonlyIndex { - pub fn as_composite(&self) -> CompositeIndex { - self.0.as_composite() - } -} - -#[derive(Debug)] -struct MutableGraphEntry { - commit_id: CommitId, - change_id: ChangeId, - generation_number: u32, - parent_positions: SmallIndexPositionsVec, -} - -struct MutableIndexSegment { - parent_file: Option>, - num_parent_commits: u32, - commit_id_length: usize, - change_id_length: usize, - graph: Vec, - lookup: BTreeMap, -} - -impl MutableIndexSegment { - fn full(commit_id_length: usize, change_id_length: usize) -> Self { - Self { - parent_file: None, - num_parent_commits: 0, - commit_id_length, - change_id_length, - graph: vec![], - lookup: BTreeMap::new(), - } - } - - fn incremental(parent_file: Arc) -> Self { - let num_parent_commits = parent_file.num_parent_commits + parent_file.num_local_commits; - let commit_id_length = parent_file.commit_id_length; - let change_id_length = parent_file.change_id_length; - Self { - parent_file: Some(parent_file), - num_parent_commits, - commit_id_length, - change_id_length, - graph: vec![], - lookup: BTreeMap::new(), - } - } - - fn as_composite(&self) -> CompositeIndex { - CompositeIndex::new(self) - } - - fn add_commit(&mut self, commit: &Commit) { - self.add_commit_data( - commit.id().clone(), - commit.change_id().clone(), - commit.parent_ids(), - ); - } - - fn add_commit_data( - &mut self, - commit_id: CommitId, - change_id: ChangeId, - parent_ids: &[CommitId], - ) { - if self.as_composite().has_id(&commit_id) { - return; - } - let mut entry = MutableGraphEntry { - commit_id, - change_id, - generation_number: 0, - parent_positions: SmallVec::new(), - }; - for parent_id in parent_ids { - let parent_entry = self - .as_composite() - .entry_by_id(parent_id) - .expect("parent commit is not indexed"); - entry.generation_number = max( - entry.generation_number, - parent_entry.generation_number() + 1, - ); - entry.parent_positions.push(parent_entry.pos); - } - self.lookup.insert( - entry.commit_id.clone(), - IndexPosition(self.graph.len() as u32 + self.num_parent_commits), - ); - self.graph.push(entry); - } - - fn add_commits_from(&mut self, other_segment: &dyn IndexSegment) { - let other = CompositeIndex::new(other_segment); - for pos in other_segment.segment_num_parent_commits()..other.num_commits() { - let entry = other.entry_by_pos(IndexPosition(pos)); - let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec(); - self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids); - } - } - - fn merge_in(&mut self, other: Arc) { - let mut maybe_own_ancestor = self.parent_file.clone(); - let mut maybe_other_ancestor = Some(other); - let mut files_to_add = vec![]; - loop { - if maybe_other_ancestor.is_none() { - break; - } - let other_ancestor = maybe_other_ancestor.as_ref().unwrap(); - if maybe_own_ancestor.is_none() { - files_to_add.push(other_ancestor.clone()); - maybe_other_ancestor = other_ancestor.parent_file.clone(); - continue; - } - let own_ancestor = maybe_own_ancestor.as_ref().unwrap(); - if own_ancestor.name == other_ancestor.name { - break; - } - if own_ancestor.as_composite().num_commits() - < other_ancestor.as_composite().num_commits() - { - files_to_add.push(other_ancestor.clone()); - maybe_other_ancestor = other_ancestor.parent_file.clone(); - } else { - maybe_own_ancestor = own_ancestor.parent_file.clone(); - } - } - - for file in files_to_add.iter().rev() { - self.add_commits_from(file.as_ref()); - } - } - - fn serialize(self) -> Vec { - assert_eq!(self.graph.len(), self.lookup.len()); - - let num_commits = self.graph.len() as u32; - - let mut buf = vec![]; - - if let Some(parent_file) = &self.parent_file { - buf.write_u32::(parent_file.name.len() as u32) - .unwrap(); - buf.write_all(parent_file.name.as_bytes()).unwrap(); - } else { - buf.write_u32::(0).unwrap(); - } - - buf.write_u32::(num_commits).unwrap(); - // We'll write the actual value later - let parent_overflow_offset = buf.len(); - buf.write_u32::(0_u32).unwrap(); - - let mut parent_overflow = vec![]; - for entry in self.graph { - let flags = 0; - buf.write_u32::(flags).unwrap(); - - buf.write_u32::(entry.generation_number) - .unwrap(); - - buf.write_u32::(entry.parent_positions.len() as u32) - .unwrap(); - let mut parent1_pos = IndexPosition(0); - let parent_overflow_pos = parent_overflow.len() as u32; - for (i, parent_pos) in entry.parent_positions.iter().enumerate() { - if i == 0 { - parent1_pos = *parent_pos; - } else { - parent_overflow.push(*parent_pos); - } - } - buf.write_u32::(parent1_pos.0).unwrap(); - buf.write_u32::(parent_overflow_pos).unwrap(); - - assert_eq!(entry.change_id.as_bytes().len(), self.change_id_length); - buf.write_all(entry.change_id.as_bytes()).unwrap(); - - assert_eq!(entry.commit_id.as_bytes().len(), self.commit_id_length); - buf.write_all(entry.commit_id.as_bytes()).unwrap(); - } - - for (commit_id, pos) in self.lookup { - buf.write_all(commit_id.as_bytes()).unwrap(); - buf.write_u32::(pos.0).unwrap(); - } - - (&mut buf[parent_overflow_offset..][..4]) - .write_u32::(parent_overflow.len() as u32) - .unwrap(); - for parent_pos in parent_overflow { - buf.write_u32::(parent_pos.0).unwrap(); - } - - buf - } - - /// If the MutableIndex has more than half the commits of its parent - /// ReadonlyIndex, return MutableIndex with the commits from both. This - /// is done recursively, so the stack of index files has O(log n) files. - fn maybe_squash_with_ancestors(self) -> MutableIndexSegment { - let mut num_new_commits = self.segment_num_commits(); - let mut files_to_squash = vec![]; - let mut maybe_parent_file = self.parent_file.clone(); - let mut squashed; - loop { - match maybe_parent_file { - Some(parent_file) => { - // TODO: We should probably also squash if the parent file has less than N - // commits, regardless of how many (few) are in `self`. - if 2 * num_new_commits < parent_file.segment_num_commits() { - squashed = MutableIndexSegment::incremental(parent_file); - break; - } - num_new_commits += parent_file.segment_num_commits(); - files_to_squash.push(parent_file.clone()); - maybe_parent_file = parent_file.parent_file.clone(); - } - None => { - squashed = - MutableIndexSegment::full(self.commit_id_length, self.change_id_length); - break; - } - } - } - - if files_to_squash.is_empty() { - return self; - } - - for parent_file in files_to_squash.iter().rev() { - squashed.add_commits_from(parent_file.as_ref()); - } - squashed.add_commits_from(&self); - squashed - } - - fn save_in(self, dir: PathBuf) -> io::Result> { - if self.segment_num_commits() == 0 && self.parent_file.is_some() { - return Ok(self.parent_file.unwrap()); - } - - let commit_id_length = self.commit_id_length; - let change_id_length = self.change_id_length; - - let buf = self.maybe_squash_with_ancestors().serialize(); - let mut hasher = Blake2b512::new(); - hasher.update(&buf); - let index_file_id_hex = hex::encode(hasher.finalize()); - let index_file_path = dir.join(&index_file_id_hex); - - let mut temp_file = NamedTempFile::new_in(&dir)?; - let file = temp_file.as_file_mut(); - file.write_all(&buf)?; - persist_content_addressed_temp_file(temp_file, index_file_path)?; - - ReadonlyIndexSegment::load_from( - &mut buf.as_slice(), - dir, - index_file_id_hex, - commit_id_length, - change_id_length, - ) - .map_err(|err| match err { - IndexLoadError::IndexCorrupt(err) => { - panic!("Just-created index file is corrupt: {err}") - } - IndexLoadError::IoError(err) => err, - }) - } -} - -/// In-memory mutable records for the on-disk commit index backend. -pub struct DefaultMutableIndex(MutableIndexSegment); - -impl DefaultMutableIndex { - #[cfg(test)] - pub(crate) fn full(commit_id_length: usize, change_id_length: usize) -> Self { - let mutable_segment = MutableIndexSegment::full(commit_id_length, change_id_length); - DefaultMutableIndex(mutable_segment) - } - - pub fn as_composite(&self) -> CompositeIndex { - self.0.as_composite() - } - - #[cfg(test)] - pub(crate) fn add_commit_data( - &mut self, - commit_id: CommitId, - change_id: ChangeId, - parent_ids: &[CommitId], - ) { - self.0.add_commit_data(commit_id, change_id, parent_ids); - } -} - -impl Index for DefaultMutableIndex { - fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize { - self.as_composite() - .shortest_unique_commit_id_prefix_len(commit_id) - } - - fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { - self.as_composite().resolve_prefix(prefix) - } - - fn has_id(&self, commit_id: &CommitId) -> bool { - self.as_composite().has_id(commit_id) - } - - fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool { - self.as_composite().is_ancestor(ancestor_id, descendant_id) - } - - fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec { - self.as_composite().common_ancestors(set1, set2) - } - - fn heads(&self, candidates: &mut dyn Iterator) -> Vec { - self.as_composite().heads(candidates) - } - - fn topo_order(&self, input: &mut dyn Iterator) -> Vec { - self.as_composite().topo_order(input) - } - - fn evaluate_revset<'index>( - &'index self, - expression: &ResolvedExpression, - store: &Arc, - ) -> Result + 'index>, RevsetEvaluationError> { - self.as_composite().evaluate_revset(expression, store) - } -} - -impl MutableIndex for DefaultMutableIndex { - fn as_any(&self) -> &dyn Any { - self - } - - fn into_any(self: Box) -> Box { - Box::new(*self) - } - - fn as_index(&self) -> &dyn Index { - self - } - - fn add_commit(&mut self, commit: &Commit) { - self.0.add_commit(commit); - } - - fn merge_in(&mut self, other: &dyn ReadonlyIndex) { - let other = other - .as_any() - .downcast_ref::() - .expect("index to merge in must be a DefaultReadonlyIndex"); - self.0.merge_in(other.0.clone()); - } -} - trait IndexSegment: Send + Sync { fn segment_num_parent_commits(&self) -> u32; @@ -630,193 +124,6 @@ impl From<&IndexEntry<'_>> for IndexPositionByGeneration { } } -impl IndexSegment for ReadonlyIndexSegment { - fn segment_num_parent_commits(&self) -> u32 { - self.num_parent_commits - } - - fn segment_num_commits(&self) -> u32 { - self.num_local_commits - } - - fn segment_parent_file(&self) -> Option<&Arc> { - self.parent_file.as_ref() - } - - fn segment_name(&self) -> Option { - Some(self.name.clone()) - } - - fn segment_commit_id_to_pos(&self, commit_id: &CommitId) -> Option { - let lookup_pos = self.commit_id_byte_prefix_to_lookup_pos(commit_id)?; - let entry = self.lookup_entry(lookup_pos); - (&entry.commit_id() == commit_id).then(|| entry.pos()) - } - - fn segment_commit_id_to_neighbor_positions( - &self, - commit_id: &CommitId, - ) -> (Option, Option) { - if let Some(lookup_pos) = self.commit_id_byte_prefix_to_lookup_pos(commit_id) { - let entry_commit_id = self.lookup_entry(lookup_pos).commit_id(); - let (prev_lookup_pos, next_lookup_pos) = match entry_commit_id.cmp(commit_id) { - Ordering::Less => { - assert_eq!(lookup_pos + 1, self.num_local_commits); - (Some(lookup_pos), None) - } - Ordering::Equal => { - let succ = ((lookup_pos + 1)..self.num_local_commits).next(); - (lookup_pos.checked_sub(1), succ) - } - Ordering::Greater => (lookup_pos.checked_sub(1), Some(lookup_pos)), - }; - let prev_pos = prev_lookup_pos.map(|p| self.lookup_entry(p).pos()); - let next_pos = next_lookup_pos.map(|p| self.lookup_entry(p).pos()); - (prev_pos, next_pos) - } else { - (None, None) - } - } - - fn segment_resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { - let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes()); - let lookup_pos = self - .commit_id_byte_prefix_to_lookup_pos(&min_bytes_prefix) - .unwrap_or(self.num_local_commits); - let mut matches = (lookup_pos..self.num_local_commits) - .map(|pos| self.lookup_entry(pos).commit_id()) - .take_while(|id| prefix.matches(id)) - .fuse(); - match (matches.next(), matches.next()) { - (Some(id), None) => PrefixResolution::SingleMatch(id), - (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, - (None, _) => PrefixResolution::NoMatch, - } - } - - fn segment_generation_number(&self, local_pos: u32) -> u32 { - self.graph_entry(local_pos).generation_number() - } - - fn segment_commit_id(&self, local_pos: u32) -> CommitId { - self.graph_entry(local_pos).commit_id() - } - - fn segment_change_id(&self, local_pos: u32) -> ChangeId { - self.graph_entry(local_pos).change_id() - } - - fn segment_num_parents(&self, local_pos: u32) -> u32 { - self.graph_entry(local_pos).num_parents() - } - - fn segment_parent_positions(&self, local_pos: u32) -> SmallIndexPositionsVec { - let graph_entry = self.graph_entry(local_pos); - let mut parent_entries = SmallVec::with_capacity(graph_entry.num_parents() as usize); - if graph_entry.num_parents() >= 1 { - parent_entries.push(graph_entry.parent1_pos()); - } - if graph_entry.num_parents() >= 2 { - let mut parent_overflow_pos = graph_entry.parent2_overflow_pos(); - for _ in 1..graph_entry.num_parents() { - parent_entries.push(self.overflow_parent(parent_overflow_pos)); - parent_overflow_pos += 1; - } - } - parent_entries - } - - fn segment_entry_by_pos(&self, pos: IndexPosition, local_pos: u32) -> IndexEntry { - IndexEntry { - source: self, - local_pos, - pos, - } - } -} - -impl IndexSegment for MutableIndexSegment { - fn segment_num_parent_commits(&self) -> u32 { - self.num_parent_commits - } - - fn segment_num_commits(&self) -> u32 { - self.graph.len() as u32 - } - - fn segment_parent_file(&self) -> Option<&Arc> { - self.parent_file.as_ref() - } - - fn segment_name(&self) -> Option { - None - } - - fn segment_commit_id_to_pos(&self, commit_id: &CommitId) -> Option { - self.lookup.get(commit_id).cloned() - } - - fn segment_commit_id_to_neighbor_positions( - &self, - commit_id: &CommitId, - ) -> (Option, Option) { - let prev_pos = self - .lookup - .range((Bound::Unbounded, Bound::Excluded(commit_id))) - .next_back() - .map(|(_, &pos)| pos); - let next_pos = self - .lookup - .range((Bound::Excluded(commit_id), Bound::Unbounded)) - .next() - .map(|(_, &pos)| pos); - (prev_pos, next_pos) - } - - fn segment_resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { - let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes()); - let mut matches = self - .lookup - .range((Bound::Included(&min_bytes_prefix), Bound::Unbounded)) - .map(|(id, _pos)| id) - .take_while(|&id| prefix.matches(id)) - .fuse(); - match (matches.next(), matches.next()) { - (Some(id), None) => PrefixResolution::SingleMatch(id.clone()), - (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, - (None, _) => PrefixResolution::NoMatch, - } - } - - fn segment_generation_number(&self, local_pos: u32) -> u32 { - self.graph[local_pos as usize].generation_number - } - - fn segment_commit_id(&self, local_pos: u32) -> CommitId { - self.graph[local_pos as usize].commit_id.clone() - } - - fn segment_change_id(&self, local_pos: u32) -> ChangeId { - self.graph[local_pos as usize].change_id.clone() - } - - fn segment_num_parents(&self, local_pos: u32) -> u32 { - self.graph[local_pos as usize].parent_positions.len() as u32 - } - - fn segment_parent_positions(&self, local_pos: u32) -> SmallIndexPositionsVec { - self.graph[local_pos as usize].parent_positions.clone() - } - - fn segment_entry_by_pos(&self, pos: IndexPosition, local_pos: u32) -> IndexEntry { - IndexEntry { - source: self, - local_pos, - pos, - } - } -} - #[derive(Clone)] pub struct IndexEntry<'a> { source: &'a dyn IndexSegment, @@ -882,171 +189,15 @@ impl<'a> IndexEntry<'a> { } } -impl ReadonlyIndexSegment { - fn load_from( - file: &mut dyn Read, - dir: PathBuf, - name: String, - commit_id_length: usize, - change_id_length: usize, - ) -> Result, IndexLoadError> { - let parent_filename_len = file.read_u32::()?; - let num_parent_commits; - let maybe_parent_file; - if parent_filename_len > 0 { - let mut parent_filename_bytes = vec![0; parent_filename_len as usize]; - file.read_exact(&mut parent_filename_bytes)?; - let parent_filename = String::from_utf8(parent_filename_bytes).unwrap(); - let parent_file_path = dir.join(&parent_filename); - let mut index_file = File::open(parent_file_path).unwrap(); - let parent_file = ReadonlyIndexSegment::load_from( - &mut index_file, - dir, - parent_filename, - commit_id_length, - change_id_length, - )?; - num_parent_commits = parent_file.num_parent_commits + parent_file.num_local_commits; - maybe_parent_file = Some(parent_file); - } else { - num_parent_commits = 0; - maybe_parent_file = None; - }; - let num_commits = file.read_u32::()?; - let num_parent_overflow_entries = file.read_u32::()?; - let mut data = vec![]; - file.read_to_end(&mut data)?; - let commit_graph_entry_size = CommitGraphEntry::size(commit_id_length, change_id_length); - let graph_size = (num_commits as usize) * commit_graph_entry_size; - let commit_lookup_entry_size = CommitLookupEntry::size(commit_id_length); - let lookup_size = (num_commits as usize) * commit_lookup_entry_size; - let parent_overflow_size = (num_parent_overflow_entries as usize) * 4; - let expected_size = graph_size + lookup_size + parent_overflow_size; - if data.len() != expected_size { - return Err(IndexLoadError::IndexCorrupt(name)); - } - let overflow_parent = data.split_off(graph_size + lookup_size); - let lookup = data.split_off(graph_size); - let graph = data; - Ok(Arc::new(ReadonlyIndexSegment { - parent_file: maybe_parent_file, - num_parent_commits, - name, - commit_id_length, - change_id_length, - commit_graph_entry_size, - commit_lookup_entry_size, - num_local_commits: num_commits, - graph, - lookup, - overflow_parent, - })) - } - - fn as_composite(&self) -> CompositeIndex { - CompositeIndex::new(self) - } - - fn name(&self) -> &str { - &self.name - } - - fn graph_entry(&self, local_pos: u32) -> CommitGraphEntry { - let offset = (local_pos as usize) * self.commit_graph_entry_size; - CommitGraphEntry { - data: &self.graph[offset..][..self.commit_graph_entry_size], - commit_id_length: self.commit_id_length, - change_id_length: self.change_id_length, - } - } - - fn lookup_entry(&self, lookup_pos: u32) -> CommitLookupEntry { - let offset = (lookup_pos as usize) * self.commit_lookup_entry_size; - CommitLookupEntry { - data: &self.lookup[offset..][..self.commit_lookup_entry_size], - commit_id_length: self.commit_id_length, - } - } - - fn overflow_parent(&self, overflow_pos: u32) -> IndexPosition { - let offset = (overflow_pos as usize) * 4; - IndexPosition( - (&self.overflow_parent[offset..][..4]) - .read_u32::() - .unwrap(), - ) - } - - fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &CommitId) -> Option { - if self.num_local_commits == 0 { - // Avoid overflow when subtracting 1 below - return None; - } - let mut low = 0; - let mut high = self.num_local_commits - 1; - - // binary search for the commit id - loop { - let mid = (low + high) / 2; - if high == low { - return Some(mid); - } - let entry = self.lookup_entry(mid); - if entry.commit_id_bytes() < prefix.as_bytes() { - low = mid + 1; - } else { - high = mid; - } - } - } -} - -impl Index for DefaultReadonlyIndex { - fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize { - self.as_composite() - .shortest_unique_commit_id_prefix_len(commit_id) - } - - fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { - self.as_composite().resolve_prefix(prefix) - } - - fn has_id(&self, commit_id: &CommitId) -> bool { - self.as_composite().has_id(commit_id) - } - - fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool { - self.as_composite().is_ancestor(ancestor_id, descendant_id) - } - - fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec { - self.as_composite().common_ancestors(set1, set2) - } - - fn heads(&self, candidates: &mut dyn Iterator) -> Vec { - self.as_composite().heads(candidates) - } - - fn topo_order(&self, input: &mut dyn Iterator) -> Vec { - self.as_composite().topo_order(input) - } - - fn evaluate_revset<'index>( - &'index self, - expression: &ResolvedExpression, - store: &Arc, - ) -> Result + 'index>, RevsetEvaluationError> { - self.as_composite().evaluate_revset(expression, store) - } -} - #[cfg(test)] mod tests { use std::ops::Range; + use itertools::Itertools; use smallvec::smallvec_inline; use test_case::test_case; + use super::mutable::MutableIndexSegment; use super::*; use crate::backend::{ChangeId, CommitId, ObjectId}; use crate::index::Index; diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs new file mode 100644 index 0000000000..bbb1ad96f0 --- /dev/null +++ b/lib/src/default_index/mutable.rs @@ -0,0 +1,492 @@ +// Copyright 2023 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![allow(missing_docs)] + +use std::any::Any; +use std::cmp::max; +use std::collections::BTreeMap; +use std::io; +use std::io::Write; +use std::ops::Bound; +use std::path::PathBuf; +use std::sync::Arc; + +use blake2::Blake2b512; +use byteorder::{LittleEndian, WriteBytesExt}; +use digest::Digest; +use itertools::Itertools; +use smallvec::SmallVec; +use tempfile::NamedTempFile; + +use super::composite::CompositeIndex; +use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexSegment}; +use super::store::IndexLoadError; +use super::{IndexEntry, IndexPosition, IndexSegment, SmallIndexPositionsVec}; +use crate::backend::{ChangeId, CommitId, ObjectId}; +use crate::commit::Commit; +use crate::file_util::persist_content_addressed_temp_file; +use crate::index::{HexPrefix, Index, MutableIndex, PrefixResolution, ReadonlyIndex}; +use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; +use crate::store::Store; + +#[derive(Debug)] +struct MutableGraphEntry { + commit_id: CommitId, + change_id: ChangeId, + generation_number: u32, + parent_positions: SmallIndexPositionsVec, +} + +pub(super) struct MutableIndexSegment { + parent_file: Option>, + num_parent_commits: u32, + commit_id_length: usize, + change_id_length: usize, + graph: Vec, + lookup: BTreeMap, +} + +impl MutableIndexSegment { + pub(super) fn full(commit_id_length: usize, change_id_length: usize) -> Self { + Self { + parent_file: None, + num_parent_commits: 0, + commit_id_length, + change_id_length, + graph: vec![], + lookup: BTreeMap::new(), + } + } + + pub(super) fn incremental(parent_file: Arc) -> Self { + let num_parent_commits = parent_file.as_composite().num_commits(); + let commit_id_length = parent_file.commit_id_length(); + let change_id_length = parent_file.change_id_length(); + Self { + parent_file: Some(parent_file), + num_parent_commits, + commit_id_length, + change_id_length, + graph: vec![], + lookup: BTreeMap::new(), + } + } + + pub(super) fn as_composite(&self) -> CompositeIndex { + CompositeIndex::new(self) + } + + pub(super) fn add_commit(&mut self, commit: &Commit) { + self.add_commit_data( + commit.id().clone(), + commit.change_id().clone(), + commit.parent_ids(), + ); + } + + pub(super) fn add_commit_data( + &mut self, + commit_id: CommitId, + change_id: ChangeId, + parent_ids: &[CommitId], + ) { + if self.as_composite().has_id(&commit_id) { + return; + } + let mut entry = MutableGraphEntry { + commit_id, + change_id, + generation_number: 0, + parent_positions: SmallVec::new(), + }; + for parent_id in parent_ids { + let parent_entry = self + .as_composite() + .entry_by_id(parent_id) + .expect("parent commit is not indexed"); + entry.generation_number = max( + entry.generation_number, + parent_entry.generation_number() + 1, + ); + entry.parent_positions.push(parent_entry.pos); + } + self.lookup.insert( + entry.commit_id.clone(), + IndexPosition(self.graph.len() as u32 + self.num_parent_commits), + ); + self.graph.push(entry); + } + + pub(super) fn add_commits_from(&mut self, other_segment: &dyn IndexSegment) { + let other = CompositeIndex::new(other_segment); + for pos in other_segment.segment_num_parent_commits()..other.num_commits() { + let entry = other.entry_by_pos(IndexPosition(pos)); + let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec(); + self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids); + } + } + + pub(super) fn merge_in(&mut self, other: Arc) { + let mut maybe_own_ancestor = self.parent_file.clone(); + let mut maybe_other_ancestor = Some(other); + let mut files_to_add = vec![]; + loop { + if maybe_other_ancestor.is_none() { + break; + } + let other_ancestor = maybe_other_ancestor.as_ref().unwrap(); + if maybe_own_ancestor.is_none() { + files_to_add.push(other_ancestor.clone()); + maybe_other_ancestor = other_ancestor.segment_parent_file().cloned(); + continue; + } + let own_ancestor = maybe_own_ancestor.as_ref().unwrap(); + if own_ancestor.name() == other_ancestor.name() { + break; + } + if own_ancestor.as_composite().num_commits() + < other_ancestor.as_composite().num_commits() + { + files_to_add.push(other_ancestor.clone()); + maybe_other_ancestor = other_ancestor.segment_parent_file().cloned(); + } else { + maybe_own_ancestor = own_ancestor.segment_parent_file().cloned(); + } + } + + for file in files_to_add.iter().rev() { + self.add_commits_from(file.as_ref()); + } + } + + fn serialize(self) -> Vec { + assert_eq!(self.graph.len(), self.lookup.len()); + + let num_commits = self.graph.len() as u32; + + let mut buf = vec![]; + + if let Some(parent_file) = &self.parent_file { + buf.write_u32::(parent_file.name().len() as u32) + .unwrap(); + buf.write_all(parent_file.name().as_bytes()).unwrap(); + } else { + buf.write_u32::(0).unwrap(); + } + + buf.write_u32::(num_commits).unwrap(); + // We'll write the actual value later + let parent_overflow_offset = buf.len(); + buf.write_u32::(0_u32).unwrap(); + + let mut parent_overflow = vec![]; + for entry in self.graph { + let flags = 0; + buf.write_u32::(flags).unwrap(); + + buf.write_u32::(entry.generation_number) + .unwrap(); + + buf.write_u32::(entry.parent_positions.len() as u32) + .unwrap(); + let mut parent1_pos = IndexPosition(0); + let parent_overflow_pos = parent_overflow.len() as u32; + for (i, parent_pos) in entry.parent_positions.iter().enumerate() { + if i == 0 { + parent1_pos = *parent_pos; + } else { + parent_overflow.push(*parent_pos); + } + } + buf.write_u32::(parent1_pos.0).unwrap(); + buf.write_u32::(parent_overflow_pos).unwrap(); + + assert_eq!(entry.change_id.as_bytes().len(), self.change_id_length); + buf.write_all(entry.change_id.as_bytes()).unwrap(); + + assert_eq!(entry.commit_id.as_bytes().len(), self.commit_id_length); + buf.write_all(entry.commit_id.as_bytes()).unwrap(); + } + + for (commit_id, pos) in self.lookup { + buf.write_all(commit_id.as_bytes()).unwrap(); + buf.write_u32::(pos.0).unwrap(); + } + + (&mut buf[parent_overflow_offset..][..4]) + .write_u32::(parent_overflow.len() as u32) + .unwrap(); + for parent_pos in parent_overflow { + buf.write_u32::(parent_pos.0).unwrap(); + } + + buf + } + + /// If the MutableIndex has more than half the commits of its parent + /// ReadonlyIndex, return MutableIndex with the commits from both. This + /// is done recursively, so the stack of index files has O(log n) files. + fn maybe_squash_with_ancestors(self) -> MutableIndexSegment { + let mut num_new_commits = self.segment_num_commits(); + let mut files_to_squash = vec![]; + let mut maybe_parent_file = self.parent_file.clone(); + let mut squashed; + loop { + match maybe_parent_file { + Some(parent_file) => { + // TODO: We should probably also squash if the parent file has less than N + // commits, regardless of how many (few) are in `self`. + if 2 * num_new_commits < parent_file.segment_num_commits() { + squashed = MutableIndexSegment::incremental(parent_file); + break; + } + num_new_commits += parent_file.segment_num_commits(); + files_to_squash.push(parent_file.clone()); + maybe_parent_file = parent_file.segment_parent_file().cloned(); + } + None => { + squashed = + MutableIndexSegment::full(self.commit_id_length, self.change_id_length); + break; + } + } + } + + if files_to_squash.is_empty() { + return self; + } + + for parent_file in files_to_squash.iter().rev() { + squashed.add_commits_from(parent_file.as_ref()); + } + squashed.add_commits_from(&self); + squashed + } + + pub(super) fn save_in(self, dir: PathBuf) -> io::Result> { + if self.segment_num_commits() == 0 && self.parent_file.is_some() { + return Ok(self.parent_file.unwrap()); + } + + let commit_id_length = self.commit_id_length; + let change_id_length = self.change_id_length; + + let buf = self.maybe_squash_with_ancestors().serialize(); + let mut hasher = Blake2b512::new(); + hasher.update(&buf); + let index_file_id_hex = hex::encode(hasher.finalize()); + let index_file_path = dir.join(&index_file_id_hex); + + let mut temp_file = NamedTempFile::new_in(&dir)?; + let file = temp_file.as_file_mut(); + file.write_all(&buf)?; + persist_content_addressed_temp_file(temp_file, index_file_path)?; + + ReadonlyIndexSegment::load_from( + &mut buf.as_slice(), + dir, + index_file_id_hex, + commit_id_length, + change_id_length, + ) + .map_err(|err| match err { + IndexLoadError::IndexCorrupt(err) => { + panic!("Just-created index file is corrupt: {err}") + } + IndexLoadError::IoError(err) => err, + }) + } +} + +impl IndexSegment for MutableIndexSegment { + fn segment_num_parent_commits(&self) -> u32 { + self.num_parent_commits + } + + fn segment_num_commits(&self) -> u32 { + self.graph.len() as u32 + } + + fn segment_parent_file(&self) -> Option<&Arc> { + self.parent_file.as_ref() + } + + fn segment_name(&self) -> Option { + None + } + + fn segment_commit_id_to_pos(&self, commit_id: &CommitId) -> Option { + self.lookup.get(commit_id).cloned() + } + + fn segment_commit_id_to_neighbor_positions( + &self, + commit_id: &CommitId, + ) -> (Option, Option) { + let prev_pos = self + .lookup + .range((Bound::Unbounded, Bound::Excluded(commit_id))) + .next_back() + .map(|(_, &pos)| pos); + let next_pos = self + .lookup + .range((Bound::Excluded(commit_id), Bound::Unbounded)) + .next() + .map(|(_, &pos)| pos); + (prev_pos, next_pos) + } + + fn segment_resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { + let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes()); + let mut matches = self + .lookup + .range((Bound::Included(&min_bytes_prefix), Bound::Unbounded)) + .map(|(id, _pos)| id) + .take_while(|&id| prefix.matches(id)) + .fuse(); + match (matches.next(), matches.next()) { + (Some(id), None) => PrefixResolution::SingleMatch(id.clone()), + (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, + (None, _) => PrefixResolution::NoMatch, + } + } + + fn segment_generation_number(&self, local_pos: u32) -> u32 { + self.graph[local_pos as usize].generation_number + } + + fn segment_commit_id(&self, local_pos: u32) -> CommitId { + self.graph[local_pos as usize].commit_id.clone() + } + + fn segment_change_id(&self, local_pos: u32) -> ChangeId { + self.graph[local_pos as usize].change_id.clone() + } + + fn segment_num_parents(&self, local_pos: u32) -> u32 { + self.graph[local_pos as usize].parent_positions.len() as u32 + } + + fn segment_parent_positions(&self, local_pos: u32) -> SmallIndexPositionsVec { + self.graph[local_pos as usize].parent_positions.clone() + } + + fn segment_entry_by_pos(&self, pos: IndexPosition, local_pos: u32) -> IndexEntry { + IndexEntry { + source: self, + local_pos, + pos, + } + } +} + +/// In-memory mutable records for the on-disk commit index backend. +pub struct DefaultMutableIndex(MutableIndexSegment); + +impl DefaultMutableIndex { + #[cfg(test)] + pub(crate) fn full(commit_id_length: usize, change_id_length: usize) -> Self { + let mutable_segment = MutableIndexSegment::full(commit_id_length, change_id_length); + DefaultMutableIndex(mutable_segment) + } + + pub(super) fn incremental(parent_file: Arc) -> Self { + let mutable_segment = MutableIndexSegment::incremental(parent_file); + DefaultMutableIndex(mutable_segment) + } + + pub fn as_composite(&self) -> CompositeIndex { + self.0.as_composite() + } + + #[cfg(test)] + pub(crate) fn add_commit_data( + &mut self, + commit_id: CommitId, + change_id: ChangeId, + parent_ids: &[CommitId], + ) { + self.0.add_commit_data(commit_id, change_id, parent_ids); + } + + pub(super) fn save_in(self, dir: PathBuf) -> io::Result> { + self.0.save_in(dir) + } +} + +impl Index for DefaultMutableIndex { + fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize { + self.as_composite() + .shortest_unique_commit_id_prefix_len(commit_id) + } + + fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { + self.as_composite().resolve_prefix(prefix) + } + + fn has_id(&self, commit_id: &CommitId) -> bool { + self.as_composite().has_id(commit_id) + } + + fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool { + self.as_composite().is_ancestor(ancestor_id, descendant_id) + } + + fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec { + self.as_composite().common_ancestors(set1, set2) + } + + fn heads(&self, candidates: &mut dyn Iterator) -> Vec { + self.as_composite().heads(candidates) + } + + fn topo_order(&self, input: &mut dyn Iterator) -> Vec { + self.as_composite().topo_order(input) + } + + fn evaluate_revset<'index>( + &'index self, + expression: &ResolvedExpression, + store: &Arc, + ) -> Result + 'index>, RevsetEvaluationError> { + self.as_composite().evaluate_revset(expression, store) + } +} + +impl MutableIndex for DefaultMutableIndex { + fn as_any(&self) -> &dyn Any { + self + } + + fn into_any(self: Box) -> Box { + Box::new(*self) + } + + fn as_index(&self) -> &dyn Index { + self + } + + fn add_commit(&mut self, commit: &Commit) { + self.0.add_commit(commit); + } + + fn merge_in(&mut self, other: &dyn ReadonlyIndex) { + let other = other + .as_any() + .downcast_ref::() + .expect("index to merge in must be a DefaultReadonlyIndex"); + self.0.merge_in(other.as_segment().clone()); + } +} diff --git a/lib/src/default_index/readonly.rs b/lib/src/default_index/readonly.rs new file mode 100644 index 0000000000..342c8f821d --- /dev/null +++ b/lib/src/default_index/readonly.rs @@ -0,0 +1,450 @@ +// Copyright 2023 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![allow(missing_docs)] + +use std::any::Any; +use std::cmp::Ordering; +use std::fmt::{Debug, Formatter}; +use std::fs::File; +use std::io::Read; +use std::path::PathBuf; +use std::sync::Arc; + +use byteorder::{LittleEndian, ReadBytesExt}; +use smallvec::SmallVec; + +use super::composite::CompositeIndex; +use super::mutable::DefaultMutableIndex; +use super::store::IndexLoadError; +use super::{IndexEntry, IndexPosition, IndexSegment, SmallIndexPositionsVec}; +use crate::backend::{ChangeId, CommitId, ObjectId}; +use crate::index::{HexPrefix, Index, MutableIndex, PrefixResolution, ReadonlyIndex}; +use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; +use crate::store::Store; + +struct CommitGraphEntry<'a> { + data: &'a [u8], + commit_id_length: usize, + change_id_length: usize, +} + +// TODO: Add pointers to ancestors further back, like a skip list. Clear the +// lowest set bit to determine which generation number the pointers point to. +impl CommitGraphEntry<'_> { + fn size(commit_id_length: usize, change_id_length: usize) -> usize { + 20 + commit_id_length + change_id_length + } + + fn generation_number(&self) -> u32 { + (&self.data[4..]).read_u32::().unwrap() + } + + fn num_parents(&self) -> u32 { + (&self.data[8..]).read_u32::().unwrap() + } + + fn parent1_pos(&self) -> IndexPosition { + IndexPosition((&self.data[12..]).read_u32::().unwrap()) + } + + fn parent2_overflow_pos(&self) -> u32 { + (&self.data[16..]).read_u32::().unwrap() + } + + // TODO: Consider storing the change ids in a separate table. That table could + // be sorted by change id and have the end index into a list as value. That list + // would be the concatenation of all index positions associated with the change. + // Possible advantages: avoids duplicating change ids; smaller main graph leads + // to better cache locality when walking it; ability to quickly find all + // commits associated with a change id. + fn change_id(&self) -> ChangeId { + ChangeId::new(self.data[20..][..self.change_id_length].to_vec()) + } + + fn commit_id(&self) -> CommitId { + CommitId::from_bytes(&self.data[20 + self.change_id_length..][..self.commit_id_length]) + } +} + +struct CommitLookupEntry<'a> { + data: &'a [u8], + commit_id_length: usize, +} + +impl CommitLookupEntry<'_> { + fn size(commit_id_length: usize) -> usize { + commit_id_length + 4 + } + + fn commit_id(&self) -> CommitId { + CommitId::from_bytes(self.commit_id_bytes()) + } + + // might be better to add borrowed version of CommitId + fn commit_id_bytes(&self) -> &[u8] { + &self.data[0..self.commit_id_length] + } + + fn pos(&self) -> IndexPosition { + IndexPosition( + (&self.data[self.commit_id_length..][..4]) + .read_u32::() + .unwrap(), + ) + } +} + +// File format: +// u32: number of entries +// u32: number of parent overflow entries +// for each entry, in some topological order with parents first: +// u32: generation number +// u32: number of parents +// u32: position in this table for parent 1 +// u32: position in the overflow table of parent 2 +// : commit id +// for each entry, sorted by commit id: +// : commit id +// u32: position in the entry table above +// TODO: add a version number +// TODO: replace the table by a trie so we don't have to repeat the full commit +// ids +// TODO: add a fanout table like git's commit graph has? +pub(super) struct ReadonlyIndexSegment { + parent_file: Option>, + num_parent_commits: u32, + name: String, + commit_id_length: usize, + change_id_length: usize, + commit_graph_entry_size: usize, + commit_lookup_entry_size: usize, + // Number of commits not counting the parent file + num_local_commits: u32, + graph: Vec, + lookup: Vec, + overflow_parent: Vec, +} + +impl Debug for ReadonlyIndexSegment { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + f.debug_struct("ReadonlyIndexSegment") + .field("name", &self.name) + .field("parent_file", &self.parent_file) + .finish() + } +} + +impl ReadonlyIndexSegment { + pub(super) fn load_from( + file: &mut dyn Read, + dir: PathBuf, + name: String, + commit_id_length: usize, + change_id_length: usize, + ) -> Result, IndexLoadError> { + let parent_filename_len = file.read_u32::()?; + let num_parent_commits; + let maybe_parent_file; + if parent_filename_len > 0 { + let mut parent_filename_bytes = vec![0; parent_filename_len as usize]; + file.read_exact(&mut parent_filename_bytes)?; + let parent_filename = String::from_utf8(parent_filename_bytes).unwrap(); + let parent_file_path = dir.join(&parent_filename); + let mut index_file = File::open(parent_file_path).unwrap(); + let parent_file = ReadonlyIndexSegment::load_from( + &mut index_file, + dir, + parent_filename, + commit_id_length, + change_id_length, + )?; + num_parent_commits = parent_file.num_parent_commits + parent_file.num_local_commits; + maybe_parent_file = Some(parent_file); + } else { + num_parent_commits = 0; + maybe_parent_file = None; + }; + let num_commits = file.read_u32::()?; + let num_parent_overflow_entries = file.read_u32::()?; + let mut data = vec![]; + file.read_to_end(&mut data)?; + let commit_graph_entry_size = CommitGraphEntry::size(commit_id_length, change_id_length); + let graph_size = (num_commits as usize) * commit_graph_entry_size; + let commit_lookup_entry_size = CommitLookupEntry::size(commit_id_length); + let lookup_size = (num_commits as usize) * commit_lookup_entry_size; + let parent_overflow_size = (num_parent_overflow_entries as usize) * 4; + let expected_size = graph_size + lookup_size + parent_overflow_size; + if data.len() != expected_size { + return Err(IndexLoadError::IndexCorrupt(name)); + } + let overflow_parent = data.split_off(graph_size + lookup_size); + let lookup = data.split_off(graph_size); + let graph = data; + Ok(Arc::new(ReadonlyIndexSegment { + parent_file: maybe_parent_file, + num_parent_commits, + name, + commit_id_length, + change_id_length, + commit_graph_entry_size, + commit_lookup_entry_size, + num_local_commits: num_commits, + graph, + lookup, + overflow_parent, + })) + } + + pub(super) fn as_composite(&self) -> CompositeIndex { + CompositeIndex::new(self) + } + + pub(super) fn name(&self) -> &str { + &self.name + } + + pub(super) fn commit_id_length(&self) -> usize { + self.commit_id_length + } + + pub(super) fn change_id_length(&self) -> usize { + self.change_id_length + } + + fn graph_entry(&self, local_pos: u32) -> CommitGraphEntry { + let offset = (local_pos as usize) * self.commit_graph_entry_size; + CommitGraphEntry { + data: &self.graph[offset..][..self.commit_graph_entry_size], + commit_id_length: self.commit_id_length, + change_id_length: self.change_id_length, + } + } + + fn lookup_entry(&self, lookup_pos: u32) -> CommitLookupEntry { + let offset = (lookup_pos as usize) * self.commit_lookup_entry_size; + CommitLookupEntry { + data: &self.lookup[offset..][..self.commit_lookup_entry_size], + commit_id_length: self.commit_id_length, + } + } + + fn overflow_parent(&self, overflow_pos: u32) -> IndexPosition { + let offset = (overflow_pos as usize) * 4; + IndexPosition( + (&self.overflow_parent[offset..][..4]) + .read_u32::() + .unwrap(), + ) + } + + fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &CommitId) -> Option { + if self.num_local_commits == 0 { + // Avoid overflow when subtracting 1 below + return None; + } + let mut low = 0; + let mut high = self.num_local_commits - 1; + + // binary search for the commit id + loop { + let mid = (low + high) / 2; + if high == low { + return Some(mid); + } + let entry = self.lookup_entry(mid); + if entry.commit_id_bytes() < prefix.as_bytes() { + low = mid + 1; + } else { + high = mid; + } + } + } +} + +impl IndexSegment for ReadonlyIndexSegment { + fn segment_num_parent_commits(&self) -> u32 { + self.num_parent_commits + } + + fn segment_num_commits(&self) -> u32 { + self.num_local_commits + } + + fn segment_parent_file(&self) -> Option<&Arc> { + self.parent_file.as_ref() + } + + fn segment_name(&self) -> Option { + Some(self.name.clone()) + } + + fn segment_commit_id_to_pos(&self, commit_id: &CommitId) -> Option { + let lookup_pos = self.commit_id_byte_prefix_to_lookup_pos(commit_id)?; + let entry = self.lookup_entry(lookup_pos); + (&entry.commit_id() == commit_id).then(|| entry.pos()) + } + + fn segment_commit_id_to_neighbor_positions( + &self, + commit_id: &CommitId, + ) -> (Option, Option) { + if let Some(lookup_pos) = self.commit_id_byte_prefix_to_lookup_pos(commit_id) { + let entry_commit_id = self.lookup_entry(lookup_pos).commit_id(); + let (prev_lookup_pos, next_lookup_pos) = match entry_commit_id.cmp(commit_id) { + Ordering::Less => { + assert_eq!(lookup_pos + 1, self.num_local_commits); + (Some(lookup_pos), None) + } + Ordering::Equal => { + let succ = ((lookup_pos + 1)..self.num_local_commits).next(); + (lookup_pos.checked_sub(1), succ) + } + Ordering::Greater => (lookup_pos.checked_sub(1), Some(lookup_pos)), + }; + let prev_pos = prev_lookup_pos.map(|p| self.lookup_entry(p).pos()); + let next_pos = next_lookup_pos.map(|p| self.lookup_entry(p).pos()); + (prev_pos, next_pos) + } else { + (None, None) + } + } + + fn segment_resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { + let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes()); + let lookup_pos = self + .commit_id_byte_prefix_to_lookup_pos(&min_bytes_prefix) + .unwrap_or(self.num_local_commits); + let mut matches = (lookup_pos..self.num_local_commits) + .map(|pos| self.lookup_entry(pos).commit_id()) + .take_while(|id| prefix.matches(id)) + .fuse(); + match (matches.next(), matches.next()) { + (Some(id), None) => PrefixResolution::SingleMatch(id), + (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, + (None, _) => PrefixResolution::NoMatch, + } + } + + fn segment_generation_number(&self, local_pos: u32) -> u32 { + self.graph_entry(local_pos).generation_number() + } + + fn segment_commit_id(&self, local_pos: u32) -> CommitId { + self.graph_entry(local_pos).commit_id() + } + + fn segment_change_id(&self, local_pos: u32) -> ChangeId { + self.graph_entry(local_pos).change_id() + } + + fn segment_num_parents(&self, local_pos: u32) -> u32 { + self.graph_entry(local_pos).num_parents() + } + + fn segment_parent_positions(&self, local_pos: u32) -> SmallIndexPositionsVec { + let graph_entry = self.graph_entry(local_pos); + let mut parent_entries = SmallVec::with_capacity(graph_entry.num_parents() as usize); + if graph_entry.num_parents() >= 1 { + parent_entries.push(graph_entry.parent1_pos()); + } + if graph_entry.num_parents() >= 2 { + let mut parent_overflow_pos = graph_entry.parent2_overflow_pos(); + for _ in 1..graph_entry.num_parents() { + parent_entries.push(self.overflow_parent(parent_overflow_pos)); + parent_overflow_pos += 1; + } + } + parent_entries + } + + fn segment_entry_by_pos(&self, pos: IndexPosition, local_pos: u32) -> IndexEntry { + IndexEntry { + source: self, + local_pos, + pos, + } + } +} + +/// Commit index backend which stores data on local disk. +#[derive(Debug)] +pub struct DefaultReadonlyIndex(Arc); + +impl DefaultReadonlyIndex { + pub(super) fn from_segment(segment: Arc) -> Self { + DefaultReadonlyIndex(segment) + } + + pub(super) fn as_segment(&self) -> &Arc { + &self.0 + } + + pub fn as_composite(&self) -> CompositeIndex { + self.0.as_composite() + } +} + +impl Index for DefaultReadonlyIndex { + fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize { + self.as_composite() + .shortest_unique_commit_id_prefix_len(commit_id) + } + + fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { + self.as_composite().resolve_prefix(prefix) + } + + fn has_id(&self, commit_id: &CommitId) -> bool { + self.as_composite().has_id(commit_id) + } + + fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool { + self.as_composite().is_ancestor(ancestor_id, descendant_id) + } + + fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec { + self.as_composite().common_ancestors(set1, set2) + } + + fn heads(&self, candidates: &mut dyn Iterator) -> Vec { + self.as_composite().heads(candidates) + } + + fn topo_order(&self, input: &mut dyn Iterator) -> Vec { + self.as_composite().topo_order(input) + } + + fn evaluate_revset<'index>( + &'index self, + expression: &ResolvedExpression, + store: &Arc, + ) -> Result + 'index>, RevsetEvaluationError> { + self.as_composite().evaluate_revset(expression, store) + } +} + +impl ReadonlyIndex for DefaultReadonlyIndex { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_index(&self) -> &dyn Index { + self + } + + fn start_modification(&self) -> Box { + Box::new(DefaultMutableIndex::incremental(self.0.clone())) + } +} diff --git a/lib/src/default_index/store.rs b/lib/src/default_index/store.rs index 0a347972eb..9c3142298d 100644 --- a/lib/src/default_index/store.rs +++ b/lib/src/default_index/store.rs @@ -25,7 +25,8 @@ use itertools::Itertools; use tempfile::NamedTempFile; use thiserror::Error; -use super::{DefaultMutableIndex, DefaultReadonlyIndex, MutableIndexSegment, ReadonlyIndexSegment}; +use super::mutable::{DefaultMutableIndex, MutableIndexSegment}; +use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexSegment}; use crate::backend::{CommitId, ObjectId}; use crate::commit::CommitByCommitterTimestamp; use crate::dag_walk; @@ -235,7 +236,7 @@ impl IndexStore for DefaultIndexStore { } else { self.index_at_operation(store, op).unwrap() }; - Box::new(DefaultReadonlyIndex(index_segment)) + Box::new(DefaultReadonlyIndex::from_segment(index_segment)) } fn write_index( @@ -247,7 +248,7 @@ impl IndexStore for DefaultIndexStore { .into_any() .downcast::() .expect("index to merge in must be a DefaultMutableIndex"); - let index_segment = index.0.save_in(self.dir.clone()).map_err(|err| { + let index_segment = index.save_in(self.dir.clone()).map_err(|err| { IndexWriteError::Other(format!("Failed to write commit index file: {err}")) })?; self.associate_file_with_operation(&index_segment, op_id) @@ -256,6 +257,6 @@ impl IndexStore for DefaultIndexStore { "Failed to associate commit index file with a operation {op_id:?}: {err}" )) })?; - Ok(Box::new(DefaultReadonlyIndex(index_segment))) + Ok(Box::new(DefaultReadonlyIndex::from_segment(index_segment))) } }