From 466ec2faf278dac853b45e171336e4b7e16b8f3c Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Tue, 19 Dec 2023 20:09:23 +0900 Subject: [PATCH 1/2] index: leverage ancestor iterator to collect segments to be squashed I think "for" loop is easier to follow. Maybe it could be rewritten further to .find_map() loop, but that would be too clever. I also made ancestor_index_segments() pub(super) since it doesn't make sense to only provide ancestor_files_without_local(). --- lib/src/default_index/composite.rs | 8 +++++-- lib/src/default_index/mutable.rs | 34 ++++++++++++------------------ 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs index 943a084c4d..67903921c6 100644 --- a/lib/src/default_index/composite.rs +++ b/lib/src/default_index/composite.rs @@ -91,12 +91,16 @@ impl<'a> CompositeIndex<'a> { CompositeIndex(segment) } - fn ancestor_files_without_local(&self) -> impl Iterator> { + /// Iterates parent and its ancestor readonly index segments. + pub(super) fn ancestor_files_without_local( + &self, + ) -> impl Iterator> { let parent_file = self.0.segment_parent_file(); iter::successors(parent_file, |file| file.segment_parent_file()) } - fn ancestor_index_segments(&self) -> impl Iterator { + /// Iterates self and its ancestor index segments. + pub(super) fn ancestor_index_segments(&self) -> impl Iterator { iter::once(self.0).chain( self.ancestor_files_without_local() .map(|file| file.as_ref() as &dyn IndexSegment), diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 3b45036f07..55ae68c87b 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -240,33 +240,27 @@ impl MutableIndexSegment { fn maybe_squash_with_ancestors(self) -> MutableIndexSegment { let mut num_new_commits = self.segment_num_commits(); let mut files_to_squash = vec![]; - let mut maybe_parent_file = self.parent_file.clone(); - let mut squashed; - loop { - match maybe_parent_file { - Some(parent_file) => { - // TODO: We should probably also squash if the parent file has less than N - // commits, regardless of how many (few) are in `self`. - if 2 * num_new_commits < parent_file.segment_num_commits() { - squashed = MutableIndexSegment::incremental(parent_file); - break; - } - num_new_commits += parent_file.segment_num_commits(); - files_to_squash.push(parent_file.clone()); - maybe_parent_file = parent_file.segment_parent_file().cloned(); - } - None => { - squashed = - MutableIndexSegment::full(self.commit_id_length, self.change_id_length); - break; - } + let mut base_parent_file = None; + for parent_file in self.as_composite().ancestor_files_without_local() { + // TODO: We should probably also squash if the parent file has less than N + // commits, regardless of how many (few) are in `self`. + if 2 * num_new_commits < parent_file.segment_num_commits() { + base_parent_file = Some(parent_file.clone()); + break; } + num_new_commits += parent_file.segment_num_commits(); + files_to_squash.push(parent_file.clone()); } if files_to_squash.is_empty() { return self; } + let mut squashed = if let Some(parent_file) = base_parent_file { + MutableIndexSegment::incremental(parent_file) + } else { + MutableIndexSegment::full(self.commit_id_length, self.change_id_length) + }; for parent_file in files_to_squash.iter().rev() { squashed.add_commits_from(parent_file.as_ref()); } From 8ee12c7c0d5d5f34803d4c01101ee50493aa29f5 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Tue, 19 Dec 2023 20:57:54 +0900 Subject: [PATCH 2/2] index: let caller of segment-level save-in() squash segments explicitly There are many unit tests that call mutable_segment.save_in(), but I don't think these callers expect that the segment file could be squashed depending on the size. Let's make it caller's responsibility. maybe_squash_with_ancestors() should be cheap if segment_num_commits() == 0, so it's okay to call it before checking the emptiness. --- lib/src/default_index/mutable.rs | 15 +++++++-------- lib/src/default_index/store.rs | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 55ae68c87b..7260225ebd 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -273,11 +273,10 @@ impl MutableIndexSegment { return Ok(self.parent_file.unwrap()); } - let squashed = self.maybe_squash_with_ancestors(); let mut buf = Vec::new(); - squashed.serialize_parent_filename(&mut buf); + self.serialize_parent_filename(&mut buf); let local_entries_offset = buf.len(); - squashed.serialize_local_entries(&mut buf); + self.serialize_local_entries(&mut buf); let mut hasher = Blake2b512::new(); hasher.update(&buf); let index_file_id_hex = hex::encode(hasher.finalize()); @@ -291,9 +290,9 @@ impl MutableIndexSegment { Ok(ReadonlyIndexSegment::load_with_parent_file( &mut &buf[local_entries_offset..], index_file_id_hex, - squashed.parent_file, - squashed.commit_id_length, - squashed.change_id_length, + self.parent_file, + self.commit_id_length, + self.change_id_length, ) .expect("in-memory index data should be valid and readable")) } @@ -405,8 +404,8 @@ impl DefaultMutableIndex { self.0.add_commit_data(commit_id, change_id, parent_ids); } - pub(super) fn save_in(self, dir: &Path) -> io::Result> { - self.0.save_in(dir) + pub(super) fn squash_and_save_in(self, dir: &Path) -> io::Result> { + self.0.maybe_squash_with_ancestors().save_in(dir) } } diff --git a/lib/src/default_index/store.rs b/lib/src/default_index/store.rs index 4c59d709c5..0d188a2c12 100644 --- a/lib/src/default_index/store.rs +++ b/lib/src/default_index/store.rs @@ -194,7 +194,7 @@ impl DefaultIndexStore { op_id: &OperationId, ) -> Result, DefaultIndexStoreError> { let index_segment = mutable_index - .save_in(&self.dir) + .squash_and_save_in(&self.dir) .map_err(DefaultIndexStoreError::SaveIndex)?; self.associate_file_with_operation(&index_segment, op_id) .map_err(|source| DefaultIndexStoreError::AssociateIndex {