From e40f0f08dd4a63713839f8b8f5f87092cf10bc97 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 11 Dec 2023 18:47:02 +0900 Subject: [PATCH 1/5] index: move default_index_store.rs to sub directory named default_index default_index_store.rs is relatively big, and it contains types and impls in arbitrary order. Let's split them into sub modules. After everything moved, mod.rs will only contain tests. --- cli/src/commands/debug.rs | 2 +- lib/src/{default_index_store.rs => default_index/mod.rs} | 0 lib/src/default_revset_engine.rs | 4 ++-- lib/src/default_revset_graph_iterator.rs | 2 +- lib/src/lib.rs | 2 +- lib/src/repo.rs | 2 +- lib/tests/test_default_revset_graph_iterator.rs | 2 +- lib/tests/test_index.rs | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) rename lib/src/{default_index_store.rs => default_index/mod.rs} (100%) diff --git a/cli/src/commands/debug.rs b/cli/src/commands/debug.rs index 8583f29e22..c6f3cb8f5c 100644 --- a/cli/src/commands/debug.rs +++ b/cli/src/commands/debug.rs @@ -18,7 +18,7 @@ use std::io::Write as _; use clap::Subcommand; use jj_lib::backend::ObjectId; -use jj_lib::default_index_store::{DefaultIndexStore, DefaultReadonlyIndex}; +use jj_lib::default_index::{DefaultIndexStore, DefaultReadonlyIndex}; use jj_lib::local_working_copy::LocalWorkingCopy; use jj_lib::revset; use jj_lib::working_copy::WorkingCopy; diff --git a/lib/src/default_index_store.rs b/lib/src/default_index/mod.rs similarity index 100% rename from lib/src/default_index_store.rs rename to lib/src/default_index/mod.rs diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index 6b5644149a..49b06b9fb3 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use itertools::Itertools; use crate::backend::{ChangeId, CommitId, MillisSinceEpoch}; -use crate::default_index_store::{CompositeIndex, IndexEntry, IndexEntryByPosition, IndexPosition}; +use crate::default_index::{CompositeIndex, IndexEntry, IndexEntryByPosition, IndexPosition}; use crate::default_revset_graph_iterator::RevsetGraphIterator; use crate::id_prefix::{IdIndex, IdIndexSource, IdIndexSourceEntry}; use crate::index::{HexPrefix, PrefixResolution}; @@ -857,7 +857,7 @@ fn has_diff_from_parent( mod tests { use super::*; use crate::backend::{ChangeId, CommitId, ObjectId}; - use crate::default_index_store::DefaultMutableIndex; + use crate::default_index::DefaultMutableIndex; /// Generator of unique 16-byte ChangeId excluding root id fn change_id_generator() -> impl FnMut() -> ChangeId { diff --git a/lib/src/default_revset_graph_iterator.rs b/lib/src/default_revset_graph_iterator.rs index 4229608048..78d3cf825a 100644 --- a/lib/src/default_revset_graph_iterator.rs +++ b/lib/src/default_revset_graph_iterator.rs @@ -18,7 +18,7 @@ use std::cmp::{min, Ordering}; use std::collections::{BTreeMap, HashSet}; use crate::backend::CommitId; -use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition}; +use crate::default_index::{CompositeIndex, IndexEntry, IndexPosition}; use crate::revset_graph::{RevsetGraphEdge, RevsetGraphEdgeType}; /// Like `RevsetGraphEdge`, but stores `IndexPosition` instead. 
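A note on the mechanics of this rename: Rust treats `foo.rs` and `foo/mod.rs` as the same kind of module root, so the move changes nothing about how the module behaves — only the module's name, and therefore every import path, changes, which is all the one-line hunks in this patch do. Schematically (paths as in the hunks above and below):

    // Before the move (module rooted at lib/src/default_index_store.rs):
    use jj_lib::default_index_store::{DefaultIndexStore, DefaultReadonlyIndex};

    // After the move (same module contents, rooted at lib/src/default_index/mod.rs):
    use jj_lib::default_index::{DefaultIndexStore, DefaultReadonlyIndex};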
diff --git a/lib/src/lib.rs b/lib/src/lib.rs index cf649466e5..3f09ec6f4a 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -25,7 +25,7 @@ pub mod commit; pub mod commit_builder; pub mod conflicts; pub mod dag_walk; -pub mod default_index_store; +pub mod default_index; pub mod default_revset_engine; pub mod default_revset_graph_iterator; pub mod default_submodule_store; diff --git a/lib/src/repo.rs b/lib/src/repo.rs index 76810dc2f0..6fd87ee437 100644 --- a/lib/src/repo.rs +++ b/lib/src/repo.rs @@ -35,7 +35,7 @@ use crate::backend::{ }; use crate::commit::{Commit, CommitByCommitterTimestamp}; use crate::commit_builder::CommitBuilder; -use crate::default_index_store::DefaultIndexStore; +use crate::default_index::DefaultIndexStore; use crate::default_submodule_store::DefaultSubmoduleStore; use crate::file_util::{IoResultExt as _, PathError}; use crate::git_backend::GitBackend; diff --git a/lib/tests/test_default_revset_graph_iterator.rs b/lib/tests/test_default_revset_graph_iterator.rs index 7a3912d1d1..60b5666809 100644 --- a/lib/tests/test_default_revset_graph_iterator.rs +++ b/lib/tests/test_default_revset_graph_iterator.rs @@ -14,7 +14,7 @@ use itertools::Itertools; use jj_lib::commit::Commit; -use jj_lib::default_index_store::DefaultReadonlyIndex; +use jj_lib::default_index::DefaultReadonlyIndex; use jj_lib::default_revset_engine::{evaluate, RevsetImpl}; use jj_lib::repo::{ReadonlyRepo, Repo as _}; use jj_lib::revset::ResolvedExpression; diff --git a/lib/tests/test_index.rs b/lib/tests/test_index.rs index 2370169211..3028328d6e 100644 --- a/lib/tests/test_index.rs +++ b/lib/tests/test_index.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use jj_lib::backend::CommitId; use jj_lib::commit::Commit; use jj_lib::commit_builder::CommitBuilder; -use jj_lib::default_index_store::{ +use jj_lib::default_index::{ CompositeIndex, DefaultMutableIndex, DefaultReadonlyIndex, IndexPosition, }; use jj_lib::index::Index as _; From f2eecf2831b5d8a4363011b3ca072a49b28a5423 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 11 Dec 2023 18:52:32 +0900 Subject: [PATCH 2/5] index: split DefaultIndexStore and Load/StoreError types to "store" module IndexLoadError isn't store-specific, but I think it's better to put I/O stuff in the store module. 
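The pattern this patch establishes is a private submodule plus a `pub use` re-export, so the moved types keep their public path `jj_lib::default_index::*` and existing callers are unaffected. A minimal sketch of the resulting mod.rs skeleton (not the full file), restating the two lines the diff below adds:

    // lib/src/default_index/mod.rs
    mod store; // implementation detail; not visible outside default_index

    // Re-exported so the split stays invisible to the public API:
    pub use self::store::{DefaultIndexStore, DefaultIndexStoreError, IndexLoadError};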
--- lib/src/default_index/mod.rs | 243 +----------------------------- lib/src/default_index/store.rs | 261 +++++++++++++++++++++++++++++++++ 2 files changed, 269 insertions(+), 235 deletions(-) create mode 100644 lib/src/default_index/store.rs diff --git a/lib/src/default_index/mod.rs b/lib/src/default_index/mod.rs index 22d1127a73..43a39167ff 100644 --- a/lib/src/default_index/mod.rs +++ b/lib/src/default_index/mod.rs @@ -14,6 +14,8 @@ #![allow(missing_docs)] +mod store; + use std::any::Any; use std::cmp::{max, min, Ordering, Reverse}; use std::collections::{BTreeMap, BTreeSet, BinaryHeap, Bound, HashMap, HashSet}; @@ -23,9 +25,9 @@ use std::hash::{Hash, Hasher}; use std::io::{Read, Write}; use std::iter::FusedIterator; use std::ops::Range; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::sync::Arc; -use std::{fs, io, iter}; +use std::{io, iter}; use blake2::Blake2b512; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; @@ -33,236 +35,15 @@ use digest::Digest; use itertools::Itertools; use smallvec::SmallVec; use tempfile::NamedTempFile; -use thiserror::Error; +pub use self::store::{DefaultIndexStore, DefaultIndexStoreError, IndexLoadError}; use crate::backend::{ChangeId, CommitId, ObjectId}; -use crate::commit::{Commit, CommitByCommitterTimestamp}; +use crate::commit::Commit; use crate::file_util::persist_content_addressed_temp_file; -use crate::index::{ - HexPrefix, Index, IndexStore, IndexWriteError, MutableIndex, PrefixResolution, ReadonlyIndex, -}; -use crate::op_store::{OpStoreError, OperationId}; -use crate::operation::Operation; +use crate::index::{HexPrefix, Index, MutableIndex, PrefixResolution, ReadonlyIndex}; use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; use crate::store::Store; -use crate::{backend, dag_walk, default_revset_engine}; - -#[derive(Debug, Error)] -pub enum DefaultIndexStoreError { - #[error(transparent)] - Io(#[from] io::Error), - #[error(transparent)] - OpStore(#[from] OpStoreError), -} - -#[derive(Debug)] -pub struct DefaultIndexStore { - dir: PathBuf, -} - -impl DefaultIndexStore { - pub fn name() -> &'static str { - "default" - } - - pub fn init(dir: &Path) -> Self { - std::fs::create_dir(dir.join("operations")).unwrap(); - DefaultIndexStore { - dir: dir.to_owned(), - } - } - - pub fn load(dir: &Path) -> DefaultIndexStore { - DefaultIndexStore { - dir: dir.to_owned(), - } - } - - pub fn reinit(&self) { - let op_dir = self.dir.join("operations"); - std::fs::remove_dir_all(&op_dir).unwrap(); - std::fs::create_dir(op_dir).unwrap(); - } - - fn load_index_at_operation( - &self, - commit_id_length: usize, - change_id_length: usize, - op_id: &OperationId, - ) -> Result, IndexLoadError> { - let op_id_file = self.dir.join("operations").join(op_id.hex()); - let buf = fs::read(op_id_file).unwrap(); - let index_file_id_hex = String::from_utf8(buf).unwrap(); - let index_file_path = self.dir.join(&index_file_id_hex); - let mut index_file = File::open(index_file_path).unwrap(); - ReadonlyIndexSegment::load_from( - &mut index_file, - self.dir.to_owned(), - index_file_id_hex, - commit_id_length, - change_id_length, - ) - } - - #[tracing::instrument(skip(self, store))] - fn index_at_operation( - &self, - store: &Arc, - operation: &Operation, - ) -> Result, DefaultIndexStoreError> { - let view = operation.view()?; - let operations_dir = self.dir.join("operations"); - let commit_id_length = store.commit_id_length(); - let change_id_length = store.change_id_length(); - let mut new_heads = view.heads().clone(); - let mut 
parent_op_id: Option = None; - for op in dag_walk::dfs_ok( - [Ok(operation.clone())], - |op: &Operation| op.id().clone(), - |op: &Operation| op.parents().collect_vec(), - ) { - let op = op?; - if operations_dir.join(op.id().hex()).is_file() { - if parent_op_id.is_none() { - parent_op_id = Some(op.id().clone()) - } - } else { - for head in op.view()?.heads() { - new_heads.insert(head.clone()); - } - } - } - let mut data; - let maybe_parent_file; - match parent_op_id { - None => { - maybe_parent_file = None; - data = MutableIndexSegment::full(commit_id_length, change_id_length); - } - Some(parent_op_id) => { - let parent_file = self - .load_index_at_operation(commit_id_length, change_id_length, &parent_op_id) - .unwrap(); - maybe_parent_file = Some(parent_file.clone()); - data = MutableIndexSegment::incremental(parent_file) - } - } - - tracing::info!( - ?maybe_parent_file, - new_heads_count = new_heads.len(), - "indexing commits reachable from historical heads" - ); - // Build a list of ancestors of heads where parents and predecessors come after - // the commit itself. - let parent_file_has_id = |id: &CommitId| { - maybe_parent_file - .as_ref() - .map_or(false, |segment| segment.as_composite().has_id(id)) - }; - let commits = dag_walk::topo_order_reverse_ord( - new_heads - .iter() - .filter(|&id| !parent_file_has_id(id)) - .map(|id| store.get_commit(id).unwrap()) - .map(CommitByCommitterTimestamp), - |CommitByCommitterTimestamp(commit)| commit.id().clone(), - |CommitByCommitterTimestamp(commit)| { - itertools::chain(commit.parent_ids(), commit.predecessor_ids()) - .filter(|&id| !parent_file_has_id(id)) - .map(|id| store.get_commit(id).unwrap()) - .map(CommitByCommitterTimestamp) - .collect_vec() - }, - ); - for CommitByCommitterTimestamp(commit) in commits.iter().rev() { - data.add_commit(commit); - } - - let index_file = data.save_in(self.dir.clone())?; - self.associate_file_with_operation(&index_file, operation.id())?; - tracing::info!( - ?index_file, - commits_count = commits.len(), - "saved new index file" - ); - - Ok(index_file) - } - - /// Records a link from the given operation to the this index version. - fn associate_file_with_operation( - &self, - index: &ReadonlyIndexSegment, - op_id: &OperationId, - ) -> io::Result<()> { - let mut temp_file = NamedTempFile::new_in(&self.dir)?; - let file = temp_file.as_file_mut(); - file.write_all(index.name().as_bytes())?; - persist_content_addressed_temp_file( - temp_file, - self.dir.join("operations").join(op_id.hex()), - )?; - Ok(()) - } -} - -impl IndexStore for DefaultIndexStore { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - Self::name() - } - - fn get_index_at_op(&self, op: &Operation, store: &Arc) -> Box { - let op_id_hex = op.id().hex(); - let op_id_file = self.dir.join("operations").join(op_id_hex); - let index_segment = if op_id_file.exists() { - match self.load_index_at_operation( - store.commit_id_length(), - store.change_id_length(), - op.id(), - ) { - Err(IndexLoadError::IndexCorrupt(_)) => { - // If the index was corrupt (maybe it was written in a different format), - // we just reindex. - // TODO: Move this message to a callback or something. - println!("The index was corrupt (maybe the format has changed). 
Reindexing..."); - std::fs::remove_dir_all(self.dir.join("operations")).unwrap(); - std::fs::create_dir(self.dir.join("operations")).unwrap(); - self.index_at_operation(store, op).unwrap() - } - result => result.unwrap(), - } - } else { - self.index_at_operation(store, op).unwrap() - }; - Box::new(DefaultReadonlyIndex(index_segment)) - } - - fn write_index( - &self, - index: Box, - op_id: &OperationId, - ) -> Result, IndexWriteError> { - let index = index - .into_any() - .downcast::() - .expect("index to merge in must be a DefaultMutableIndex"); - let index_segment = index.0.save_in(self.dir.clone()).map_err(|err| { - IndexWriteError::Other(format!("Failed to write commit index file: {err}")) - })?; - self.associate_file_with_operation(&index_segment, op_id) - .map_err(|err| { - IndexWriteError::Other(format!( - "Failed to associate commit index file with a operation {op_id:?}: {err}" - )) - })?; - Ok(Box::new(DefaultReadonlyIndex(index_segment))) - } -} +use crate::{backend, default_revset_engine}; #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] pub struct IndexPosition(u32); @@ -347,14 +128,6 @@ impl CommitLookupEntry<'_> { } } -#[derive(Error, Debug)] -pub enum IndexLoadError { - #[error("Index file '{0}' is corrupt.")] - IndexCorrupt(String), - #[error("I/O error while loading index file: {0}")] - IoError(#[from] io::Error), -} - // File format: // u32: number of entries // u32: number of parent overflow entries diff --git a/lib/src/default_index/store.rs b/lib/src/default_index/store.rs new file mode 100644 index 0000000000..0a347972eb --- /dev/null +++ b/lib/src/default_index/store.rs @@ -0,0 +1,261 @@ +// Copyright 2023 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#![allow(missing_docs)]
+
+use std::any::Any;
+use std::fs::File;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::{fs, io};
+
+use itertools::Itertools;
+use tempfile::NamedTempFile;
+use thiserror::Error;
+
+use super::{DefaultMutableIndex, DefaultReadonlyIndex, MutableIndexSegment, ReadonlyIndexSegment};
+use crate::backend::{CommitId, ObjectId};
+use crate::commit::CommitByCommitterTimestamp;
+use crate::dag_walk;
+use crate::file_util::persist_content_addressed_temp_file;
+use crate::index::{Index, IndexStore, IndexWriteError, MutableIndex, ReadonlyIndex};
+use crate::op_store::{OpStoreError, OperationId};
+use crate::operation::Operation;
+use crate::store::Store;
+
+#[derive(Error, Debug)]
+pub enum IndexLoadError {
+    #[error("Index file '{0}' is corrupt.")]
+    IndexCorrupt(String),
+    #[error("I/O error while loading index file: {0}")]
+    IoError(#[from] io::Error),
+}
+
+#[derive(Debug, Error)]
+pub enum DefaultIndexStoreError {
+    #[error(transparent)]
+    Io(#[from] io::Error),
+    #[error(transparent)]
+    OpStore(#[from] OpStoreError),
+}
+
+#[derive(Debug)]
+pub struct DefaultIndexStore {
+    dir: PathBuf,
+}
+
+impl DefaultIndexStore {
+    pub fn name() -> &'static str {
+        "default"
+    }
+
+    pub fn init(dir: &Path) -> Self {
+        std::fs::create_dir(dir.join("operations")).unwrap();
+        DefaultIndexStore {
+            dir: dir.to_owned(),
+        }
+    }
+
+    pub fn load(dir: &Path) -> DefaultIndexStore {
+        DefaultIndexStore {
+            dir: dir.to_owned(),
+        }
+    }
+
+    pub fn reinit(&self) {
+        let op_dir = self.dir.join("operations");
+        std::fs::remove_dir_all(&op_dir).unwrap();
+        std::fs::create_dir(op_dir).unwrap();
+    }
+
+    fn load_index_at_operation(
+        &self,
+        commit_id_length: usize,
+        change_id_length: usize,
+        op_id: &OperationId,
+    ) -> Result<Arc<ReadonlyIndexSegment>, IndexLoadError> {
+        let op_id_file = self.dir.join("operations").join(op_id.hex());
+        let buf = fs::read(op_id_file).unwrap();
+        let index_file_id_hex = String::from_utf8(buf).unwrap();
+        let index_file_path = self.dir.join(&index_file_id_hex);
+        let mut index_file = File::open(index_file_path).unwrap();
+        ReadonlyIndexSegment::load_from(
+            &mut index_file,
+            self.dir.to_owned(),
+            index_file_id_hex,
+            commit_id_length,
+            change_id_length,
+        )
+    }
+
+    #[tracing::instrument(skip(self, store))]
+    fn index_at_operation(
+        &self,
+        store: &Arc<Store>,
+        operation: &Operation,
+    ) -> Result<Arc<ReadonlyIndexSegment>, DefaultIndexStoreError> {
+        let view = operation.view()?;
+        let operations_dir = self.dir.join("operations");
+        let commit_id_length = store.commit_id_length();
+        let change_id_length = store.change_id_length();
+        let mut new_heads = view.heads().clone();
+        let mut parent_op_id: Option<OperationId> = None;
+        for op in dag_walk::dfs_ok(
+            [Ok(operation.clone())],
+            |op: &Operation| op.id().clone(),
+            |op: &Operation| op.parents().collect_vec(),
+        ) {
+            let op = op?;
+            if operations_dir.join(op.id().hex()).is_file() {
+                if parent_op_id.is_none() {
+                    parent_op_id = Some(op.id().clone())
+                }
+            } else {
+                for head in op.view()?.heads() {
+                    new_heads.insert(head.clone());
+                }
+            }
+        }
+        let mut data;
+        let maybe_parent_file;
+        match parent_op_id {
+            None => {
+                maybe_parent_file = None;
+                data = MutableIndexSegment::full(commit_id_length, change_id_length);
+            }
+            Some(parent_op_id) => {
+                let parent_file = self
+                    .load_index_at_operation(commit_id_length, change_id_length, &parent_op_id)
+                    .unwrap();
+                maybe_parent_file = Some(parent_file.clone());
+                data = MutableIndexSegment::incremental(parent_file)
+            }
+        }
+
+        tracing::info!(
+            ?maybe_parent_file,
+            new_heads_count =
new_heads.len(), + "indexing commits reachable from historical heads" + ); + // Build a list of ancestors of heads where parents and predecessors come after + // the commit itself. + let parent_file_has_id = |id: &CommitId| { + maybe_parent_file + .as_ref() + .map_or(false, |segment| segment.as_composite().has_id(id)) + }; + let commits = dag_walk::topo_order_reverse_ord( + new_heads + .iter() + .filter(|&id| !parent_file_has_id(id)) + .map(|id| store.get_commit(id).unwrap()) + .map(CommitByCommitterTimestamp), + |CommitByCommitterTimestamp(commit)| commit.id().clone(), + |CommitByCommitterTimestamp(commit)| { + itertools::chain(commit.parent_ids(), commit.predecessor_ids()) + .filter(|&id| !parent_file_has_id(id)) + .map(|id| store.get_commit(id).unwrap()) + .map(CommitByCommitterTimestamp) + .collect_vec() + }, + ); + for CommitByCommitterTimestamp(commit) in commits.iter().rev() { + data.add_commit(commit); + } + + let index_file = data.save_in(self.dir.clone())?; + self.associate_file_with_operation(&index_file, operation.id())?; + tracing::info!( + ?index_file, + commits_count = commits.len(), + "saved new index file" + ); + + Ok(index_file) + } + + /// Records a link from the given operation to the this index version. + fn associate_file_with_operation( + &self, + index: &ReadonlyIndexSegment, + op_id: &OperationId, + ) -> io::Result<()> { + let mut temp_file = NamedTempFile::new_in(&self.dir)?; + let file = temp_file.as_file_mut(); + file.write_all(index.name().as_bytes())?; + persist_content_addressed_temp_file( + temp_file, + self.dir.join("operations").join(op_id.hex()), + )?; + Ok(()) + } +} + +impl IndexStore for DefaultIndexStore { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + Self::name() + } + + fn get_index_at_op(&self, op: &Operation, store: &Arc) -> Box { + let op_id_hex = op.id().hex(); + let op_id_file = self.dir.join("operations").join(op_id_hex); + let index_segment = if op_id_file.exists() { + match self.load_index_at_operation( + store.commit_id_length(), + store.change_id_length(), + op.id(), + ) { + Err(IndexLoadError::IndexCorrupt(_)) => { + // If the index was corrupt (maybe it was written in a different format), + // we just reindex. + // TODO: Move this message to a callback or something. + println!("The index was corrupt (maybe the format has changed). 
Reindexing..."); + std::fs::remove_dir_all(self.dir.join("operations")).unwrap(); + std::fs::create_dir(self.dir.join("operations")).unwrap(); + self.index_at_operation(store, op).unwrap() + } + result => result.unwrap(), + } + } else { + self.index_at_operation(store, op).unwrap() + }; + Box::new(DefaultReadonlyIndex(index_segment)) + } + + fn write_index( + &self, + index: Box, + op_id: &OperationId, + ) -> Result, IndexWriteError> { + let index = index + .into_any() + .downcast::() + .expect("index to merge in must be a DefaultMutableIndex"); + let index_segment = index.0.save_in(self.dir.clone()).map_err(|err| { + IndexWriteError::Other(format!("Failed to write commit index file: {err}")) + })?; + self.associate_file_with_operation(&index_segment, op_id) + .map_err(|err| { + IndexWriteError::Other(format!( + "Failed to associate commit index file with a operation {op_id:?}: {err}" + )) + })?; + Ok(Box::new(DefaultReadonlyIndex(index_segment))) + } +} From b9aa65c17e9bf8b9361874372722070b413b7dc0 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 11 Dec 2023 19:08:50 +0900 Subject: [PATCH 3/5] index: split CompositeIndex and stats types to "composite" module Added pub(super) where needed or makes sense. --- lib/src/default_index/composite.rs | 356 +++++++++++++++++++++++++++++ lib/src/default_index/mod.rs | 329 +------------------------- 2 files changed, 361 insertions(+), 324 deletions(-) create mode 100644 lib/src/default_index/composite.rs diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs new file mode 100644 index 0000000000..2a432fc06a --- /dev/null +++ b/lib/src/default_index/composite.rs @@ -0,0 +1,356 @@ +// Copyright 2023 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#![allow(missing_docs)] + +use std::cmp::{max, min, Ordering}; +use std::collections::{BTreeSet, BinaryHeap, HashSet}; +use std::iter; +use std::sync::Arc; + +use itertools::Itertools; + +use super::{ + IndexEntry, IndexPosition, IndexPositionByGeneration, IndexSegment, ReadonlyIndexSegment, + RevWalk, +}; +use crate::backend::{CommitId, ObjectId}; +use crate::index::{HexPrefix, Index, PrefixResolution}; +use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; +use crate::store::Store; +use crate::{backend, default_revset_engine}; + +#[derive(Clone, Copy)] +pub struct CompositeIndex<'a>(pub(super) &'a dyn IndexSegment); + +impl<'a> CompositeIndex<'a> { + fn ancestor_files_without_local(&self) -> impl Iterator> { + let parent_file = self.0.segment_parent_file(); + iter::successors(parent_file, |file| file.segment_parent_file()) + } + + fn ancestor_index_segments(&self) -> impl Iterator { + iter::once(self.0).chain( + self.ancestor_files_without_local() + .map(|file| file.as_ref() as &dyn IndexSegment), + ) + } + + pub fn num_commits(&self) -> u32 { + self.0.segment_num_parent_commits() + self.0.segment_num_commits() + } + + pub fn stats(&self) -> IndexStats { + let num_commits = self.num_commits(); + let mut num_merges = 0; + let mut max_generation_number = 0; + let mut is_head = vec![true; num_commits as usize]; + let mut change_ids = HashSet::new(); + for pos in 0..num_commits { + let entry = self.entry_by_pos(IndexPosition(pos)); + max_generation_number = max(max_generation_number, entry.generation_number()); + if entry.num_parents() > 1 { + num_merges += 1; + } + for parent_pos in entry.parent_positions() { + is_head[parent_pos.0 as usize] = false; + } + change_ids.insert(entry.change_id()); + } + let num_heads = is_head.iter().filter(|is_head| **is_head).count() as u32; + + let mut levels = self + .ancestor_index_segments() + .map(|segment| IndexLevelStats { + num_commits: segment.segment_num_commits(), + name: segment.segment_name(), + }) + .collect_vec(); + levels.reverse(); + + IndexStats { + num_commits, + num_merges, + max_generation_number, + num_heads, + num_changes: change_ids.len() as u32, + levels, + } + } + + pub fn entry_by_pos(&self, pos: IndexPosition) -> IndexEntry<'a> { + self.ancestor_index_segments() + .find_map(|segment| { + u32::checked_sub(pos.0, segment.segment_num_parent_commits()) + .map(|local_pos| segment.segment_entry_by_pos(pos, local_pos)) + }) + .unwrap() + } + + pub fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option { + self.ancestor_index_segments() + .find_map(|segment| segment.segment_commit_id_to_pos(commit_id)) + } + + /// Suppose the given `commit_id` exists, returns the previous and next + /// commit ids in lexicographical order. 
+ pub(super) fn resolve_neighbor_commit_ids( + &self, + commit_id: &CommitId, + ) -> (Option, Option) { + self.ancestor_index_segments() + .map(|segment| { + let num_parent_commits = segment.segment_num_parent_commits(); + let to_local_pos = |pos: IndexPosition| pos.0 - num_parent_commits; + let (prev_pos, next_pos) = + segment.segment_commit_id_to_neighbor_positions(commit_id); + ( + prev_pos.map(|p| segment.segment_commit_id(to_local_pos(p))), + next_pos.map(|p| segment.segment_commit_id(to_local_pos(p))), + ) + }) + .reduce(|(acc_prev_id, acc_next_id), (prev_id, next_id)| { + ( + acc_prev_id.into_iter().chain(prev_id).max(), + acc_next_id.into_iter().chain(next_id).min(), + ) + }) + .unwrap() + } + + pub fn entry_by_id(&self, commit_id: &CommitId) -> Option> { + self.commit_id_to_pos(commit_id) + .map(|pos| self.entry_by_pos(pos)) + } + + pub(super) fn is_ancestor_pos( + &self, + ancestor_pos: IndexPosition, + descendant_pos: IndexPosition, + ) -> bool { + let ancestor_generation = self.entry_by_pos(ancestor_pos).generation_number(); + let mut work = vec![descendant_pos]; + let mut visited = HashSet::new(); + while let Some(descendant_pos) = work.pop() { + let descendant_entry = self.entry_by_pos(descendant_pos); + if descendant_pos == ancestor_pos { + return true; + } + if !visited.insert(descendant_entry.pos) { + continue; + } + if descendant_entry.generation_number() <= ancestor_generation { + continue; + } + work.extend(descendant_entry.parent_positions()); + } + false + } + + pub(super) fn common_ancestors_pos( + &self, + set1: &[IndexPosition], + set2: &[IndexPosition], + ) -> BTreeSet { + let mut items1: BinaryHeap<_> = set1 + .iter() + .map(|pos| IndexPositionByGeneration::from(&self.entry_by_pos(*pos))) + .collect(); + let mut items2: BinaryHeap<_> = set2 + .iter() + .map(|pos| IndexPositionByGeneration::from(&self.entry_by_pos(*pos))) + .collect(); + + let mut result = BTreeSet::new(); + while let (Some(item1), Some(item2)) = (items1.peek(), items2.peek()) { + match item1.cmp(item2) { + Ordering::Greater => { + let item1 = dedup_pop(&mut items1).unwrap(); + let entry1 = self.entry_by_pos(item1.pos); + for parent_entry in entry1.parents() { + assert!(parent_entry.pos < entry1.pos); + items1.push(IndexPositionByGeneration::from(&parent_entry)); + } + } + Ordering::Less => { + let item2 = dedup_pop(&mut items2).unwrap(); + let entry2 = self.entry_by_pos(item2.pos); + for parent_entry in entry2.parents() { + assert!(parent_entry.pos < entry2.pos); + items2.push(IndexPositionByGeneration::from(&parent_entry)); + } + } + Ordering::Equal => { + result.insert(item1.pos); + dedup_pop(&mut items1).unwrap(); + dedup_pop(&mut items2).unwrap(); + } + } + } + self.heads_pos(result) + } + + pub fn walk_revs(&self, wanted: &[IndexPosition], unwanted: &[IndexPosition]) -> RevWalk<'a> { + let mut rev_walk = RevWalk::new(*self); + rev_walk.extend_wanted(wanted.iter().copied()); + rev_walk.extend_unwanted(unwanted.iter().copied()); + rev_walk + } + + pub fn heads_pos( + &self, + mut candidate_positions: BTreeSet, + ) -> BTreeSet { + // Add all parents of the candidates to the work queue. The parents and their + // ancestors are not heads. + // Also find the smallest generation number among the candidates. 
+ let mut work = BinaryHeap::new(); + let mut min_generation = u32::MAX; + for pos in &candidate_positions { + let entry = self.entry_by_pos(*pos); + min_generation = min(min_generation, entry.generation_number()); + for parent_entry in entry.parents() { + work.push(IndexPositionByGeneration::from(&parent_entry)); + } + } + + // Walk ancestors of the parents of the candidates. Remove visited commits from + // set of candidates. Stop walking when we have gone past the minimum + // candidate generation. + while let Some(item) = dedup_pop(&mut work) { + if item.generation < min_generation { + break; + } + candidate_positions.remove(&item.pos); + let entry = self.entry_by_pos(item.pos); + for parent_entry in entry.parents() { + assert!(parent_entry.pos < entry.pos); + work.push(IndexPositionByGeneration::from(&parent_entry)); + } + } + candidate_positions + } + + pub(super) fn evaluate_revset( + &self, + expression: &ResolvedExpression, + store: &Arc, + ) -> Result + 'a>, RevsetEvaluationError> { + let revset_impl = default_revset_engine::evaluate(expression, store, *self)?; + Ok(Box::new(revset_impl)) + } +} + +impl Index for CompositeIndex<'_> { + /// Suppose the given `commit_id` exists, returns the minimum prefix length + /// to disambiguate it. The length to be returned is a number of hexadecimal + /// digits. + /// + /// If the given `commit_id` doesn't exist, this will return the prefix + /// length that never matches with any commit ids. + fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize { + let (prev_id, next_id) = self.resolve_neighbor_commit_ids(commit_id); + itertools::chain(prev_id, next_id) + .map(|id| backend::common_hex_len(commit_id.as_bytes(), id.as_bytes()) + 1) + .max() + .unwrap_or(0) + } + + fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { + self.ancestor_index_segments() + .fold(PrefixResolution::NoMatch, |acc_match, segment| { + if acc_match == PrefixResolution::AmbiguousMatch { + acc_match // avoid checking the parent file(s) + } else { + let local_match = segment.segment_resolve_prefix(prefix); + acc_match.plus(&local_match) + } + }) + } + + fn has_id(&self, commit_id: &CommitId) -> bool { + self.commit_id_to_pos(commit_id).is_some() + } + + fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool { + let ancestor_pos = self.commit_id_to_pos(ancestor_id).unwrap(); + let descendant_pos = self.commit_id_to_pos(descendant_id).unwrap(); + self.is_ancestor_pos(ancestor_pos, descendant_pos) + } + + fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec { + let pos1 = set1 + .iter() + .map(|id| self.commit_id_to_pos(id).unwrap()) + .collect_vec(); + let pos2 = set2 + .iter() + .map(|id| self.commit_id_to_pos(id).unwrap()) + .collect_vec(); + self.common_ancestors_pos(&pos1, &pos2) + .iter() + .map(|pos| self.entry_by_pos(*pos).commit_id()) + .collect() + } + + fn heads(&self, candidate_ids: &mut dyn Iterator) -> Vec { + let candidate_positions: BTreeSet<_> = candidate_ids + .map(|id| self.commit_id_to_pos(id).unwrap()) + .collect(); + + self.heads_pos(candidate_positions) + .iter() + .map(|pos| self.entry_by_pos(*pos).commit_id()) + .collect() + } + + /// Parents before children + fn topo_order(&self, input: &mut dyn Iterator) -> Vec { + let mut ids = input.cloned().collect_vec(); + ids.sort_by_cached_key(|id| self.commit_id_to_pos(id).unwrap()); + ids + } + + fn evaluate_revset<'index>( + &'index self, + expression: &ResolvedExpression, + store: &Arc, + ) -> Result + 'index>, 
RevsetEvaluationError> { + CompositeIndex::evaluate_revset(self, expression, store) + } +} + +pub struct IndexLevelStats { + pub num_commits: u32, + pub name: Option, +} + +pub struct IndexStats { + pub num_commits: u32, + pub num_merges: u32, + pub max_generation_number: u32, + pub num_heads: u32, + pub num_changes: u32, + pub levels: Vec, +} + +/// Removes the greatest items (including duplicates) from the heap, returns +/// one. +fn dedup_pop(heap: &mut BinaryHeap) -> Option { + let item = heap.pop()?; + while heap.peek() == Some(&item) { + heap.pop().unwrap(); + } + Some(item) +} diff --git a/lib/src/default_index/mod.rs b/lib/src/default_index/mod.rs index 43a39167ff..d63893c529 100644 --- a/lib/src/default_index/mod.rs +++ b/lib/src/default_index/mod.rs @@ -14,20 +14,21 @@ #![allow(missing_docs)] +mod composite; mod store; use std::any::Any; -use std::cmp::{max, min, Ordering, Reverse}; -use std::collections::{BTreeMap, BTreeSet, BinaryHeap, Bound, HashMap, HashSet}; +use std::cmp::{max, Ordering, Reverse}; +use std::collections::{BTreeMap, BinaryHeap, Bound, HashMap, HashSet}; use std::fmt::{Debug, Formatter}; use std::fs::File; use std::hash::{Hash, Hasher}; +use std::io; use std::io::{Read, Write}; use std::iter::FusedIterator; use std::ops::Range; use std::path::PathBuf; use std::sync::Arc; -use std::{io, iter}; use blake2::Blake2b512; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; @@ -36,6 +37,7 @@ use itertools::Itertools; use smallvec::SmallVec; use tempfile::NamedTempFile; +pub use self::composite::{CompositeIndex, IndexLevelStats, IndexStats}; pub use self::store::{DefaultIndexStore, DefaultIndexStoreError, IndexLoadError}; use crate::backend::{ChangeId, CommitId, ObjectId}; use crate::commit::Commit; @@ -43,7 +45,6 @@ use crate::file_util::persist_content_addressed_temp_file; use crate::index::{HexPrefix, Index, MutableIndex, PrefixResolution, ReadonlyIndex}; use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; use crate::store::Store; -use crate::{backend, default_revset_engine}; #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] pub struct IndexPosition(u32); @@ -584,316 +585,6 @@ trait IndexSegment: Send + Sync { fn segment_entry_by_pos(&self, pos: IndexPosition, local_pos: u32) -> IndexEntry; } -#[derive(Clone, Copy)] -pub struct CompositeIndex<'a>(&'a dyn IndexSegment); - -impl<'a> CompositeIndex<'a> { - fn ancestor_files_without_local(&self) -> impl Iterator> { - let parent_file = self.0.segment_parent_file(); - iter::successors(parent_file, |file| file.segment_parent_file()) - } - - fn ancestor_index_segments(&self) -> impl Iterator { - iter::once(self.0).chain( - self.ancestor_files_without_local() - .map(|file| file.as_ref() as &dyn IndexSegment), - ) - } - - pub fn num_commits(&self) -> u32 { - self.0.segment_num_parent_commits() + self.0.segment_num_commits() - } - - pub fn stats(&self) -> IndexStats { - let num_commits = self.num_commits(); - let mut num_merges = 0; - let mut max_generation_number = 0; - let mut is_head = vec![true; num_commits as usize]; - let mut change_ids = HashSet::new(); - for pos in 0..num_commits { - let entry = self.entry_by_pos(IndexPosition(pos)); - max_generation_number = max(max_generation_number, entry.generation_number()); - if entry.num_parents() > 1 { - num_merges += 1; - } - for parent_pos in entry.parent_positions() { - is_head[parent_pos.0 as usize] = false; - } - change_ids.insert(entry.change_id()); - } - let num_heads = is_head.iter().filter(|is_head| **is_head).count() as 
u32; - - let mut levels = self - .ancestor_index_segments() - .map(|segment| IndexLevelStats { - num_commits: segment.segment_num_commits(), - name: segment.segment_name(), - }) - .collect_vec(); - levels.reverse(); - - IndexStats { - num_commits, - num_merges, - max_generation_number, - num_heads, - num_changes: change_ids.len() as u32, - levels, - } - } - - pub fn entry_by_pos(&self, pos: IndexPosition) -> IndexEntry<'a> { - self.ancestor_index_segments() - .find_map(|segment| { - u32::checked_sub(pos.0, segment.segment_num_parent_commits()) - .map(|local_pos| segment.segment_entry_by_pos(pos, local_pos)) - }) - .unwrap() - } - - pub fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option { - self.ancestor_index_segments() - .find_map(|segment| segment.segment_commit_id_to_pos(commit_id)) - } - - /// Suppose the given `commit_id` exists, returns the previous and next - /// commit ids in lexicographical order. - fn resolve_neighbor_commit_ids( - &self, - commit_id: &CommitId, - ) -> (Option, Option) { - self.ancestor_index_segments() - .map(|segment| { - let num_parent_commits = segment.segment_num_parent_commits(); - let to_local_pos = |pos: IndexPosition| pos.0 - num_parent_commits; - let (prev_pos, next_pos) = - segment.segment_commit_id_to_neighbor_positions(commit_id); - ( - prev_pos.map(|p| segment.segment_commit_id(to_local_pos(p))), - next_pos.map(|p| segment.segment_commit_id(to_local_pos(p))), - ) - }) - .reduce(|(acc_prev_id, acc_next_id), (prev_id, next_id)| { - ( - acc_prev_id.into_iter().chain(prev_id).max(), - acc_next_id.into_iter().chain(next_id).min(), - ) - }) - .unwrap() - } - - pub fn entry_by_id(&self, commit_id: &CommitId) -> Option> { - self.commit_id_to_pos(commit_id) - .map(|pos| self.entry_by_pos(pos)) - } - - fn is_ancestor_pos(&self, ancestor_pos: IndexPosition, descendant_pos: IndexPosition) -> bool { - let ancestor_generation = self.entry_by_pos(ancestor_pos).generation_number(); - let mut work = vec![descendant_pos]; - let mut visited = HashSet::new(); - while let Some(descendant_pos) = work.pop() { - let descendant_entry = self.entry_by_pos(descendant_pos); - if descendant_pos == ancestor_pos { - return true; - } - if !visited.insert(descendant_entry.pos) { - continue; - } - if descendant_entry.generation_number() <= ancestor_generation { - continue; - } - work.extend(descendant_entry.parent_positions()); - } - false - } - - fn common_ancestors_pos( - &self, - set1: &[IndexPosition], - set2: &[IndexPosition], - ) -> BTreeSet { - let mut items1: BinaryHeap<_> = set1 - .iter() - .map(|pos| IndexPositionByGeneration::from(&self.entry_by_pos(*pos))) - .collect(); - let mut items2: BinaryHeap<_> = set2 - .iter() - .map(|pos| IndexPositionByGeneration::from(&self.entry_by_pos(*pos))) - .collect(); - - let mut result = BTreeSet::new(); - while let (Some(item1), Some(item2)) = (items1.peek(), items2.peek()) { - match item1.cmp(item2) { - Ordering::Greater => { - let item1 = dedup_pop(&mut items1).unwrap(); - let entry1 = self.entry_by_pos(item1.pos); - for parent_entry in entry1.parents() { - assert!(parent_entry.pos < entry1.pos); - items1.push(IndexPositionByGeneration::from(&parent_entry)); - } - } - Ordering::Less => { - let item2 = dedup_pop(&mut items2).unwrap(); - let entry2 = self.entry_by_pos(item2.pos); - for parent_entry in entry2.parents() { - assert!(parent_entry.pos < entry2.pos); - items2.push(IndexPositionByGeneration::from(&parent_entry)); - } - } - Ordering::Equal => { - result.insert(item1.pos); - dedup_pop(&mut items1).unwrap(); - 
dedup_pop(&mut items2).unwrap(); - } - } - } - self.heads_pos(result) - } - - pub fn walk_revs(&self, wanted: &[IndexPosition], unwanted: &[IndexPosition]) -> RevWalk<'a> { - let mut rev_walk = RevWalk::new(*self); - rev_walk.extend_wanted(wanted.iter().copied()); - rev_walk.extend_unwanted(unwanted.iter().copied()); - rev_walk - } - - pub fn heads_pos( - &self, - mut candidate_positions: BTreeSet, - ) -> BTreeSet { - // Add all parents of the candidates to the work queue. The parents and their - // ancestors are not heads. - // Also find the smallest generation number among the candidates. - let mut work = BinaryHeap::new(); - let mut min_generation = u32::MAX; - for pos in &candidate_positions { - let entry = self.entry_by_pos(*pos); - min_generation = min(min_generation, entry.generation_number()); - for parent_entry in entry.parents() { - work.push(IndexPositionByGeneration::from(&parent_entry)); - } - } - - // Walk ancestors of the parents of the candidates. Remove visited commits from - // set of candidates. Stop walking when we have gone past the minimum - // candidate generation. - while let Some(item) = dedup_pop(&mut work) { - if item.generation < min_generation { - break; - } - candidate_positions.remove(&item.pos); - let entry = self.entry_by_pos(item.pos); - for parent_entry in entry.parents() { - assert!(parent_entry.pos < entry.pos); - work.push(IndexPositionByGeneration::from(&parent_entry)); - } - } - candidate_positions - } - - fn evaluate_revset( - &self, - expression: &ResolvedExpression, - store: &Arc, - ) -> Result + 'a>, RevsetEvaluationError> { - let revset_impl = default_revset_engine::evaluate(expression, store, *self)?; - Ok(Box::new(revset_impl)) - } -} - -impl Index for CompositeIndex<'_> { - /// Suppose the given `commit_id` exists, returns the minimum prefix length - /// to disambiguate it. The length to be returned is a number of hexadecimal - /// digits. - /// - /// If the given `commit_id` doesn't exist, this will return the prefix - /// length that never matches with any commit ids. 
- fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize { - let (prev_id, next_id) = self.resolve_neighbor_commit_ids(commit_id); - itertools::chain(prev_id, next_id) - .map(|id| backend::common_hex_len(commit_id.as_bytes(), id.as_bytes()) + 1) - .max() - .unwrap_or(0) - } - - fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { - self.ancestor_index_segments() - .fold(PrefixResolution::NoMatch, |acc_match, segment| { - if acc_match == PrefixResolution::AmbiguousMatch { - acc_match // avoid checking the parent file(s) - } else { - let local_match = segment.segment_resolve_prefix(prefix); - acc_match.plus(&local_match) - } - }) - } - - fn has_id(&self, commit_id: &CommitId) -> bool { - self.commit_id_to_pos(commit_id).is_some() - } - - fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool { - let ancestor_pos = self.commit_id_to_pos(ancestor_id).unwrap(); - let descendant_pos = self.commit_id_to_pos(descendant_id).unwrap(); - self.is_ancestor_pos(ancestor_pos, descendant_pos) - } - - fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec { - let pos1 = set1 - .iter() - .map(|id| self.commit_id_to_pos(id).unwrap()) - .collect_vec(); - let pos2 = set2 - .iter() - .map(|id| self.commit_id_to_pos(id).unwrap()) - .collect_vec(); - self.common_ancestors_pos(&pos1, &pos2) - .iter() - .map(|pos| self.entry_by_pos(*pos).commit_id()) - .collect() - } - - fn heads(&self, candidate_ids: &mut dyn Iterator) -> Vec { - let candidate_positions: BTreeSet<_> = candidate_ids - .map(|id| self.commit_id_to_pos(id).unwrap()) - .collect(); - - self.heads_pos(candidate_positions) - .iter() - .map(|pos| self.entry_by_pos(*pos).commit_id()) - .collect() - } - - /// Parents before children - fn topo_order(&self, input: &mut dyn Iterator) -> Vec { - let mut ids = input.cloned().collect_vec(); - ids.sort_by_cached_key(|id| self.commit_id_to_pos(id).unwrap()); - ids - } - - fn evaluate_revset<'index>( - &'index self, - expression: &ResolvedExpression, - store: &Arc, - ) -> Result + 'index>, RevsetEvaluationError> { - CompositeIndex::evaluate_revset(self, expression, store) - } -} - -pub struct IndexLevelStats { - pub num_commits: u32, - pub name: Option, -} - -pub struct IndexStats { - pub num_commits: u32, - pub num_merges: u32, - pub max_generation_number: u32, - pub num_heads: u32, - pub num_changes: u32, - pub levels: Vec, -} - #[derive(Clone, Eq, PartialEq)] pub struct IndexEntryByPosition<'a>(pub IndexEntry<'a>); @@ -1415,16 +1106,6 @@ impl<'a> Iterator for RevWalkDescendants<'a> { impl FusedIterator for RevWalkDescendants<'_> {} -/// Removes the greatest items (including duplicates) from the heap, returns -/// one. -fn dedup_pop(heap: &mut BinaryHeap) -> Option { - let item = heap.pop()?; - while heap.peek() == Some(&item) { - heap.pop().unwrap(); - } - Some(item) -} - impl IndexSegment for ReadonlyIndexSegment { fn segment_num_parent_commits(&self) -> u32 { self.num_parent_commits From 98cd3e7b86da464f1a7fa0473a4a2e38c0643460 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 11 Dec 2023 19:45:16 +0900 Subject: [PATCH 4/5] index: add CompositeIndex constructor instead of pub(super)-ing field This wouldn't matter, but seemed slightly better. 
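For contrast, the two shapes this patch chooses between, as a small compile-sketch (with `IndexSegment` standing in for the crate-private trait of the same name, and assuming the code sits in a submodule so `pub(super)` is meaningful):

    trait IndexSegment {} // stand-in for the crate-private trait

    // Before: sibling modules could both construct the wrapper and reach into `.0`:
    //     pub struct CompositeIndex<'a>(pub(super) &'a dyn IndexSegment);

    // After: the field is private; siblings construct through the constructor,
    // and direct field access stays confined to composite.rs.
    pub struct CompositeIndex<'a>(&'a dyn IndexSegment);

    impl<'a> CompositeIndex<'a> {
        pub(super) fn new(segment: &'a dyn IndexSegment) -> Self {
            CompositeIndex(segment)
        }
    }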
--- lib/src/default_index/composite.rs | 6 +++++- lib/src/default_index/mod.rs | 21 +++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs index 2a432fc06a..b44a1aff31 100644 --- a/lib/src/default_index/composite.rs +++ b/lib/src/default_index/composite.rs @@ -32,9 +32,13 @@ use crate::store::Store; use crate::{backend, default_revset_engine}; #[derive(Clone, Copy)] -pub struct CompositeIndex<'a>(pub(super) &'a dyn IndexSegment); +pub struct CompositeIndex<'a>(&'a dyn IndexSegment); impl<'a> CompositeIndex<'a> { + pub(super) fn new(segment: &'a dyn IndexSegment) -> Self { + CompositeIndex(segment) + } + fn ancestor_files_without_local(&self) -> impl Iterator> { let parent_file = self.0.segment_parent_file(); iter::successors(parent_file, |file| file.segment_parent_file()) diff --git a/lib/src/default_index/mod.rs b/lib/src/default_index/mod.rs index d63893c529..9e616046d6 100644 --- a/lib/src/default_index/mod.rs +++ b/lib/src/default_index/mod.rs @@ -238,7 +238,7 @@ impl MutableIndexSegment { } fn as_composite(&self) -> CompositeIndex { - CompositeIndex(self) + CompositeIndex::new(self) } fn add_commit(&mut self, commit: &Commit) { @@ -265,7 +265,8 @@ impl MutableIndexSegment { parent_positions: SmallVec::new(), }; for parent_id in parent_ids { - let parent_entry = CompositeIndex(self) + let parent_entry = self + .as_composite() .entry_by_id(parent_id) .expect("parent commit is not indexed"); entry.generation_number = max( @@ -282,7 +283,7 @@ impl MutableIndexSegment { } fn add_commits_from(&mut self, other_segment: &dyn IndexSegment) { - let other = CompositeIndex(other_segment); + let other = CompositeIndex::new(other_segment); for pos in other_segment.segment_num_parent_commits()..other.num_commits() { let entry = other.entry_by_pos(IndexPosition(pos)); let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec(); @@ -1351,7 +1352,7 @@ impl<'a> IndexEntry<'a> { } pub fn parents(&self) -> impl ExactSizeIterator> { - let composite = CompositeIndex(self.source); + let composite = CompositeIndex::new(self.source); self.parent_positions() .into_iter() .map(move |pos| composite.entry_by_pos(pos)) @@ -1420,7 +1421,7 @@ impl ReadonlyIndexSegment { } fn as_composite(&self) -> CompositeIndex { - CompositeIndex(self) + CompositeIndex::new(self) } fn name(&self) -> &str { @@ -1549,7 +1550,7 @@ mod tests { } else { Box::new(mutable_segment) }; - let index = CompositeIndex(index_segment.as_ref()); + let index = CompositeIndex::new(index_segment.as_ref()); // Stats are as expected let stats = index.stats(); @@ -1580,7 +1581,7 @@ mod tests { } else { Box::new(mutable_segment) }; - let index = CompositeIndex(index_segment.as_ref()); + let index = CompositeIndex::new(index_segment.as_ref()); // Stats are as expected let stats = index.stats(); @@ -1665,7 +1666,7 @@ mod tests { } else { Box::new(mutable_segment) }; - let index = CompositeIndex(index_segment.as_ref()); + let index = CompositeIndex::new(index_segment.as_ref()); // Stats are as expected let stats = index.stats(); @@ -1768,7 +1769,7 @@ mod tests { } else { Box::new(mutable_segment) }; - let index = CompositeIndex(index_segment.as_ref()); + let index = CompositeIndex::new(index_segment.as_ref()); // Stats are as expected let stats = index.stats(); @@ -1948,7 +1949,7 @@ mod tests { ); // Global lookup, commit_id exists. 
id_0 < id_1 < id_5 < id_3 < id_2 < id_4 - let composite_index = CompositeIndex(&mutable_segment); + let composite_index = CompositeIndex::new(&mutable_segment); assert_eq!( composite_index.resolve_neighbor_commit_ids(&id_0), (None, Some(id_1.clone())), From cbe04ddbfe68667edc898e421fef337795e1ce38 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 11 Dec 2023 19:34:31 +0900 Subject: [PATCH 5/5] index: split RevWalk types to "rev_walk" module Added pub(super) where needed. --- lib/src/default_index/composite.rs | 2 +- lib/src/default_index/mod.rs | 491 +--------------------------- lib/src/default_index/rev_walk.rs | 504 +++++++++++++++++++++++++++++ 3 files changed, 513 insertions(+), 484 deletions(-) create mode 100644 lib/src/default_index/rev_walk.rs diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs index b44a1aff31..97c277590c 100644 --- a/lib/src/default_index/composite.rs +++ b/lib/src/default_index/composite.rs @@ -21,9 +21,9 @@ use std::sync::Arc; use itertools::Itertools; +use super::rev_walk::RevWalk; use super::{ IndexEntry, IndexPosition, IndexPositionByGeneration, IndexSegment, ReadonlyIndexSegment, - RevWalk, }; use crate::backend::{CommitId, ObjectId}; use crate::index::{HexPrefix, Index, PrefixResolution}; diff --git a/lib/src/default_index/mod.rs b/lib/src/default_index/mod.rs index 9e616046d6..eafb7f5861 100644 --- a/lib/src/default_index/mod.rs +++ b/lib/src/default_index/mod.rs @@ -15,18 +15,17 @@ #![allow(missing_docs)] mod composite; +mod rev_walk; mod store; use std::any::Any; -use std::cmp::{max, Ordering, Reverse}; -use std::collections::{BTreeMap, BinaryHeap, Bound, HashMap, HashSet}; +use std::cmp::{max, Ordering}; +use std::collections::{BTreeMap, Bound}; use std::fmt::{Debug, Formatter}; use std::fs::File; use std::hash::{Hash, Hasher}; use std::io; use std::io::{Read, Write}; -use std::iter::FusedIterator; -use std::ops::Range; use std::path::PathBuf; use std::sync::Arc; @@ -38,6 +37,9 @@ use smallvec::SmallVec; use tempfile::NamedTempFile; pub use self::composite::{CompositeIndex, IndexLevelStats, IndexStats}; +pub use self::rev_walk::{ + RevWalk, RevWalkDescendants, RevWalkDescendantsGenerationRange, RevWalkGenerationRange, +}; pub use self::store::{DefaultIndexStore, DefaultIndexStoreError, IndexLoadError}; use crate::backend::{ChangeId, CommitId, ObjectId}; use crate::commit::Commit; @@ -628,485 +630,6 @@ impl From<&IndexEntry<'_>> for IndexPositionByGeneration { } } -trait RevWalkIndex<'a> { - type Position: Copy + Ord; - type AdjacentPositions: IntoIterator; - - fn entry_by_pos(&self, pos: Self::Position) -> IndexEntry<'a>; - fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Self::AdjacentPositions; -} - -impl<'a> RevWalkIndex<'a> for CompositeIndex<'a> { - type Position = IndexPosition; - type AdjacentPositions = SmallIndexPositionsVec; - - fn entry_by_pos(&self, pos: Self::Position) -> IndexEntry<'a> { - CompositeIndex::entry_by_pos(self, pos) - } - - fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Self::AdjacentPositions { - entry.parent_positions() - } -} - -#[derive(Clone)] -struct RevWalkDescendantsIndex<'a> { - index: CompositeIndex<'a>, - children_map: HashMap, -} - -// See SmallIndexPositionsVec for the array size. -type DescendantIndexPositionsVec = SmallVec<[Reverse; 4]>; - -impl<'a> RevWalkDescendantsIndex<'a> { - fn build<'b>( - index: CompositeIndex<'a>, - entries: impl IntoIterator>, - ) -> Self { - // For dense set, it's probably cheaper to use `Vec` instead of `HashMap`. 
-        let mut children_map: HashMap<IndexPosition, DescendantIndexPositionsVec> = HashMap::new();
-        for entry in entries {
-            children_map.entry(entry.position()).or_default(); // mark head node
-            for parent_pos in entry.parent_positions() {
-                let parent = children_map.entry(parent_pos).or_default();
-                parent.push(Reverse(entry.position()));
-            }
-        }
-
-        RevWalkDescendantsIndex {
-            index,
-            children_map,
-        }
-    }
-
-    fn contains_pos(&self, pos: IndexPosition) -> bool {
-        self.children_map.contains_key(&pos)
-    }
-}
-
-impl<'a> RevWalkIndex<'a> for RevWalkDescendantsIndex<'a> {
-    type Position = Reverse<IndexPosition>;
-    type AdjacentPositions = DescendantIndexPositionsVec;
-
-    fn entry_by_pos(&self, pos: Self::Position) -> IndexEntry<'a> {
-        self.index.entry_by_pos(pos.0)
-    }
-
-    fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Self::AdjacentPositions {
-        self.children_map[&entry.position()].clone()
-    }
-}
-
-#[derive(Clone, Eq, PartialEq, Ord, PartialOrd)]
-struct RevWalkWorkItem<P, T> {
-    pos: P,
-    state: RevWalkWorkItemState<T>,
-}
-
-#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
-enum RevWalkWorkItemState<T> {
-    // Order matters: Unwanted should appear earlier in the max-heap.
-    Wanted(T),
-    Unwanted,
-}
-
-impl<P, T> RevWalkWorkItem<P, T> {
-    fn is_wanted(&self) -> bool {
-        matches!(self.state, RevWalkWorkItemState::Wanted(_))
-    }
-
-    fn map_wanted<U>(self, f: impl FnOnce(T) -> U) -> RevWalkWorkItem<P, U> {
-        RevWalkWorkItem {
-            pos: self.pos,
-            state: match self.state {
-                RevWalkWorkItemState::Wanted(t) => RevWalkWorkItemState::Wanted(f(t)),
-                RevWalkWorkItemState::Unwanted => RevWalkWorkItemState::Unwanted,
-            },
-        }
-    }
-}
-
-#[derive(Clone)]
-struct RevWalkQueue<P, T> {
-    items: BinaryHeap<RevWalkWorkItem<P, T>>,
-    unwanted_count: usize,
-}
-
-impl<P: Ord, T: Ord> RevWalkQueue<P, T> {
-    fn new() -> Self {
-        Self {
-            items: BinaryHeap::new(),
-            unwanted_count: 0,
-        }
-    }
-
-    fn map_wanted<U: Ord>(self, mut f: impl FnMut(T) -> U) -> RevWalkQueue<P, U> {
-        RevWalkQueue {
-            items: self
-                .items
-                .into_iter()
-                .map(|x| x.map_wanted(&mut f))
-                .collect(),
-            unwanted_count: self.unwanted_count,
-        }
-    }
-
-    fn push_wanted(&mut self, pos: P, t: T) {
-        let state = RevWalkWorkItemState::Wanted(t);
-        self.items.push(RevWalkWorkItem { pos, state });
-    }
-
-    fn push_unwanted(&mut self, pos: P) {
-        let state = RevWalkWorkItemState::Unwanted;
-        self.items.push(RevWalkWorkItem { pos, state });
-        self.unwanted_count += 1;
-    }
-
-    fn extend_wanted(&mut self, positions: impl IntoIterator<Item = P>, t: T)
-    where
-        T: Clone,
-    {
-        // positions typically contains one item, and single BinaryHeap::push()
-        // appears to be slightly faster than .extend() as of rustc 1.73.0.
-        for pos in positions {
-            self.push_wanted(pos, t.clone());
-        }
-    }
-
-    fn extend_unwanted(&mut self, positions: impl IntoIterator<Item = P>) {
-        for pos in positions {
-            self.push_unwanted(pos);
-        }
-    }
-
-    fn pop(&mut self) -> Option<RevWalkWorkItem<P, T>> {
-        if let Some(x) = self.items.pop() {
-            self.unwanted_count -= !x.is_wanted() as usize;
-            Some(x)
-        } else {
-            None
-        }
-    }
-
-    fn pop_eq(&mut self, pos: &P) -> Option<RevWalkWorkItem<P, T>> {
-        if let Some(x) = self.items.peek() {
-            (x.pos == *pos).then(|| self.pop().unwrap())
-        } else {
-            None
-        }
-    }
-
-    fn skip_while_eq(&mut self, pos: &P) {
-        while self.pop_eq(pos).is_some() {
-            continue;
-        }
-    }
-}
-
-#[derive(Clone)]
-pub struct RevWalk<'a>(RevWalkImpl<'a, CompositeIndex<'a>>);
-
-impl<'a> RevWalk<'a> {
-    fn new(index: CompositeIndex<'a>) -> Self {
-        let queue = RevWalkQueue::new();
-        RevWalk(RevWalkImpl { index, queue })
-    }
-
-    fn extend_wanted(&mut self, positions: impl IntoIterator<Item = IndexPosition>) {
-        self.0.queue.extend_wanted(positions, ());
-    }
-
-    fn extend_unwanted(&mut self, positions: impl IntoIterator<Item = IndexPosition>) {
-        self.0.queue.extend_unwanted(positions);
-    }
-
-    /// Filters entries by generation (or depth from the current wanted set.)
-    ///
-    /// The generation of the current wanted entries starts from 0.
-    pub fn filter_by_generation(self, generation_range: Range<u32>) -> RevWalkGenerationRange<'a> {
-        RevWalkGenerationRange(RevWalkGenerationRangeImpl::new(
-            self.0.index,
-            self.0.queue,
-            generation_range,
-        ))
-    }
-
-    /// Walks ancestors until all of the reachable roots in `root_positions` get
-    /// visited.
-    ///
-    /// Use this if you are only interested in descendants of the given roots.
-    /// The caller still needs to filter out unwanted entries.
-    pub fn take_until_roots(
-        self,
-        root_positions: &[IndexPosition],
-    ) -> impl Iterator<Item = IndexEntry<'a>> + Clone + 'a {
-        // We can also make it stop visiting based on the generation number. Maybe
-        // it will perform better for unbalanced branchy history.
-        // https://github.com/martinvonz/jj/pull/1492#discussion_r1160678325
-        let bottom_position = *root_positions.iter().min().unwrap_or(&IndexPosition::MAX);
-        self.take_while(move |entry| entry.position() >= bottom_position)
-    }
-
-    /// Fully consumes the ancestors and walks back from `root_positions`.
-    ///
-    /// The returned iterator yields entries in order of ascending index
-    /// position.
-    pub fn descendants(self, root_positions: &[IndexPosition]) -> RevWalkDescendants<'a> {
-        RevWalkDescendants {
-            candidate_entries: self.take_until_roots(root_positions).collect(),
-            root_positions: root_positions.iter().copied().collect(),
-            reachable_positions: HashSet::new(),
-        }
-    }
-
-    /// Fully consumes the ancestors and walks back from `root_positions` within
-    /// `generation_range`.
-    ///
-    /// The returned iterator yields entries in order of ascending index
-    /// position.
-    pub fn descendants_filtered_by_generation(
-        self,
-        root_positions: &[IndexPosition],
-        generation_range: Range<u32>,
-    ) -> RevWalkDescendantsGenerationRange<'a> {
-        let index = self.0.index;
-        let entries = self.take_until_roots(root_positions);
-        let descendants_index = RevWalkDescendantsIndex::build(index, entries);
-        let mut queue = RevWalkQueue::new();
-        for &pos in root_positions {
-            // Do not add unreachable roots which shouldn't be visited
-            if descendants_index.contains_pos(pos) {
-                queue.push_wanted(Reverse(pos), ());
-            }
-        }
-        RevWalkDescendantsGenerationRange(RevWalkGenerationRangeImpl::new(
-            descendants_index,
-            queue,
-            generation_range,
-        ))
-    }
-}
-
-impl<'a> Iterator for RevWalk<'a> {
-    type Item = IndexEntry<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.0.next()
-    }
-}
-
-#[derive(Clone)]
-struct RevWalkImpl<'a, I: RevWalkIndex<'a>> {
-    index: I,
-    queue: RevWalkQueue<I::Position, ()>,
-}
-
-impl<'a, I: RevWalkIndex<'a>> RevWalkImpl<'a, I> {
-    fn next(&mut self) -> Option<IndexEntry<'a>> {
-        while let Some(item) = self.queue.pop() {
-            self.queue.skip_while_eq(&item.pos);
-            if item.is_wanted() {
-                let entry = self.index.entry_by_pos(item.pos);
-                self.queue
-                    .extend_wanted(self.index.adjacent_positions(&entry), ());
-                return Some(entry);
-            } else if self.queue.items.len() == self.queue.unwanted_count {
-                // No more wanted entries to walk
-                debug_assert!(!self.queue.items.iter().any(|x| x.is_wanted()));
-                return None;
-            } else {
-                let entry = self.index.entry_by_pos(item.pos);
-                self.queue
-                    .extend_unwanted(self.index.adjacent_positions(&entry));
-            }
-        }
-
-        debug_assert_eq!(
-            self.queue.items.iter().filter(|x| !x.is_wanted()).count(),
-            self.queue.unwanted_count
-        );
-        None
-    }
-}
-
-#[derive(Clone)]
-pub struct RevWalkGenerationRange<'a>(RevWalkGenerationRangeImpl<'a, CompositeIndex<'a>>);
-
-impl<'a> Iterator for RevWalkGenerationRange<'a> {
-    type Item = IndexEntry<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.0.next()
-    }
-}
-
-#[derive(Clone)]
-pub struct RevWalkDescendantsGenerationRange<'a>(
-    RevWalkGenerationRangeImpl<'a, RevWalkDescendantsIndex<'a>>,
-);
-
-impl<'a> Iterator for RevWalkDescendantsGenerationRange<'a> {
-    type Item = IndexEntry<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.0.next()
-    }
-}
-
-#[derive(Clone)]
-struct RevWalkGenerationRangeImpl<'a, I: RevWalkIndex<'a>> {
-    index: I,
-    // Sort item generations in ascending order
-    queue: RevWalkQueue<I::Position, Reverse<RevWalkItemGenerationRange>>,
-    generation_end: u32,
-}
-
-impl<'a, I: RevWalkIndex<'a>> RevWalkGenerationRangeImpl<'a, I> {
-    fn new(index: I, queue: RevWalkQueue<I::Position, ()>, generation_range: Range<u32>) -> Self {
-        // Translate filter range to item ranges so that overlapped ranges can be
-        // merged later.
-        //
-        // Example: `generation_range = 1..4`
-        //     (original)                       (translated)
-        //     0 1 2 3 4                        0 1 2 3 4
-        //       *=====o  generation_range             + generation_end
-        //     + :     :  item's generation     o=====* : item's range
-        let item_range = RevWalkItemGenerationRange {
-            start: 0,
-            end: u32::saturating_sub(generation_range.end, generation_range.start),
-        };
-        RevWalkGenerationRangeImpl {
-            index,
-            queue: queue.map_wanted(|()| Reverse(item_range)),
-            generation_end: generation_range.end,
-        }
-    }
-
-    fn enqueue_wanted_adjacents(
-        &mut self,
-        entry: &IndexEntry<'_>,
-        gen: RevWalkItemGenerationRange,
-    ) {
-        // `gen.start` is incremented from 0, which should never overflow
-        if gen.start + 1 >= self.generation_end {
-            return;
-        }
-        let succ_gen = RevWalkItemGenerationRange {
-            start: gen.start + 1,
-            end: gen.end.saturating_add(1),
-        };
-        self.queue
-            .extend_wanted(self.index.adjacent_positions(entry), Reverse(succ_gen));
-    }
-
-    fn next(&mut self) -> Option<IndexEntry<'a>> {
-        while let Some(item) = self.queue.pop() {
-            if let RevWalkWorkItemState::Wanted(Reverse(mut pending_gen)) = item.state {
-                let entry = self.index.entry_by_pos(item.pos);
-                let mut some_in_range = pending_gen.contains_end(self.generation_end);
-                while let Some(x) = self.queue.pop_eq(&item.pos) {
-                    // Merge overlapped ranges to reduce number of the queued items.
-                    // For queries like `:(heads-)`, `gen.end` is close to `u32::MAX`, so
-                    // ranges can be merged into one. If this is still slow, maybe we can add
-                    // special case for upper/lower bounded ranges.
-                    if let RevWalkWorkItemState::Wanted(Reverse(gen)) = x.state {
-                        some_in_range |= gen.contains_end(self.generation_end);
-                        pending_gen = if let Some(merged) = pending_gen.try_merge_end(gen) {
-                            merged
-                        } else {
-                            self.enqueue_wanted_adjacents(&entry, pending_gen);
-                            gen
-                        };
-                    } else {
-                        unreachable!("no more unwanted items of the same entry");
-                    }
-                }
-                self.enqueue_wanted_adjacents(&entry, pending_gen);
-                if some_in_range {
-                    return Some(entry);
-                }
-            } else if self.queue.items.len() == self.queue.unwanted_count {
-                // No more wanted entries to walk
-                debug_assert!(!self.queue.items.iter().any(|x| x.is_wanted()));
-                return None;
-            } else {
-                let entry = self.index.entry_by_pos(item.pos);
-                self.queue.skip_while_eq(&item.pos);
-                self.queue
-                    .extend_unwanted(self.index.adjacent_positions(&entry));
-            }
-        }
-
-        debug_assert_eq!(
-            self.queue.items.iter().filter(|x| !x.is_wanted()).count(),
-            self.queue.unwanted_count
-        );
-        None
-    }
-}
-
-#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
-struct RevWalkItemGenerationRange {
-    start: u32,
-    end: u32,
-}
-
-impl RevWalkItemGenerationRange {
-    /// Suppose sorted ranges `self, other`, merges them if overlapped.
-    #[must_use]
-    fn try_merge_end(self, other: Self) -> Option<Self> {
-        (other.start <= self.end).then(|| RevWalkItemGenerationRange {
-            start: self.start,
-            end: max(self.end, other.end),
-        })
-    }
-
-    #[must_use]
-    fn contains_end(self, end: u32) -> bool {
-        self.start < end && end <= self.end
-    }
-}
-
-/// Walks descendants from the roots, in order of ascending index position.
-#[derive(Clone)]
-pub struct RevWalkDescendants<'a> {
-    candidate_entries: Vec<IndexEntry<'a>>,
-    root_positions: HashSet<IndexPosition>,
-    reachable_positions: HashSet<IndexPosition>,
-}
-
-impl RevWalkDescendants<'_> {
-    /// Builds a set of index positions reachable from the roots.
-    ///
-    /// This is equivalent to `.map(|entry| entry.position()).collect()` on
-    /// the new iterator, but returns the internal buffer instead.
-    pub fn collect_positions_set(mut self) -> HashSet<IndexPosition> {
-        self.by_ref().for_each(drop);
-        self.reachable_positions
-    }
-}
-
-impl<'a> Iterator for RevWalkDescendants<'a> {
-    type Item = IndexEntry<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        while let Some(candidate) = self.candidate_entries.pop() {
-            if self.root_positions.contains(&candidate.position())
-                || candidate
-                    .parent_positions()
-                    .iter()
-                    .any(|parent_pos| self.reachable_positions.contains(parent_pos))
-            {
-                self.reachable_positions.insert(candidate.position());
-                return Some(candidate);
-            }
-        }
-        None
-    }
-}
-
-impl FusedIterator for RevWalkDescendants<'_> {}
-
 impl IndexSegment for ReadonlyIndexSegment {
     fn segment_num_parent_commits(&self) -> u32 {
         self.num_parent_commits
@@ -1519,6 +1042,8 @@ impl Index for DefaultReadonlyIndex {
 #[cfg(test)]
 mod tests {
+    use std::ops::Range;
+
     use smallvec::smallvec_inline;
     use test_case::test_case;
diff --git a/lib/src/default_index/rev_walk.rs b/lib/src/default_index/rev_walk.rs
new file mode 100644
index 0000000000..1c12939a76
--- /dev/null
+++ b/lib/src/default_index/rev_walk.rs
@@ -0,0 +1,504 @@
+// Copyright 2023 The Jujutsu Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#![allow(missing_docs)]
+
+use std::cmp::{max, Reverse};
+use std::collections::{BinaryHeap, HashMap, HashSet};
+use std::iter::FusedIterator;
+use std::ops::Range;
+
+use smallvec::SmallVec;
+
+use super::composite::CompositeIndex;
+use super::{IndexEntry, IndexPosition, SmallIndexPositionsVec};
+
+trait RevWalkIndex<'a> {
+    type Position: Copy + Ord;
+    type AdjacentPositions: IntoIterator<Item = Self::Position>;
+
+    fn entry_by_pos(&self, pos: Self::Position) -> IndexEntry<'a>;
+    fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Self::AdjacentPositions;
+}
+
+impl<'a> RevWalkIndex<'a> for CompositeIndex<'a> {
+    type Position = IndexPosition;
+    type AdjacentPositions = SmallIndexPositionsVec;
+
+    fn entry_by_pos(&self, pos: Self::Position) -> IndexEntry<'a> {
+        CompositeIndex::entry_by_pos(self, pos)
+    }
+
+    fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Self::AdjacentPositions {
+        entry.parent_positions()
+    }
+}
+
+#[derive(Clone)]
+struct RevWalkDescendantsIndex<'a> {
+    index: CompositeIndex<'a>,
+    children_map: HashMap<IndexPosition, DescendantIndexPositionsVec>,
+}
+
+// See SmallIndexPositionsVec for the array size.
+type DescendantIndexPositionsVec = SmallVec<[Reverse<IndexPosition>; 4]>;
+
+impl<'a> RevWalkDescendantsIndex<'a> {
+    fn build<'b>(
+        index: CompositeIndex<'a>,
+        entries: impl IntoIterator<Item = IndexEntry<'b>>,
+    ) -> Self {
+        // For dense set, it's probably cheaper to use `Vec` instead of `HashMap`.
+        let mut children_map: HashMap<IndexPosition, DescendantIndexPositionsVec> = HashMap::new();
+        for entry in entries {
+            children_map.entry(entry.position()).or_default(); // mark head node
+            for parent_pos in entry.parent_positions() {
+                let parent = children_map.entry(parent_pos).or_default();
+                parent.push(Reverse(entry.position()));
+            }
+        }
+
+        RevWalkDescendantsIndex {
+            index,
+            children_map,
+        }
+    }
+
+    fn contains_pos(&self, pos: IndexPosition) -> bool {
+        self.children_map.contains_key(&pos)
+    }
+}
+
+impl<'a> RevWalkIndex<'a> for RevWalkDescendantsIndex<'a> {
+    type Position = Reverse<IndexPosition>;
+    type AdjacentPositions = DescendantIndexPositionsVec;
+
+    fn entry_by_pos(&self, pos: Self::Position) -> IndexEntry<'a> {
+        self.index.entry_by_pos(pos.0)
+    }
+
+    fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Self::AdjacentPositions {
+        self.children_map[&entry.position()].clone()
+    }
+}
+
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd)]
+struct RevWalkWorkItem<P, T> {
+    pos: P,
+    state: RevWalkWorkItemState<T>,
+}
+
+#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
+enum RevWalkWorkItemState<T> {
+    // Order matters: Unwanted should appear earlier in the max-heap.
+    Wanted(T),
+    Unwanted,
+}
+
+impl<P, T> RevWalkWorkItem<P, T> {
+    fn is_wanted(&self) -> bool {
+        matches!(self.state, RevWalkWorkItemState::Wanted(_))
+    }
+
+    fn map_wanted<U>(self, f: impl FnOnce(T) -> U) -> RevWalkWorkItem<P, U> {
+        RevWalkWorkItem {
+            pos: self.pos,
+            state: match self.state {
+                RevWalkWorkItemState::Wanted(t) => RevWalkWorkItemState::Wanted(f(t)),
+                RevWalkWorkItemState::Unwanted => RevWalkWorkItemState::Unwanted,
+            },
+        }
+    }
+}
+
+#[derive(Clone)]
+struct RevWalkQueue<P, T> {
+    items: BinaryHeap<RevWalkWorkItem<P, T>>,
+    unwanted_count: usize,
+}
+
+impl<P: Ord, T: Ord> RevWalkQueue<P, T> {
+    fn new() -> Self {
+        Self {
+            items: BinaryHeap::new(),
+            unwanted_count: 0,
+        }
+    }
+
+    fn map_wanted<U: Ord>(self, mut f: impl FnMut(T) -> U) -> RevWalkQueue<P, U> {
+        RevWalkQueue {
+            items: self
+                .items
+                .into_iter()
+                .map(|x| x.map_wanted(&mut f))
+                .collect(),
+            unwanted_count: self.unwanted_count,
+        }
+    }
+
+    fn push_wanted(&mut self, pos: P, t: T) {
+        let state = RevWalkWorkItemState::Wanted(t);
+        self.items.push(RevWalkWorkItem { pos, state });
+    }
+
+    fn push_unwanted(&mut self, pos: P) {
+        let state = RevWalkWorkItemState::Unwanted;
+        self.items.push(RevWalkWorkItem { pos, state });
+        self.unwanted_count += 1;
+    }
+
+    fn extend_wanted(&mut self, positions: impl IntoIterator<Item = P>, t: T)
+    where
+        T: Clone,
+    {
+        // positions typically contains one item, and single BinaryHeap::push()
+        // appears to be slightly faster than .extend() as of rustc 1.73.0.
+        for pos in positions {
+            self.push_wanted(pos, t.clone());
+        }
+    }
+
+    fn extend_unwanted(&mut self, positions: impl IntoIterator<Item = P>) {
+        for pos in positions {
+            self.push_unwanted(pos);
+        }
+    }
+
+    fn pop(&mut self) -> Option<RevWalkWorkItem<P, T>> {
+        if let Some(x) = self.items.pop() {
+            self.unwanted_count -= !x.is_wanted() as usize;
+            Some(x)
+        } else {
+            None
+        }
+    }
+
+    fn pop_eq(&mut self, pos: &P) -> Option<RevWalkWorkItem<P, T>> {
+        if let Some(x) = self.items.peek() {
+            (x.pos == *pos).then(|| self.pop().unwrap())
+        } else {
+            None
+        }
+    }
+
+    fn skip_while_eq(&mut self, pos: &P) {
+        while self.pop_eq(pos).is_some() {
+            continue;
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct RevWalk<'a>(RevWalkImpl<'a, CompositeIndex<'a>>);
+
+impl<'a> RevWalk<'a> {
+    pub(super) fn new(index: CompositeIndex<'a>) -> Self {
+        let queue = RevWalkQueue::new();
+        RevWalk(RevWalkImpl { index, queue })
+    }
+
+    pub(super) fn extend_wanted(&mut self, positions: impl IntoIterator<Item = IndexPosition>) {
+        self.0.queue.extend_wanted(positions, ());
+    }
+
+    pub(super) fn extend_unwanted(&mut self, positions: impl IntoIterator<Item = IndexPosition>) {
+        self.0.queue.extend_unwanted(positions);
+    }
+
+    /// Filters entries by generation (or depth from the current wanted set).
+    ///
+    /// The generation of the current wanted entries starts from 0.
+    pub fn filter_by_generation(self, generation_range: Range<u32>) -> RevWalkGenerationRange<'a> {
+        RevWalkGenerationRange(RevWalkGenerationRangeImpl::new(
+            self.0.index,
+            self.0.queue,
+            generation_range,
+        ))
+    }
+
+    /// Walks ancestors until all of the reachable roots in `root_positions` get
+    /// visited.
+    ///
+    /// Use this if you are only interested in descendants of the given roots.
+    /// The caller still needs to filter out unwanted entries.
+    pub fn take_until_roots(
+        self,
+        root_positions: &[IndexPosition],
+    ) -> impl Iterator<Item = IndexEntry<'a>> + Clone + 'a {
+        // We can also make it stop visiting based on the generation number. Maybe
+        // it will perform better for unbalanced branchy history.
+        // https://github.com/martinvonz/jj/pull/1492#discussion_r1160678325
+        let bottom_position = *root_positions.iter().min().unwrap_or(&IndexPosition::MAX);
+        self.take_while(move |entry| entry.position() >= bottom_position)
+    }
+
+    /// Fully consumes the ancestors and walks back from `root_positions`.
+    ///
+    /// The returned iterator yields entries in order of ascending index
+    /// position.
+    pub fn descendants(self, root_positions: &[IndexPosition]) -> RevWalkDescendants<'a> {
+        RevWalkDescendants {
+            candidate_entries: self.take_until_roots(root_positions).collect(),
+            root_positions: root_positions.iter().copied().collect(),
+            reachable_positions: HashSet::new(),
+        }
+    }
+
+    /// Fully consumes the ancestors and walks back from `root_positions` within
+    /// `generation_range`.
+    ///
+    /// The returned iterator yields entries in order of ascending index
+    /// position.
+    pub fn descendants_filtered_by_generation(
+        self,
+        root_positions: &[IndexPosition],
+        generation_range: Range<u32>,
+    ) -> RevWalkDescendantsGenerationRange<'a> {
+        let index = self.0.index;
+        let entries = self.take_until_roots(root_positions);
+        let descendants_index = RevWalkDescendantsIndex::build(index, entries);
+        let mut queue = RevWalkQueue::new();
+        for &pos in root_positions {
+            // Do not add unreachable roots which shouldn't be visited
+            if descendants_index.contains_pos(pos) {
+                queue.push_wanted(Reverse(pos), ());
+            }
+        }
+        RevWalkDescendantsGenerationRange(RevWalkGenerationRangeImpl::new(
+            descendants_index,
+            queue,
+            generation_range,
+        ))
+    }
+}
+
+impl<'a> Iterator for RevWalk<'a> {
+    type Item = IndexEntry<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+#[derive(Clone)]
+struct RevWalkImpl<'a, I: RevWalkIndex<'a>> {
+    index: I,
+    queue: RevWalkQueue<I::Position, ()>,
+}
+
+impl<'a, I: RevWalkIndex<'a>> RevWalkImpl<'a, I> {
+    fn next(&mut self) -> Option<IndexEntry<'a>> {
+        while let Some(item) = self.queue.pop() {
+            self.queue.skip_while_eq(&item.pos);
+            if item.is_wanted() {
+                let entry = self.index.entry_by_pos(item.pos);
+                self.queue
+                    .extend_wanted(self.index.adjacent_positions(&entry), ());
+                return Some(entry);
+            } else if self.queue.items.len() == self.queue.unwanted_count {
+                // No more wanted entries to walk
+                debug_assert!(!self.queue.items.iter().any(|x| x.is_wanted()));
+                return None;
+            } else {
+                let entry = self.index.entry_by_pos(item.pos);
+                self.queue
+                    .extend_unwanted(self.index.adjacent_positions(&entry));
+            }
+        }
+
+        debug_assert_eq!(
+            self.queue.items.iter().filter(|x| !x.is_wanted()).count(),
+            self.queue.unwanted_count
+        );
+        None
+    }
+}
+
+#[derive(Clone)]
+pub struct RevWalkGenerationRange<'a>(RevWalkGenerationRangeImpl<'a, CompositeIndex<'a>>);
+
+impl<'a> Iterator for RevWalkGenerationRange<'a> {
+    type Item = IndexEntry<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+#[derive(Clone)]
+pub struct RevWalkDescendantsGenerationRange<'a>(
+    RevWalkGenerationRangeImpl<'a, RevWalkDescendantsIndex<'a>>,
+);
+
+impl<'a> Iterator for RevWalkDescendantsGenerationRange<'a> {
+    type Item = IndexEntry<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+#[derive(Clone)]
+struct RevWalkGenerationRangeImpl<'a, I: RevWalkIndex<'a>> {
+    index: I,
+    // Sort item generations in ascending order
+    queue: RevWalkQueue<I::Position, Reverse<RevWalkItemGenerationRange>>,
+    generation_end: u32,
+}
+
+impl<'a, I: RevWalkIndex<'a>> RevWalkGenerationRangeImpl<'a, I> {
+    fn new(index: I, queue: RevWalkQueue<I::Position, ()>, generation_range: Range<u32>) -> Self {
+        // Translate filter range to item ranges so that overlapped ranges can be
+        // merged later.
+        //
+        // Example: `generation_range = 1..4`
+        //     (original)                       (translated)
+        //     0 1 2 3 4                        0 1 2 3 4
+        //       *=====o  generation_range             + generation_end
+        //     + :     :  item's generation     o=====* : item's range
+        let item_range = RevWalkItemGenerationRange {
+            start: 0,
+            end: u32::saturating_sub(generation_range.end, generation_range.start),
+        };
+        RevWalkGenerationRangeImpl {
+            index,
+            queue: queue.map_wanted(|()| Reverse(item_range)),
+            generation_end: generation_range.end,
+        }
+    }
+
+    fn enqueue_wanted_adjacents(
+        &mut self,
+        entry: &IndexEntry<'_>,
+        gen: RevWalkItemGenerationRange,
+    ) {
+        // `gen.start` is incremented from 0, which should never overflow
+        if gen.start + 1 >= self.generation_end {
+            return;
+        }
+        let succ_gen = RevWalkItemGenerationRange {
+            start: gen.start + 1,
+            end: gen.end.saturating_add(1),
+        };
+        self.queue
+            .extend_wanted(self.index.adjacent_positions(entry), Reverse(succ_gen));
+    }
+
+    fn next(&mut self) -> Option<IndexEntry<'a>> {
+        while let Some(item) = self.queue.pop() {
+            if let RevWalkWorkItemState::Wanted(Reverse(mut pending_gen)) = item.state {
+                let entry = self.index.entry_by_pos(item.pos);
+                let mut some_in_range = pending_gen.contains_end(self.generation_end);
+                while let Some(x) = self.queue.pop_eq(&item.pos) {
+                    // Merge overlapped ranges to reduce number of the queued items.
+                    // For queries like `:(heads-)`, `gen.end` is close to `u32::MAX`, so
+                    // ranges can be merged into one. If this is still slow, maybe we can add
+                    // special case for upper/lower bounded ranges.
+                    if let RevWalkWorkItemState::Wanted(Reverse(gen)) = x.state {
+                        some_in_range |= gen.contains_end(self.generation_end);
+                        pending_gen = if let Some(merged) = pending_gen.try_merge_end(gen) {
+                            merged
+                        } else {
+                            self.enqueue_wanted_adjacents(&entry, pending_gen);
+                            gen
+                        };
+                    } else {
+                        unreachable!("no more unwanted items of the same entry");
+                    }
+                }
+                self.enqueue_wanted_adjacents(&entry, pending_gen);
+                if some_in_range {
+                    return Some(entry);
+                }
+            } else if self.queue.items.len() == self.queue.unwanted_count {
+                // No more wanted entries to walk
+                debug_assert!(!self.queue.items.iter().any(|x| x.is_wanted()));
+                return None;
+            } else {
+                let entry = self.index.entry_by_pos(item.pos);
+                self.queue.skip_while_eq(&item.pos);
+                self.queue
+                    .extend_unwanted(self.index.adjacent_positions(&entry));
+            }
+        }
+
+        debug_assert_eq!(
+            self.queue.items.iter().filter(|x| !x.is_wanted()).count(),
+            self.queue.unwanted_count
+        );
+        None
+    }
+}
+
+#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
+struct RevWalkItemGenerationRange {
+    start: u32,
+    end: u32,
+}
+
+impl RevWalkItemGenerationRange {
+    /// Suppose sorted ranges `self, other`, merges them if overlapped.
+    #[must_use]
+    fn try_merge_end(self, other: Self) -> Option<Self> {
+        (other.start <= self.end).then(|| RevWalkItemGenerationRange {
+            start: self.start,
+            end: max(self.end, other.end),
+        })
+    }
+
+    #[must_use]
+    fn contains_end(self, end: u32) -> bool {
+        self.start < end && end <= self.end
+    }
+}
+
+/// Walks descendants from the roots, in order of ascending index position.
+#[derive(Clone)]
+pub struct RevWalkDescendants<'a> {
+    candidate_entries: Vec<IndexEntry<'a>>,
+    root_positions: HashSet<IndexPosition>,
+    reachable_positions: HashSet<IndexPosition>,
+}
+
+impl RevWalkDescendants<'_> {
+    /// Builds a set of index positions reachable from the roots.
+    ///
+    /// This is equivalent to `.map(|entry| entry.position()).collect()` on
+    /// the new iterator, but returns the internal buffer instead.
+    pub fn collect_positions_set(mut self) -> HashSet<IndexPosition> {
+        self.by_ref().for_each(drop);
+        self.reachable_positions
+    }
+}
+
+impl<'a> Iterator for RevWalkDescendants<'a> {
+    type Item = IndexEntry<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while let Some(candidate) = self.candidate_entries.pop() {
+            if self.root_positions.contains(&candidate.position())
+                || candidate
+                    .parent_positions()
+                    .iter()
+                    .any(|parent_pos| self.reachable_positions.contains(parent_pos))
+            {
+                self.reachable_positions.insert(candidate.position());
+                return Some(candidate);
+            }
+        }
+        None
+    }
+}
+
+impl FusedIterator for RevWalkDescendants<'_> {}
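
As a reading aid for the generation-range bookkeeping in RevWalkGenerationRangeImpl
above, here is a small standalone sketch. It is not part of the patch: GenRange and
main are hypothetical stand-ins that re-implement the try_merge_end/contains_end
rules for illustration only.

    // Hypothetical stand-in for RevWalkItemGenerationRange (illustration only).
    #[derive(Clone, Copy, Debug, PartialEq)]
    struct GenRange {
        start: u32,
        end: u32,
    }

    impl GenRange {
        // Merges sorted ranges when they overlap, mirroring try_merge_end.
        fn try_merge_end(self, other: Self) -> Option<Self> {
            (other.start <= self.end).then(|| GenRange {
                start: self.start,
                end: self.end.max(other.end),
            })
        }

        // True if `end` (the filter's `generation_end`) falls in (start, end].
        fn contains_end(self, end: u32) -> bool {
            self.start < end && end <= self.end
        }
    }

    fn main() {
        // `generation_range = 1..4` translates to an item range of length 3
        // anchored at 0, walked with `generation_end = 4`.
        let item = GenRange { start: 0, end: 3 };
        assert!(!item.contains_end(4)); // a generation-0 entry is outside 1..4

        // One step toward the parents shifts the range by one.
        let succ = GenRange { start: item.start + 1, end: item.end + 1 };
        assert!(succ.contains_end(4)); // a generation-1 entry is inside 1..4

        // Overlapping ranges queued for the same entry merge into one...
        let merged = GenRange { start: 0, end: 3 }.try_merge_end(GenRange { start: 2, end: 5 });
        assert_eq!(merged, Some(GenRange { start: 0, end: 5 }));
        // ...while disjoint ranges stay separate and are walked one by one.
        let disjoint = GenRange { start: 0, end: 3 }.try_merge_end(GenRange { start: 4, end: 6 });
        assert_eq!(disjoint, None);
    }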