diff --git a/CHANGELOG.md b/CHANGELOG.md index 003b3ccccf..36fe4f2e66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * The minimum supported Rust version (MSRV) is now 1.76.0. +* The on-disk index format changed. New index files will be created + automatically, but it can fail if the repository is co-located and predates + Git GC issues [#815](https://github.com/martinvonz/jj/issues/815). If + reindexing failed, you'll need to clean up corrupted operation history by + `jj op abandon ..`. + ### New features * Templates now support logical operators: `||`, `&&`, `!` diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 7e6734ed75..c1ce030145 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -31,7 +31,9 @@ use tempfile::NamedTempFile; use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment}; use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec}; -use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexSegment}; +use super::readonly::{ + DefaultReadonlyIndex, ReadonlyIndexSegment, INDEX_SEGMENT_FILE_FORMAT_VERSION, +}; use crate::backend::{ChangeId, CommitId}; use crate::commit::Commit; use crate::file_util::persist_content_addressed_temp_file; @@ -275,6 +277,7 @@ impl MutableIndexSegment { } let mut buf = Vec::new(); + buf.extend(INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes()); self.serialize_parent_filename(&mut buf); let local_entries_offset = buf.len(); self.serialize_local_entries(&mut buf); diff --git a/lib/src/default_index/readonly.rs b/lib/src/default_index/readonly.rs index 3bca5639ce..1ff1041a94 100644 --- a/lib/src/default_index/readonly.rs +++ b/lib/src/default_index/readonly.rs @@ -72,6 +72,9 @@ impl ReadonlyIndexLoadError { } } +/// Current format version of the index segment file. +pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 1; + struct CommitGraphEntry<'a> { data: &'a [u8], commit_id_length: usize, @@ -145,6 +148,7 @@ impl CommitLookupEntry<'_> { /// /// File format: /// ```text +/// u32: file format version /// u32: parent segment file name length (0 means root) /// : parent segment file name /// @@ -167,7 +171,6 @@ impl CommitLookupEntry<'_> { /// /// Note that u32 fields are 4-byte aligned so long as the parent file name /// (which is hexadecimal hash) and commit/change ids aren't of exotic length. -// TODO: add a version number // TODO: replace the table by a trie so we don't have to repeat the full commit // ids // TODO: add a fanout table like git's commit graph has? @@ -220,6 +223,13 @@ impl ReadonlyIndexSegment { file.read_exact(&mut buf).map_err(from_io_err)?; Ok(u32::from_le_bytes(buf)) }; + let format_version = read_u32(file)?; + if format_version != INDEX_SEGMENT_FILE_FORMAT_VERSION { + return Err(ReadonlyIndexLoadError::invalid_data( + &name, + format!("unsupported file format version: {format_version}"), + )); + } let parent_filename_len = read_u32(file)?; let maybe_parent_file = if parent_filename_len > 0 { let mut parent_filename_bytes = vec![0; parent_filename_len as usize]; diff --git a/lib/src/default_index/store.rs b/lib/src/default_index/store.rs index 155b9329c7..0ac974176f 100644 --- a/lib/src/default_index/store.rs +++ b/lib/src/default_index/store.rs @@ -30,7 +30,7 @@ use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexLoadError, ReadonlyInde use crate::backend::{BackendError, BackendInitError, CommitId}; use crate::commit::CommitByCommitterTimestamp; use crate::dag_walk; -use crate::file_util::{persist_content_addressed_temp_file, IoResultExt as _, PathError}; +use crate::file_util::{self, persist_content_addressed_temp_file, IoResultExt as _, PathError}; use crate::index::{ Index, IndexReadError, IndexStore, IndexWriteError, MutableIndex, ReadonlyIndex, }; @@ -89,11 +89,11 @@ impl DefaultIndexStore { } pub fn init(dir: &Path) -> Result { - let op_dir = dir.join("operations"); - std::fs::create_dir(&op_dir).context(&op_dir)?; - Ok(DefaultIndexStore { + let store = DefaultIndexStore { dir: dir.to_owned(), - }) + }; + store.ensure_base_dirs()?; + Ok(store) } pub fn load(dir: &Path) -> DefaultIndexStore { @@ -103,12 +103,14 @@ impl DefaultIndexStore { } pub fn reinit(&self) -> Result<(), DefaultIndexStoreInitError> { + // Create base directories in case the store was initialized by old jj. + self.ensure_base_dirs()?; // Remove all operation links to trigger rebuilding. - let op_dir = self.dir.join("operations"); - std::fs::remove_dir_all(&op_dir).context(&op_dir)?; - std::fs::create_dir(&op_dir).context(&op_dir)?; + file_util::remove_dir_contents(&self.operations_dir())?; // Remove index segments to save disk space. If raced, new segment file // will be created by the other process. + file_util::remove_dir_contents(&self.segments_dir())?; + // jj <= 0.14 created segment files in the top directory for entry in self.dir.read_dir().context(&self.dir)? { let entry = entry.context(&self.dir)?; let path = entry.path(); @@ -121,17 +123,32 @@ impl DefaultIndexStore { Ok(()) } + fn ensure_base_dirs(&self) -> Result<(), PathError> { + for dir in [self.operations_dir(), self.segments_dir()] { + file_util::create_or_reuse_dir(&dir).context(&dir)?; + } + Ok(()) + } + + fn operations_dir(&self) -> PathBuf { + self.dir.join("operations") + } + + fn segments_dir(&self) -> PathBuf { + self.dir.join("segments") + } + fn load_index_segments_at_operation( &self, op_id: &OperationId, commit_id_length: usize, change_id_length: usize, ) -> Result, DefaultIndexStoreError> { - let op_id_file = self.dir.join("operations").join(op_id.hex()); + let op_id_file = self.operations_dir().join(op_id.hex()); let index_file_id_hex = fs::read_to_string(op_id_file).map_err(DefaultIndexStoreError::LoadAssociation)?; ReadonlyIndexSegment::load( - &self.dir, + &self.segments_dir(), index_file_id_hex, commit_id_length, change_id_length, @@ -159,7 +176,7 @@ impl DefaultIndexStore { store: &Arc, ) -> Result, DefaultIndexStoreError> { let view = operation.view()?; - let operations_dir = self.dir.join("operations"); + let operations_dir = self.operations_dir(); let commit_id_length = store.commit_id_length(); let change_id_length = store.change_id_length(); let mut visited_heads: HashSet = @@ -262,7 +279,7 @@ impl DefaultIndexStore { op_id: &OperationId, ) -> Result, DefaultIndexStoreError> { let index_segment = mutable_index - .squash_and_save_in(&self.dir) + .squash_and_save_in(&self.segments_dir()) .map_err(DefaultIndexStoreError::SaveIndex)?; self.associate_file_with_operation(&index_segment, op_id) .map_err(|source| DefaultIndexStoreError::AssociateIndex { @@ -281,10 +298,7 @@ impl DefaultIndexStore { let mut temp_file = NamedTempFile::new_in(&self.dir)?; let file = temp_file.as_file_mut(); file.write_all(index.name().as_bytes())?; - persist_content_addressed_temp_file( - temp_file, - self.dir.join("operations").join(op_id.hex()), - )?; + persist_content_addressed_temp_file(temp_file, self.operations_dir().join(op_id.hex()))?; Ok(()) } } @@ -317,7 +331,7 @@ impl IndexStore for DefaultIndexStore { // If the index was corrupt (maybe it was written in a different format), // we just reindex. // TODO: Move this message to a callback or something. - println!("The index was corrupt (maybe the format has changed). Reindexing..."); + eprintln!("{err}: {source}. Reindexing...", source = err.error); self.reinit().map_err(|err| IndexReadError(err.into()))?; self.build_index_segments_at_operation(op, store) } diff --git a/lib/src/file_util.rs b/lib/src/file_util.rs index 15b9b24a14..d92b9f4e75 100644 --- a/lib/src/file_util.rs +++ b/lib/src/file_util.rs @@ -55,6 +55,18 @@ pub fn create_or_reuse_dir(dirname: &Path) -> io::Result<()> { } } +/// Removes all files in the directory, but not the directory itself. +/// +/// The directory must exist, and there should be no sub directories. +pub fn remove_dir_contents(dirname: &Path) -> Result<(), PathError> { + for entry in dirname.read_dir().context(dirname)? { + let entry = entry.context(dirname)?; + let path = entry.path(); + fs::remove_file(&path).context(&path)?; + } + Ok(()) +} + /// Turns the given `to` path into relative path starting from the `from` path. /// /// Both `from` and `to` paths are supposed to be absolute and normalized in the diff --git a/lib/tests/test_index.rs b/lib/tests/test_index.rs index 3abc321abf..da5c1ba9ee 100644 --- a/lib/tests/test_index.rs +++ b/lib/tests/test_index.rs @@ -294,10 +294,9 @@ fn test_index_commits_previous_operations() { let repo = tx.commit("test"); // Delete index from disk - let index_operations_dir = repo.repo_path().join("index").join("operations"); - assert!(index_operations_dir.is_dir()); - std::fs::remove_dir_all(&index_operations_dir).unwrap(); - std::fs::create_dir(&index_operations_dir).unwrap(); + let default_index_store: &DefaultIndexStore = + repo.index_store().as_any().downcast_ref().unwrap(); + default_index_store.reinit().unwrap(); let repo = load_repo_at_head(&settings, repo.repo_path()); let index = as_readonly_composite(&repo); @@ -586,6 +585,52 @@ fn test_index_commits_incremental_squashed() { assert_eq!(commits_by_level(&repo), vec![71, 20]); } +#[test] +fn test_reindex_no_segments_dir() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings); + let commit_a = write_random_commit(tx.mut_repo(), &settings); + let repo = tx.commit("test"); + assert!(repo.index().has_id(commit_a.id())); + + // jj <= 0.14 doesn't have "segments" directory + let segments_dir = repo.repo_path().join("index").join("segments"); + assert!(segments_dir.is_dir()); + fs::remove_dir_all(&segments_dir).unwrap(); + + let repo = load_repo_at_head(&settings, repo.repo_path()); + assert!(repo.index().has_id(commit_a.id())); +} + +#[test] +fn test_reindex_corrupt_segment_files() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings); + let commit_a = write_random_commit(tx.mut_repo(), &settings); + let repo = tx.commit("test"); + assert!(repo.index().has_id(commit_a.id())); + + // Corrupt the index files + let segments_dir = repo.repo_path().join("index").join("segments"); + for entry in segments_dir.read_dir().unwrap() { + let entry = entry.unwrap(); + // u32: file format version + // u32: parent segment file name length (0 means root) + // u32: number of local entries + // u32: number of overflow parent entries + fs::write(entry.path(), b"\0".repeat(16)).unwrap() + } + + let repo = load_repo_at_head(&settings, repo.repo_path()); + assert!(repo.index().has_id(commit_a.id())); +} + #[test] fn test_reindex_from_merged_operation() { let settings = testutils::user_settings();