Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

index: move segment files to sub directory, add version number #3014

Merged
merged 4 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

* The minimum supported Rust version (MSRV) is now 1.76.0.

* The on-disk index format changed. New index files will be created
automatically, but it can fail if the repository is co-located and predates
Git GC issues [#815](https://github.com/martinvonz/jj/issues/815). If
reindexing failed, you'll need to clean up corrupted operation history by
`jj op abandon ..<bad operation ID>`.

### New features

* Templates now support logical operators: `||`, `&&`, `!`
Expand Down
5 changes: 4 additions & 1 deletion lib/src/default_index/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ use tempfile::NamedTempFile;

use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment};
use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec};
use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexSegment};
use super::readonly::{
DefaultReadonlyIndex, ReadonlyIndexSegment, INDEX_SEGMENT_FILE_FORMAT_VERSION,
};
use crate::backend::{ChangeId, CommitId};
use crate::commit::Commit;
use crate::file_util::persist_content_addressed_temp_file;
Expand Down Expand Up @@ -275,6 +277,7 @@ impl MutableIndexSegment {
}

let mut buf = Vec::new();
buf.extend(INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes());
self.serialize_parent_filename(&mut buf);
let local_entries_offset = buf.len();
self.serialize_local_entries(&mut buf);
Expand Down
12 changes: 11 additions & 1 deletion lib/src/default_index/readonly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ impl ReadonlyIndexLoadError {
}
}

/// Current format version of the index segment file.
pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 1;

struct CommitGraphEntry<'a> {
data: &'a [u8],
commit_id_length: usize,
Expand Down Expand Up @@ -145,6 +148,7 @@ impl CommitLookupEntry<'_> {
///
/// File format:
/// ```text
/// u32: file format version
/// u32: parent segment file name length (0 means root)
/// <length number of bytes>: parent segment file name
///
Expand All @@ -167,7 +171,6 @@ impl CommitLookupEntry<'_> {
///
/// Note that u32 fields are 4-byte aligned so long as the parent file name
/// (which is hexadecimal hash) and commit/change ids aren't of exotic length.
// TODO: add a version number
// TODO: replace the table by a trie so we don't have to repeat the full commit
// ids
// TODO: add a fanout table like git's commit graph has?
Expand Down Expand Up @@ -220,6 +223,13 @@ impl ReadonlyIndexSegment {
file.read_exact(&mut buf).map_err(from_io_err)?;
Ok(u32::from_le_bytes(buf))
};
let format_version = read_u32(file)?;
if format_version != INDEX_SEGMENT_FILE_FORMAT_VERSION {
return Err(ReadonlyIndexLoadError::invalid_data(
&name,
format!("unsupported file format version: {format_version}"),
));
}
let parent_filename_len = read_u32(file)?;
let maybe_parent_file = if parent_filename_len > 0 {
let mut parent_filename_bytes = vec![0; parent_filename_len as usize];
Expand Down
48 changes: 31 additions & 17 deletions lib/src/default_index/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexLoadError, ReadonlyInde
use crate::backend::{BackendError, BackendInitError, CommitId};
use crate::commit::CommitByCommitterTimestamp;
use crate::dag_walk;
use crate::file_util::{persist_content_addressed_temp_file, IoResultExt as _, PathError};
use crate::file_util::{self, persist_content_addressed_temp_file, IoResultExt as _, PathError};
use crate::index::{
Index, IndexReadError, IndexStore, IndexWriteError, MutableIndex, ReadonlyIndex,
};
Expand Down Expand Up @@ -89,11 +89,11 @@ impl DefaultIndexStore {
}

pub fn init(dir: &Path) -> Result<Self, DefaultIndexStoreInitError> {
let op_dir = dir.join("operations");
std::fs::create_dir(&op_dir).context(&op_dir)?;
Ok(DefaultIndexStore {
let store = DefaultIndexStore {
dir: dir.to_owned(),
})
};
store.ensure_base_dirs()?;
Ok(store)
}

pub fn load(dir: &Path) -> DefaultIndexStore {
Expand All @@ -103,12 +103,14 @@ impl DefaultIndexStore {
}

pub fn reinit(&self) -> Result<(), DefaultIndexStoreInitError> {
// Create base directories in case the store was initialized by old jj.
self.ensure_base_dirs()?;
// Remove all operation links to trigger rebuilding.
let op_dir = self.dir.join("operations");
std::fs::remove_dir_all(&op_dir).context(&op_dir)?;
std::fs::create_dir(&op_dir).context(&op_dir)?;
file_util::remove_dir_contents(&self.operations_dir())?;
// Remove index segments to save disk space. If raced, new segment file
// will be created by the other process.
file_util::remove_dir_contents(&self.segments_dir())?;
// jj <= 0.14 created segment files in the top directory
for entry in self.dir.read_dir().context(&self.dir)? {
let entry = entry.context(&self.dir)?;
let path = entry.path();
Expand All @@ -121,17 +123,32 @@ impl DefaultIndexStore {
Ok(())
}

fn ensure_base_dirs(&self) -> Result<(), PathError> {
for dir in [self.operations_dir(), self.segments_dir()] {
file_util::create_or_reuse_dir(&dir).context(&dir)?;
}
Ok(())
}

fn operations_dir(&self) -> PathBuf {
self.dir.join("operations")
}

fn segments_dir(&self) -> PathBuf {
self.dir.join("segments")
}

fn load_index_segments_at_operation(
&self,
op_id: &OperationId,
commit_id_length: usize,
change_id_length: usize,
) -> Result<Arc<ReadonlyIndexSegment>, DefaultIndexStoreError> {
let op_id_file = self.dir.join("operations").join(op_id.hex());
let op_id_file = self.operations_dir().join(op_id.hex());
let index_file_id_hex =
fs::read_to_string(op_id_file).map_err(DefaultIndexStoreError::LoadAssociation)?;
ReadonlyIndexSegment::load(
&self.dir,
&self.segments_dir(),
index_file_id_hex,
commit_id_length,
change_id_length,
Expand Down Expand Up @@ -159,7 +176,7 @@ impl DefaultIndexStore {
store: &Arc<Store>,
) -> Result<Arc<ReadonlyIndexSegment>, DefaultIndexStoreError> {
let view = operation.view()?;
let operations_dir = self.dir.join("operations");
let operations_dir = self.operations_dir();
let commit_id_length = store.commit_id_length();
let change_id_length = store.change_id_length();
let mut visited_heads: HashSet<CommitId> =
Expand Down Expand Up @@ -262,7 +279,7 @@ impl DefaultIndexStore {
op_id: &OperationId,
) -> Result<Arc<ReadonlyIndexSegment>, DefaultIndexStoreError> {
let index_segment = mutable_index
.squash_and_save_in(&self.dir)
.squash_and_save_in(&self.segments_dir())
.map_err(DefaultIndexStoreError::SaveIndex)?;
self.associate_file_with_operation(&index_segment, op_id)
.map_err(|source| DefaultIndexStoreError::AssociateIndex {
Expand All @@ -281,10 +298,7 @@ impl DefaultIndexStore {
let mut temp_file = NamedTempFile::new_in(&self.dir)?;
let file = temp_file.as_file_mut();
file.write_all(index.name().as_bytes())?;
persist_content_addressed_temp_file(
temp_file,
self.dir.join("operations").join(op_id.hex()),
)?;
persist_content_addressed_temp_file(temp_file, self.operations_dir().join(op_id.hex()))?;
Ok(())
}
}
Expand Down Expand Up @@ -317,7 +331,7 @@ impl IndexStore for DefaultIndexStore {
// If the index was corrupt (maybe it was written in a different format),
// we just reindex.
// TODO: Move this message to a callback or something.
println!("The index was corrupt (maybe the format has changed). Reindexing...");
eprintln!("{err}: {source}. Reindexing...", source = err.error);
self.reinit().map_err(|err| IndexReadError(err.into()))?;
self.build_index_segments_at_operation(op, store)
}
Expand Down
12 changes: 12 additions & 0 deletions lib/src/file_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ pub fn create_or_reuse_dir(dirname: &Path) -> io::Result<()> {
}
}

/// Removes all files in the directory, but not the directory itself.
///
/// The directory must exist, and there should be no sub directories.
pub fn remove_dir_contents(dirname: &Path) -> Result<(), PathError> {
for entry in dirname.read_dir().context(dirname)? {
let entry = entry.context(dirname)?;
let path = entry.path();
fs::remove_file(&path).context(&path)?;
}
Ok(())
}

/// Turns the given `to` path into relative path starting from the `from` path.
///
/// Both `from` and `to` paths are supposed to be absolute and normalized in the
Expand Down
53 changes: 49 additions & 4 deletions lib/tests/test_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,9 @@ fn test_index_commits_previous_operations() {
let repo = tx.commit("test");

// Delete index from disk
let index_operations_dir = repo.repo_path().join("index").join("operations");
assert!(index_operations_dir.is_dir());
std::fs::remove_dir_all(&index_operations_dir).unwrap();
std::fs::create_dir(&index_operations_dir).unwrap();
let default_index_store: &DefaultIndexStore =
repo.index_store().as_any().downcast_ref().unwrap();
default_index_store.reinit().unwrap();

let repo = load_repo_at_head(&settings, repo.repo_path());
let index = as_readonly_composite(&repo);
Expand Down Expand Up @@ -586,6 +585,52 @@ fn test_index_commits_incremental_squashed() {
assert_eq!(commits_by_level(&repo), vec![71, 20]);
}

#[test]
fn test_reindex_no_segments_dir() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init();
let repo = &test_repo.repo;

let mut tx = repo.start_transaction(&settings);
let commit_a = write_random_commit(tx.mut_repo(), &settings);
let repo = tx.commit("test");
assert!(repo.index().has_id(commit_a.id()));

// jj <= 0.14 doesn't have "segments" directory
let segments_dir = repo.repo_path().join("index").join("segments");
assert!(segments_dir.is_dir());
fs::remove_dir_all(&segments_dir).unwrap();

let repo = load_repo_at_head(&settings, repo.repo_path());
assert!(repo.index().has_id(commit_a.id()));
}

#[test]
fn test_reindex_corrupt_segment_files() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init();
let repo = &test_repo.repo;

let mut tx = repo.start_transaction(&settings);
let commit_a = write_random_commit(tx.mut_repo(), &settings);
let repo = tx.commit("test");
assert!(repo.index().has_id(commit_a.id()));

// Corrupt the index files
let segments_dir = repo.repo_path().join("index").join("segments");
for entry in segments_dir.read_dir().unwrap() {
let entry = entry.unwrap();
// u32: file format version
// u32: parent segment file name length (0 means root)
// u32: number of local entries
// u32: number of overflow parent entries
fs::write(entry.path(), b"\0".repeat(16)).unwrap()
}

let repo = load_repo_at_head(&settings, repo.repo_path());
assert!(repo.index().has_id(commit_a.id()));
}

#[test]
fn test_reindex_from_merged_operation() {
let settings = testutils::user_settings();
Expand Down