Skip to content

Commit

Permalink
index: show bad operation id if commit lookup failed during reindexing
Browse files Browse the repository at this point in the history
My jj repo contains such head commits, and "jj debug reindex" fails. To address
this problem, we'll probably need to implement GC, and the user will discard
operations before the first bad op id.
  • Loading branch information
yuja committed Dec 28, 2023
1 parent 8009205 commit 8855bc2
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 18 deletions.
51 changes: 34 additions & 17 deletions lib/src/default_index/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#![allow(missing_docs)]

use std::any::Any;
use std::collections::HashSet;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::sync::Arc;
Expand Down Expand Up @@ -64,8 +65,11 @@ pub enum DefaultIndexStoreError {
LoadIndex(ReadonlyIndexLoadError),
#[error("Failed to write commit index file: {0}")]
SaveIndex(#[source] io::Error),
#[error("Failed to index commits: {0}")]
IndexCommits(#[source] BackendError),
#[error("Failed to index commits at operation {op_id}: {source}", op_id = op_id.hex())]
IndexCommits {
op_id: OperationId,
source: BackendError,
},
#[error(transparent)]
OpStore(#[from] OpStoreError),
}
Expand Down Expand Up @@ -142,7 +146,11 @@ impl DefaultIndexStore {
let operations_dir = self.dir.join("operations");
let commit_id_length = store.commit_id_length();
let change_id_length = store.change_id_length();
let mut new_heads = view.heads().clone();
let mut visited_heads: HashSet<CommitId> = view.heads().clone();
let mut historical_heads: Vec<(CommitId, OperationId)> = visited_heads
.iter()
.map(|commit_id| (commit_id.clone(), operation.id().clone()))
.collect();
let mut parent_op_id: Option<OperationId> = None;
for op in dag_walk::dfs_ok(
[Ok(operation.clone())],
Expand All @@ -158,8 +166,10 @@ impl DefaultIndexStore {
parent_op_id = Some(op.id().clone());
}
// TODO: no need to walk ancestors of the parent_op_id operation
for head in op.view()?.heads() {
new_heads.insert(head.clone());
for commit_id in op.view()?.heads() {
if visited_heads.insert(commit_id.clone()) {
historical_heads.push((commit_id.clone(), op.id().clone()));
}
}
}
let maybe_parent_file;
Expand All @@ -182,7 +192,7 @@ impl DefaultIndexStore {

tracing::info!(
?maybe_parent_file,
new_heads_count = new_heads.len(),
heads_count = historical_heads.len(),
"indexing commits reachable from historical heads"
);
// Build a list of ancestors of heads where parents and predecessors come after
Expand All @@ -192,23 +202,30 @@ impl DefaultIndexStore {
.as_ref()
.map_or(false, |segment| segment.as_composite().has_id(id))
};
let get_commit_with_op = |commit_id: &CommitId, op_id: &OperationId| {
let op_id = op_id.clone();
match store.get_commit(commit_id) {
// Propagate head's op_id to report possible source of an error.
// The op_id doesn't have to be included in the sort key, but
// that wouldn't matter since the commit should be unique.
Ok(commit) => Ok((CommitByCommitterTimestamp(commit), op_id)),
Err(source) => Err(DefaultIndexStoreError::IndexCommits { op_id, source }),
}
};
let commits = dag_walk::topo_order_reverse_ord_ok(
new_heads
historical_heads
.iter()
.filter(|&id| !parent_file_has_id(id))
.map(|id| store.get_commit(id))
.map_ok(CommitByCommitterTimestamp),
|CommitByCommitterTimestamp(commit)| commit.id().clone(),
|CommitByCommitterTimestamp(commit)| {
.filter(|&(commit_id, _)| !parent_file_has_id(commit_id))
.map(|(commit_id, op_id)| get_commit_with_op(commit_id, op_id)),
|(CommitByCommitterTimestamp(commit), _)| commit.id().clone(),
|(CommitByCommitterTimestamp(commit), op_id)| {
itertools::chain(commit.parent_ids(), commit.predecessor_ids())
.filter(|&id| !parent_file_has_id(id))
.map(|id| store.get_commit(id))
.map_ok(CommitByCommitterTimestamp)
.map(|commit_id| get_commit_with_op(commit_id, op_id))
.collect_vec()
},
)
.map_err(DefaultIndexStoreError::IndexCommits)?;
for CommitByCommitterTimestamp(commit) in commits.iter().rev() {
)?;
for (CommitByCommitterTimestamp(commit), _) in commits.iter().rev() {
mutable_index.add_commit(commit);
}

Expand Down
37 changes: 36 additions & 1 deletion lib/tests/test_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@
use std::fs;
use std::sync::Arc;

use assert_matches::assert_matches;
use jj_lib::backend::{CommitId, ObjectId as _};
use jj_lib::commit::Commit;
use jj_lib::commit_builder::CommitBuilder;
use jj_lib::default_index::{
AsCompositeIndex as _, CompositeIndex, DefaultMutableIndex, DefaultReadonlyIndex, IndexPosition,
AsCompositeIndex as _, CompositeIndex, DefaultIndexStore, DefaultIndexStoreError,
DefaultMutableIndex, DefaultReadonlyIndex, IndexPosition,
};
use jj_lib::index::Index as _;
use jj_lib::repo::{MutableRepo, ReadonlyRepo, Repo};
use jj_lib::settings::UserSettings;
use testutils::test_backend::TestBackend;
use testutils::{
commit_transactions, create_random_commit, load_repo_at_head, write_random_commit,
CommitGraphBuilder, TestRepo,
Expand Down Expand Up @@ -582,6 +585,38 @@ fn test_reindex_from_merged_operation() {
assert_eq!(index.num_commits(), 4);
}

#[test]
fn test_reindex_missing_commit() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init();
let repo = &test_repo.repo;

let mut tx = repo.start_transaction(&settings);
let missing_commit = write_random_commit(tx.mut_repo(), &settings);
let repo = tx.commit("test");
let bad_op_id = repo.op_id();

let mut tx = repo.start_transaction(&settings);
tx.mut_repo().remove_head(missing_commit.id());
let repo = tx.commit("test");

// Remove historical head commit to simulate bad GC.
let test_backend: &TestBackend = repo.store().backend_impl().downcast_ref().unwrap();
test_backend.remove_commit_unchecked(missing_commit.id());
let repo = load_repo_at_head(&settings, repo.repo_path()); // discard cache
assert!(repo.store().get_commit(missing_commit.id()).is_err());

// Reindexing error should include the operation id where the commit
// couldn't be found.
let default_index_store: &DefaultIndexStore =
repo.index_store().as_any().downcast_ref().unwrap();
default_index_store.reinit().unwrap();
let err = default_index_store
.build_index_at_operation(repo.operation(), repo.store())
.unwrap_err();
assert_matches!(err, DefaultIndexStoreError::IndexCommits { op_id, .. } if op_id == *bad_op_id);
}

/// Test that .jj/repo/index/type is created when the repo is created, and that
/// it is created when an old repo is loaded.
#[test]
Expand Down
4 changes: 4 additions & 0 deletions lib/testutils/src/test_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ impl TestBackend {
fn locked_data(&self) -> MutexGuard<'_, TestBackendData> {
self.data.lock().unwrap()
}

pub fn remove_commit_unchecked(&self, id: &CommitId) {
self.locked_data().commits.remove(id);
}
}

impl Debug for TestBackend {
Expand Down

0 comments on commit 8855bc2

Please sign in to comment.