Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

git: add method to import commits, promote them to tree-conflict format #2232

Merged
merged 4 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 28 additions & 29 deletions lib/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use tempfile::NamedTempFile;
use thiserror::Error;

use crate::backend::{BackendError, CommitId, ObjectId};
use crate::git_backend::NO_GC_REF_NAMESPACE;
use crate::git_backend::GitBackend;
use crate::op_store::{BranchTarget, RefTarget, RefTargetOptionExt};
use crate::repo::{MutableRepo, Repo};
use crate::revset;
Expand Down Expand Up @@ -172,18 +172,6 @@ pub fn get_local_git_tracking_branch<'a>(view: &'a View, branch: &str) -> &'a Re
view.get_git_ref(&format!("refs/heads/{branch}"))
}

fn prevent_gc(git_repo: &git2::Repository, id: &CommitId) -> Result<(), git2::Error> {
martinvonz marked this conversation as resolved.
Show resolved Hide resolved
// If multiple processes do git::import_refs() in parallel, this can fail to
// acquire a lock file even with force=true.
git_repo.reference(
&format!("{}{}", NO_GC_REF_NAMESPACE, id.hex()),
Oid::from_bytes(id.as_bytes()).unwrap(),
true,
"used by jj",
)?;
Ok(())
}

/// Reflect changes made in the underlying Git repo in the Jujutsu repo.
///
/// This function detects conflicts (if both Git and JJ modified a branch) and
Expand Down Expand Up @@ -228,33 +216,44 @@ pub fn import_some_refs(

// Import new heads
let store = mut_repo.store();
// TODO: It might be better to obtain both git_repo and git_backend from
// mut_repo, and return error if the repo isn't backed by Git.
let git_backend = store.backend_impl().downcast_ref::<GitBackend>().unwrap();
// Bulk-import all reachable commits to reduce overhead of table merging.
let head_ids = itertools::chain(
&changed_git_head,
changed_git_refs.values().map(|(_, new_target)| new_target),
)
.flat_map(|target| target.added_ids());
let heads_imported = git_backend
.import_head_commits(head_ids, store.use_tree_conflict_format())
.is_ok();
let mut head_commits = Vec::new();
let get_commit = |id| {
// If bulk-import failed, try again to find bad head or ref.
if !heads_imported {
git_backend.import_head_commits([id], store.use_tree_conflict_format())?;
}
store.get_commit(id)
};
if let Some(new_head_target) = &changed_git_head {
for id in new_head_target.added_ids() {
let commit = store
.get_commit(id)
.map_err(|err| GitImportError::MissingHeadTarget {
id: id.clone(),
err,
})?;
let commit = get_commit(id).map_err(|err| GitImportError::MissingHeadTarget {
id: id.clone(),
err,
})?;
head_commits.push(commit);
}
}
for (ref_name, (_, new_git_target)) in &changed_git_refs {
for id in new_git_target.added_ids() {
let commit =
store
.get_commit(id)
.map_err(|err| GitImportError::MissingRefAncestor {
ref_name: ref_name.to_string(),
err,
})?;
let commit = get_commit(id).map_err(|err| GitImportError::MissingRefAncestor {
ref_name: ref_name.to_string(),
err,
})?;
head_commits.push(commit);
}
}
for commit in &head_commits {
prevent_gc(git_repo, commit.id())?;
}
mut_repo.add_heads(&head_commits);

// Apply the change that happened in git since last time we imported refs.
Expand Down
178 changes: 131 additions & 47 deletions lib/src/git_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@

use std::any::Any;
use std::fmt::{Debug, Error, Formatter};
use std::fs;
use std::io::{Cursor, Read};
use std::ops::Deref;
use std::path::Path;
use std::sync::{Arc, Mutex, MutexGuard};
use std::{fs, slice};

use git2::Oid;
use itertools::Itertools;
Expand All @@ -43,7 +43,7 @@ use crate::stacked_table::{
const HASH_LENGTH: usize = 20;
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
pub const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
const CONFLICT_SUFFIX: &str = ".jjconflict";

#[derive(Debug, Error)]
Expand Down Expand Up @@ -200,9 +200,52 @@ impl GitBackend {
*self.cached_extra_metadata.lock().unwrap() = Some(table);
Ok(())
}

/// Imports the given commits and ancestors from the backing Git repo.
#[tracing::instrument(skip(self, head_ids))]
pub fn import_head_commits<'a>(
&self,
head_ids: impl IntoIterator<Item = &'a CommitId>,
uses_tree_conflict_format: bool,
) -> BackendResult<()> {
let table = self.cached_extra_metadata_table()?;
let mut missing_head_ids = head_ids
.into_iter()
.filter(|&id| *id != self.root_commit_id && table.get_value(id.as_bytes()).is_none())
.collect_vec();
if missing_head_ids.is_empty() {
return Ok(());
}

// These commits are imported from Git. Make our change ids persist (otherwise
// future write_commit() could reassign new change id.)
tracing::debug!(
heads_count = missing_head_ids.len(),
"import extra metadata entries"
);
let locked_repo = self.repo.lock().unwrap();
let (table, table_lock) = self.read_extra_metadata_table_locked()?;
let mut mut_table = table.start_mutation();
// Concurrent write_commit() might have updated the table before taking a lock.
missing_head_ids.retain(|&id| mut_table.get_value(id.as_bytes()).is_none());
import_extra_metadata_entries_from_heads(
&locked_repo,
&mut mut_table,
&table_lock,
&missing_head_ids,
uses_tree_conflict_format,
)?;
for &id in &missing_head_ids {
prevent_gc(&locked_repo, id)?;
}
self.save_extra_metadata_table(mut_table, &table_lock)
}
}

fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit {
fn commit_from_git_without_root_parent(
commit: &git2::Commit,
uses_tree_conflict_format: bool,
) -> Commit {
// We reverse the bits of the commit id to create the change id. We don't want
// to use the first bytes unmodified because then it would be ambiguous
// if a given hash prefix refers to the commit id or the change id. It
Expand All @@ -223,7 +266,11 @@ fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit {
let tree_id = TreeId::from_bytes(commit.tree_id().as_bytes());
// If this commit is a conflict, we'll update the root tree later, when we read
// the extra metadata.
let root_tree = MergedTreeId::Legacy(tree_id);
let root_tree = if uses_tree_conflict_format {
MergedTreeId::resolved(tree_id)
} else {
MergedTreeId::Legacy(tree_id)
};
let description = commit.message().unwrap_or("<no message>").to_owned();
let author = signature_from_git(commit.author());
let committer = signature_from_git(commit.committer());
Expand Down Expand Up @@ -336,6 +383,18 @@ fn create_no_gc_ref() -> String {
format!("{NO_GC_REF_NAMESPACE}{}", hex::encode(random_bytes))
}

fn prevent_gc(git_repo: &git2::Repository, id: &CommitId) -> Result<(), BackendError> {
git_repo
.reference(
&format!("{NO_GC_REF_NAMESPACE}{}", id.hex()),
Oid::from_bytes(id.as_bytes()).unwrap(),
true,
"used by jj",
)
.map_err(|err| BackendError::Other(Box::new(err)))?;
Ok(())
}

fn validate_git_object_id(id: &impl ObjectId) -> Result<git2::Oid, BackendError> {
if id.as_bytes().len() != HASH_LENGTH {
return Err(BackendError::InvalidHashLength {
Expand Down Expand Up @@ -373,18 +432,18 @@ fn import_extra_metadata_entries_from_heads(
git_repo: &git2::Repository,
mut_table: &mut MutableTable,
_table_lock: &FileLock,
head_ids: &[CommitId],
missing_head_ids: &[&CommitId],
uses_tree_conflict_format: bool,
) -> BackendResult<()> {
let mut work_ids = head_ids
.iter()
.filter(|id| mut_table.get_value(id.as_bytes()).is_none())
.cloned()
.collect_vec();
let mut work_ids = missing_head_ids.iter().map(|&id| id.clone()).collect_vec();
while let Some(id) = work_ids.pop() {
let git_commit = git_repo
.find_commit(validate_git_object_id(&id)?)
.map_err(|err| map_not_found_err(err, &id))?;
let commit = commit_from_git_without_root_parent(&git_commit);
// TODO(#1624): Should we read the root tree here and check if it has a
// `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
// change the description of a commit with tree-level conflicts.
martinvonz marked this conversation as resolved.
Show resolved Hide resolved
let commit = commit_from_git_without_root_parent(&git_commit, uses_tree_conflict_format);
mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
work_ids.extend(
commit
Expand Down Expand Up @@ -614,7 +673,6 @@ impl Backend for GitBackend {
Ok(ConflictId::from_bytes(oid.as_bytes()))
}

#[tracing::instrument(skip(self))]
fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
if *id == self.root_commit_id {
return Ok(make_root_commit(
Expand All @@ -628,42 +686,21 @@ impl Backend for GitBackend {
let commit = locked_repo
.find_commit(git_commit_id)
.map_err(|err| map_not_found_err(err, id))?;
let mut commit = commit_from_git_without_root_parent(&commit);
let mut commit = commit_from_git_without_root_parent(&commit, false);
if commit.parents.is_empty() {
commit.parents.push(self.root_commit_id.clone());
};

let table = self.cached_extra_metadata_table()?;
if let Some(extras) = table.get_value(id.as_bytes()) {
deserialize_extras(&mut commit, extras);
} else {
let (table, table_lock) = self.read_extra_metadata_table_locked()?;
if let Some(extras) = table.get_value(id.as_bytes()) {
// Concurrent write_commit() would update extras before taking a lock.
deserialize_extras(&mut commit, extras);
*self.cached_extra_metadata.lock().unwrap() = Some(table);
} else {
// This commit is imported from Git. Make our change id persist (otherwise
// future write_commit() could reassign new change id.) It's likely that
// the commit is a branch head, so bulk-import metadata as much as possible.
tracing::debug!("import extra metadata entries");
let mut mut_table = table.start_mutation();
// TODO(#1624): Should we read the root tree here and check if it has a
// `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
// change the description of a commit with tree-level conflicts.
mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
if commit.parents != slice::from_ref(&self.root_commit_id) {
import_extra_metadata_entries_from_heads(
&locked_repo,
&mut mut_table,
&table_lock,
&commit.parents,
)?;
}
self.save_extra_metadata_table(mut_table, &table_lock)?;
}
}

let extras =
table
.get_value(id.as_bytes())
.ok_or_else(|| BackendError::ObjectNotFound {
object_type: id.object_type(),
hash: id.hex(),
source: "Not imported from Git".into(),
})?;
deserialize_extras(&mut commit, extras);
Ok(commit)
}

Expand Down Expand Up @@ -862,12 +899,14 @@ fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
#[cfg(test)]
mod tests {
use assert_matches::assert_matches;
use test_case::test_case;

use super::*;
use crate::backend::{FileId, MillisSinceEpoch};

#[test]
fn read_plain_git_commit() {
#[test_case(false; "legacy tree format")]
#[test_case(true; "tree-level conflict format")]
fn read_plain_git_commit(uses_tree_conflict_format: bool) {
let temp_dir = testutils::new_temp_dir();
let store_path = temp_dir.path();
let git_repo_path = temp_dir.path().join("git");
Expand Down Expand Up @@ -914,15 +953,47 @@ mod tests {
// Check that the git commit above got the hash we expect
assert_eq!(git_commit_id.as_bytes(), commit_id.as_bytes());

// Add an empty commit on top
let git_commit_id2 = git_repo
.commit(
None,
&git_author,
&git_committer,
"git commit message 2",
&git_tree,
&[&git_repo.find_commit(git_commit_id).unwrap()],
)
.unwrap();
let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());

let store = GitBackend::init_external(store_path, &git_repo_path).unwrap();

// Import the head commit and its ancestors
store
.import_head_commits([&commit_id2], uses_tree_conflict_format)
.unwrap();
// Ref should be created only for the head commit
let git_refs = store
.git_repo()
.references_glob("refs/jj/keep/*")
.unwrap()
.map(|git_ref| git_ref.unwrap().target().unwrap())
.collect_vec();
assert_eq!(git_refs, vec![git_commit_id2]);

let commit = store.read_commit(&commit_id).unwrap();
assert_eq!(&commit.change_id, &change_id);
assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
assert_eq!(commit.predecessors, vec![]);
assert_eq!(
commit.root_tree,
MergedTreeId::Legacy(TreeId::from_bytes(root_tree_id.as_bytes()))
commit.root_tree.to_merge(),
Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
);
if uses_tree_conflict_format {
assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
} else {
assert_matches!(commit.root_tree, MergedTreeId::Legacy(_));
}
assert_eq!(commit.description, "git commit message");
assert_eq!(commit.author.name, "git author");
assert_eq!(commit.author.email, "[email protected]");
Expand Down Expand Up @@ -977,6 +1048,19 @@ mod tests {
symlink.value(),
&TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
);

let commit2 = store.read_commit(&commit_id2).unwrap();
assert_eq!(commit2.parents, vec![commit_id.clone()]);
assert_eq!(commit.predecessors, vec![]);
assert_eq!(
commit.root_tree.to_merge(),
Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes()))
);
if uses_tree_conflict_format {
assert_matches!(commit.root_tree, MergedTreeId::Merge(_));
} else {
assert_matches!(commit.root_tree, MergedTreeId::Legacy(_));
}
}

#[test]
Expand Down
Loading