Skip to content

Commit

Permalink
Merge pull request #1705 from GitoxideLabs/merge
Browse files Browse the repository at this point in the history
tree merge with tree-related auto-resolution
  • Loading branch information
Byron authored Dec 8, 2024
2 parents fadf106 + 960773e commit 520c832
Show file tree
Hide file tree
Showing 61 changed files with 1,989 additions and 508 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions crate-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -352,8 +352,11 @@ Check out the [performance discussion][gix-diff-performance] as well.
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
* [x] **tree**-diff-heuristics match Git for its test-cases
- [x] a way to generate an index with stages, mostly conforming with Git.
- [ ] resolve to be *ours* or the *ancestors* version of the tree.
- [ ] submodule merges (*right now they count as conflicts if they differ*)
- [ ] assure sparse indices are handled correctly during application - right now we refuse.
- [ ] rewrite so that the whole logic can be proven to be correct - it's too insane now and probably has way
more possible states than are tested, despite best attempts.
* [x] **commits** - with handling of multiple merge bases by recursive merge-base merge
* [x] API documentation
* [ ] Examples
Expand Down
7 changes: 6 additions & 1 deletion gitoxide-core/src/query/engine/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,12 @@ pub fn update(
});

let rewrites = {
let mut r = gix::diff::new_rewrites(&repo.config_snapshot(), true)?.unwrap_or_default();
// These are either configured, or we set them to the default. There is no turning them off.
let (r, was_configured) = gix::diff::new_rewrites(&repo.config_snapshot(), true)?;
if was_configured && r.is_none() {
gix::trace::warn!("Rename tracking is disabled by configuration, but we enable it using the default");
}
let mut r = r.unwrap_or_default();
r.copies = Some(gix::diff::rewrites::Copies {
source: if find_copies_harder {
CopySource::FromSetOfModifiedFilesAndAllSources
Expand Down
8 changes: 6 additions & 2 deletions gitoxide-core/src/repository/merge/commit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub fn commit(
Options {
format,
file_favor,
tree_favor,
in_memory,
debug,
}: Options,
Expand All @@ -31,7 +32,10 @@ pub fn commit(
let (ours_ref, ours_id) = refname_and_commit(&repo, ours)?;
let (theirs_ref, theirs_id) = refname_and_commit(&repo, theirs)?;

let options = repo.tree_merge_options()?.with_file_favor(file_favor);
let options = repo
.tree_merge_options()?
.with_file_favor(file_favor)
.with_tree_favor(tree_favor);
let ours_id_str = ours_id.to_string();
let theirs_id_str = theirs_id.to_string();
let labels = gix::merge::blob::builtin_driver::text::Labels {
Expand All @@ -49,7 +53,7 @@ pub fn commit(
.merge_commits(ours_id, theirs_id, labels, options.into())?
.tree_merge;
let has_conflicts = res.conflicts.is_empty();
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::Renames);
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::default());
{
let _span = gix::trace::detail!("Writing merged tree");
let mut written = 0;
Expand Down
9 changes: 7 additions & 2 deletions gitoxide-core/src/repository/merge/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::OutputFormat;
pub struct Options {
pub format: OutputFormat,
pub file_favor: Option<gix::merge::tree::FileFavor>,
pub tree_favor: Option<gix::merge::tree::TreeFavor>,
pub in_memory: bool,
pub debug: bool,
}
Expand All @@ -29,6 +30,7 @@ pub(super) mod function {
Options {
format,
file_favor,
tree_favor,
in_memory,
debug,
}: Options,
Expand All @@ -44,7 +46,10 @@ pub(super) mod function {
let (ours_ref, ours_id) = refname_and_tree(&repo, ours)?;
let (theirs_ref, theirs_id) = refname_and_tree(&repo, theirs)?;

let options = repo.tree_merge_options()?.with_file_favor(file_favor);
let options = repo
.tree_merge_options()?
.with_file_favor(file_favor)
.with_tree_favor(tree_favor);
let base_id_str = base_id.to_string();
let ours_id_str = ours_id.to_string();
let theirs_id_str = theirs_id.to_string();
Expand All @@ -64,7 +69,7 @@ pub(super) mod function {
};
let res = repo.merge_trees(base_id, ours_id, theirs_id, labels, options)?;
let has_conflicts = res.conflicts.is_empty();
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::Renames);
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::default());
{
let _span = gix::trace::detail!("Writing merged tree");
let mut written = 0;
Expand Down
1 change: 1 addition & 0 deletions gitoxide-core/src/repository/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ pub fn show(
copies: None,
percentage: Some(percentage),
limit: 0,
track_empty: false,
});
if opts.rewrites.is_some() {
if let Some(opts) = opts.dirwalk_options.as_mut() {
Expand Down
5 changes: 5 additions & 0 deletions gix-diff/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ pub struct Rewrites {
/// If the limit would not be enough to test the entire set of combinations, the algorithm will trade in precision and not
/// run the fuzzy version of identity tests at all. That way results are never partial.
pub limit: usize,

/// If `true`, empty blobs will be tracked. If `false`, they do not participate in rename tracking.
///
/// Leaving this off usually leads to better results as empty files don't have a unique-enough identity.
pub track_empty: bool,
}

/// Contains a [Tracker](rewrites::Tracker) to detect rewrites.
Expand Down
1 change: 1 addition & 0 deletions gix-diff/src/rewrites/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ impl Default for Rewrites {
copies: None,
percentage: Some(0.5),
limit: 1000,
track_empty: false,
}
}
}
51 changes: 44 additions & 7 deletions gix-diff/src/rewrites/tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,10 @@ impl<T: Change> Tracker<T> {
CopySource::FromSetOfModifiedFiles => {}
CopySource::FromSetOfModifiedFilesAndAllSources => {
push_source_tree(&mut |change, location| {
assert!(
self.try_push_change(change, location).is_none(),
"we must accept every change"
);
// make sure these aren't viable to be emitted anymore.
self.items.last_mut().expect("just pushed").emitted = true;
if self.try_push_change(change, location).is_none() {
// make sure these aren't viable to be emitted anymore.
self.items.last_mut().expect("just pushed").emitted = true;
}
})
.map_err(|err| emit::Error::GetItemsForExhaustiveCopyDetection(Box::new(err)))?;
self.items.sort_by(by_id_and_location);
Expand Down Expand Up @@ -341,6 +339,10 @@ impl<T: Change> Tracker<T> {
) -> Result<(), emit::Error> {
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
let needs_second_pass = !needs_exact_match(percentage);

// https://github.com/git/git/blob/cc01bad4a9f566cf4453c7edd6b433851b0835e2/diffcore-rename.c#L350-L369
// We would need a hashmap to be OK to not use the limit here, otherwise the performance is too bad.
// This also means we don't find all renames if we hit the rename limit.
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects, filter)? == Action::Cancel {
return Ok(());
}
Expand Down Expand Up @@ -384,10 +386,35 @@ impl<T: Change> Tracker<T> {
filter: Option<fn(&T) -> bool>,
) -> Result<Action, emit::Error> {
let mut dest_ofs = 0;
let mut num_checks = 0;
let max_checks = {
let limit = self.rewrites.limit.saturating_pow(2);
// There can be trees with a lot of entries and pathological search behaviour, as they can be repeated
// and then have a lot of similar hashes. This also means we have to search a lot of candidates which
// can be too slow despite best attempts. So play it save and detect such cases 'roughly' by amount of items.
if self.items.len() < 100_000 {
0
} else {
limit
}
};

while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
(!item.emitted
&& matches!(item.change.kind(), ChangeKind::Addition)
&& filter.map_or(true, |f| f(&item.change)))
&& filter.map_or_else(
|| {
self.rewrites.track_empty
// We always want to keep track of entries that are involved of a directory rename.
// Note that this may still match them up arbitrarily if empty, but empty is empty.
|| matches!(item.change.relation(), Some(Relation::ChildOfParent(_)))
|| {
let id = item.change.id();
id != gix_hash::ObjectId::empty_blob(id.kind())
}
},
|f| f(&item.change),
))
.then_some((idx, item))
}) {
dest_idx += dest_ofs;
Expand All @@ -403,6 +430,7 @@ impl<T: Change> Tracker<T> {
objects,
diff_cache,
&self.path_backing,
&mut num_checks,
)?
.map(|(src_idx, src, diff)| {
let (id, entry_mode) = src.change.id_and_entry_mode();
Expand All @@ -420,6 +448,12 @@ impl<T: Change> Tracker<T> {
src_idx,
)
});
if max_checks != 0 && num_checks > max_checks {
gix_trace::warn!(
"Cancelled rename matching as there were too many iterations ({num_checks} > {max_checks})"
);
return Ok(Action::Cancel);
}
let Some((src, src_idx)) = src else {
continue;
};
Expand Down Expand Up @@ -631,6 +665,7 @@ fn find_match<'a, T: Change>(
objects: &impl gix_object::FindObjectOrHeader,
diff_cache: &mut crate::blob::Platform,
path_backing: &[u8],
num_checks: &mut usize,
) -> Result<Option<SourceTuple<'a, T>>, emit::Error> {
let (item_id, item_mode) = item.change.id_and_entry_mode();
if needs_exact_match(percentage) || item_mode.is_link() {
Expand All @@ -651,6 +686,7 @@ fn find_match<'a, T: Change>(
}
let res = items[range.clone()].iter().enumerate().find_map(|(mut src_idx, src)| {
src_idx += range.start;
*num_checks += 1;
(src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)).then_some((src_idx, src, None))
});
if let Some(src) = res {
Expand Down Expand Up @@ -685,6 +721,7 @@ fn find_match<'a, T: Change>(
)?;
let prep = diff_cache.prepare_diff()?;
stats.num_similarity_checks += 1;
*num_checks += 1;
match prep.operation {
Operation::InternalDiff { algorithm } => {
let tokens =
Expand Down
10 changes: 10 additions & 0 deletions gix-diff/tests/diff/rewrites/tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ fn rename_by_id() -> crate::Result {
copies: None,
percentage: None,
limit,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
assert!(
Expand Down Expand Up @@ -80,6 +81,7 @@ fn copy_by_similarity_reports_limit_if_encountered() -> crate::Result {
}),
percentage: None,
limit: 1,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -132,6 +134,7 @@ fn copy_by_id() -> crate::Result {
}),
percentage: None,
limit,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -206,6 +209,7 @@ fn copy_by_id_search_in_all_sources() -> crate::Result {
}),
percentage: None,
limit,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -284,6 +288,7 @@ fn copy_by_50_percent_similarity() -> crate::Result {
}),
percentage: None,
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -363,6 +368,7 @@ fn copy_by_id_in_additions_only() -> crate::Result {
}),
percentage: None,
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -413,6 +419,7 @@ fn rename_by_similarity_reports_limit_if_encountered() -> crate::Result {
copies: None,
percentage: Some(0.5),
limit: 1,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -458,6 +465,7 @@ fn rename_by_50_percent_similarity() -> crate::Result {
copies: None,
percentage: Some(0.5),
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -547,6 +555,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result {
copies: None,
percentage: Some(0.5),
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(rename_by_similarity);
let tree_dst_id = 1;
Expand Down Expand Up @@ -638,6 +647,7 @@ fn simple_directory_rename_by_id() -> crate::Result {
copies: None,
percentage: None,
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(renames_by_identity);
let tree_dst_id = 1;
Expand Down
Loading

0 comments on commit 520c832

Please sign in to comment.