Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tree merge with tree-related auto-resolution #1705

Merged
merged 20 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
a57192c
fix: when binary merges are performed, adjust the returned resolution…
Byron Nov 25, 2024
1c3ba81
feat!: replace `tree::Options::allow_lossy_resolution` with `*::tree_…
Byron Nov 25, 2024
3228de6
adapt to changes in `gix-merge`
Byron Nov 25, 2024
e487cca
implement support for resolving irreconcilable tree conflicts with 'o…
Byron Dec 2, 2024
e17b3a9
feat: add `merge::tree::TreeFavor` similar to `*::FileFavor`.
Byron Dec 1, 2024
471e046
feat: add `--tree-favor` to `gix merge tree|commit`.
Byron Dec 1, 2024
b2b8181
Improve merge related API in `gix`
Byron Dec 2, 2024
8b694a6
fix: create a more local lock when creating writable fixtures.
Byron Dec 4, 2024
efc71fd
fix: public access to the contained repository in wrapped types, like…
Byron Dec 5, 2024
bd905a6
fix!: assure that rename tracking can be turned off.
Byron Dec 5, 2024
9896d5d
adapt to changes in `gix`
Byron Dec 5, 2024
d281ba6
fix: enforce `diff.renamelimit` even for identical name matching
Byron Dec 5, 2024
530257f
fix: assure passwords aren't returned in percent-encoded form.
Byron Dec 6, 2024
01f66eb
refactor `gix-url` tests
Byron Dec 6, 2024
3e94b58
fix!: assure that `tree::apply_index_entries()` cannot unknowingly le…
Byron Dec 7, 2024
aaeb427
adapt to changes in `gix-merge`
Byron Dec 7, 2024
da585db
fix: don't incorrectly mark as auto-resolved conflict if there was none.
Byron Dec 7, 2024
d3f5f27
feat!: add support for not tracking single empty files.
Byron Dec 8, 2024
f53cec5
adapt to changes in `gix-diff` related to not tracking empty blobs an…
Byron Dec 8, 2024
960773e
adapt to changes in `gix-diff`
Byron Dec 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions crate-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -352,8 +352,11 @@ Check out the [performance discussion][gix-diff-performance] as well.
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
* [x] **tree**-diff-heuristics match Git for its test-cases
- [x] a way to generate an index with stages, mostly conforming with Git.
- [ ] resolve to be *ours* or the *ancestors* version of the tree.
- [ ] submodule merges (*right now they count as conflicts if they differ*)
- [ ] assure sparse indices are handled correctly during application - right now we refuse.
- [ ] rewrite so that the whole logic can be proven to be correct - it's too insane now and probably has way
more possible states than are tested, despite best attempts.
* [x] **commits** - with handling of multiple merge bases by recursive merge-base merge
* [x] API documentation
* [ ] Examples
Expand Down
7 changes: 6 additions & 1 deletion gitoxide-core/src/query/engine/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,12 @@ pub fn update(
});

let rewrites = {
let mut r = gix::diff::new_rewrites(&repo.config_snapshot(), true)?.unwrap_or_default();
// These are either configured, or we set them to the default. There is no turning them off.
let (r, was_configured) = gix::diff::new_rewrites(&repo.config_snapshot(), true)?;
if was_configured && r.is_none() {
gix::trace::warn!("Rename tracking is disabled by configuration, but we enable it using the default");
}
let mut r = r.unwrap_or_default();
r.copies = Some(gix::diff::rewrites::Copies {
source: if find_copies_harder {
CopySource::FromSetOfModifiedFilesAndAllSources
Expand Down
8 changes: 6 additions & 2 deletions gitoxide-core/src/repository/merge/commit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub fn commit(
Options {
format,
file_favor,
tree_favor,
in_memory,
debug,
}: Options,
Expand All @@ -31,7 +32,10 @@ pub fn commit(
let (ours_ref, ours_id) = refname_and_commit(&repo, ours)?;
let (theirs_ref, theirs_id) = refname_and_commit(&repo, theirs)?;

let options = repo.tree_merge_options()?.with_file_favor(file_favor);
let options = repo
.tree_merge_options()?
.with_file_favor(file_favor)
.with_tree_favor(tree_favor);
let ours_id_str = ours_id.to_string();
let theirs_id_str = theirs_id.to_string();
let labels = gix::merge::blob::builtin_driver::text::Labels {
Expand All @@ -49,7 +53,7 @@ pub fn commit(
.merge_commits(ours_id, theirs_id, labels, options.into())?
.tree_merge;
let has_conflicts = res.conflicts.is_empty();
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::Renames);
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::default());
{
let _span = gix::trace::detail!("Writing merged tree");
let mut written = 0;
Expand Down
9 changes: 7 additions & 2 deletions gitoxide-core/src/repository/merge/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::OutputFormat;
pub struct Options {
pub format: OutputFormat,
pub file_favor: Option<gix::merge::tree::FileFavor>,
pub tree_favor: Option<gix::merge::tree::TreeFavor>,
pub in_memory: bool,
pub debug: bool,
}
Expand All @@ -29,6 +30,7 @@ pub(super) mod function {
Options {
format,
file_favor,
tree_favor,
in_memory,
debug,
}: Options,
Expand All @@ -44,7 +46,10 @@ pub(super) mod function {
let (ours_ref, ours_id) = refname_and_tree(&repo, ours)?;
let (theirs_ref, theirs_id) = refname_and_tree(&repo, theirs)?;

let options = repo.tree_merge_options()?.with_file_favor(file_favor);
let options = repo
.tree_merge_options()?
.with_file_favor(file_favor)
.with_tree_favor(tree_favor);
let base_id_str = base_id.to_string();
let ours_id_str = ours_id.to_string();
let theirs_id_str = theirs_id.to_string();
Expand All @@ -64,7 +69,7 @@ pub(super) mod function {
};
let res = repo.merge_trees(base_id, ours_id, theirs_id, labels, options)?;
let has_conflicts = res.conflicts.is_empty();
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::Renames);
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::default());
{
let _span = gix::trace::detail!("Writing merged tree");
let mut written = 0;
Expand Down
1 change: 1 addition & 0 deletions gitoxide-core/src/repository/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ pub fn show(
copies: None,
percentage: Some(percentage),
limit: 0,
track_empty: false,
});
if opts.rewrites.is_some() {
if let Some(opts) = opts.dirwalk_options.as_mut() {
Expand Down
5 changes: 5 additions & 0 deletions gix-diff/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ pub struct Rewrites {
/// If the limit would not be enough to test the entire set of combinations, the algorithm will trade in precision and not
/// run the fuzzy version of identity tests at all. That way results are never partial.
pub limit: usize,

/// If `true`, empty blobs will be tracked. If `false`, they do not participate in rename tracking.
///
/// Leaving this off usually leads to better results as empty files don't have a unique-enough identity.
pub track_empty: bool,
}

/// Contains a [Tracker](rewrites::Tracker) to detect rewrites.
Expand Down
1 change: 1 addition & 0 deletions gix-diff/src/rewrites/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ impl Default for Rewrites {
copies: None,
percentage: Some(0.5),
limit: 1000,
track_empty: false,
}
}
}
51 changes: 44 additions & 7 deletions gix-diff/src/rewrites/tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,10 @@ impl<T: Change> Tracker<T> {
CopySource::FromSetOfModifiedFiles => {}
CopySource::FromSetOfModifiedFilesAndAllSources => {
push_source_tree(&mut |change, location| {
assert!(
self.try_push_change(change, location).is_none(),
"we must accept every change"
);
// make sure these aren't viable to be emitted anymore.
self.items.last_mut().expect("just pushed").emitted = true;
if self.try_push_change(change, location).is_none() {
// make sure these aren't viable to be emitted anymore.
self.items.last_mut().expect("just pushed").emitted = true;
}
})
.map_err(|err| emit::Error::GetItemsForExhaustiveCopyDetection(Box::new(err)))?;
self.items.sort_by(by_id_and_location);
Expand Down Expand Up @@ -341,6 +339,10 @@ impl<T: Change> Tracker<T> {
) -> Result<(), emit::Error> {
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
let needs_second_pass = !needs_exact_match(percentage);

// https://github.com/git/git/blob/cc01bad4a9f566cf4453c7edd6b433851b0835e2/diffcore-rename.c#L350-L369
// We would need a hashmap to be OK to not use the limit here, otherwise the performance is too bad.
// This also means we don't find all renames if we hit the rename limit.
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects, filter)? == Action::Cancel {
return Ok(());
}
Expand Down Expand Up @@ -384,10 +386,35 @@ impl<T: Change> Tracker<T> {
filter: Option<fn(&T) -> bool>,
) -> Result<Action, emit::Error> {
let mut dest_ofs = 0;
let mut num_checks = 0;
let max_checks = {
let limit = self.rewrites.limit.saturating_pow(2);
// There can be trees with a lot of entries and pathological search behaviour, as they can be repeated
// and then have a lot of similar hashes. This also means we have to search a lot of candidates which
// can be too slow despite best attempts. So play it save and detect such cases 'roughly' by amount of items.
if self.items.len() < 100_000 {
0
} else {
limit
}
};

while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
(!item.emitted
&& matches!(item.change.kind(), ChangeKind::Addition)
&& filter.map_or(true, |f| f(&item.change)))
&& filter.map_or_else(
|| {
self.rewrites.track_empty
// We always want to keep track of entries that are involved of a directory rename.
// Note that this may still match them up arbitrarily if empty, but empty is empty.
|| matches!(item.change.relation(), Some(Relation::ChildOfParent(_)))
|| {
let id = item.change.id();
id != gix_hash::ObjectId::empty_blob(id.kind())
}
},
|f| f(&item.change),
))
.then_some((idx, item))
}) {
dest_idx += dest_ofs;
Expand All @@ -403,6 +430,7 @@ impl<T: Change> Tracker<T> {
objects,
diff_cache,
&self.path_backing,
&mut num_checks,
)?
.map(|(src_idx, src, diff)| {
let (id, entry_mode) = src.change.id_and_entry_mode();
Expand All @@ -420,6 +448,12 @@ impl<T: Change> Tracker<T> {
src_idx,
)
});
if max_checks != 0 && num_checks > max_checks {
gix_trace::warn!(
"Cancelled rename matching as there were too many iterations ({num_checks} > {max_checks})"
);
return Ok(Action::Cancel);
}
let Some((src, src_idx)) = src else {
continue;
};
Expand Down Expand Up @@ -631,6 +665,7 @@ fn find_match<'a, T: Change>(
objects: &impl gix_object::FindObjectOrHeader,
diff_cache: &mut crate::blob::Platform,
path_backing: &[u8],
num_checks: &mut usize,
) -> Result<Option<SourceTuple<'a, T>>, emit::Error> {
let (item_id, item_mode) = item.change.id_and_entry_mode();
if needs_exact_match(percentage) || item_mode.is_link() {
Expand All @@ -651,6 +686,7 @@ fn find_match<'a, T: Change>(
}
let res = items[range.clone()].iter().enumerate().find_map(|(mut src_idx, src)| {
src_idx += range.start;
*num_checks += 1;
(src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)).then_some((src_idx, src, None))
});
if let Some(src) = res {
Expand Down Expand Up @@ -685,6 +721,7 @@ fn find_match<'a, T: Change>(
)?;
let prep = diff_cache.prepare_diff()?;
stats.num_similarity_checks += 1;
*num_checks += 1;
match prep.operation {
Operation::InternalDiff { algorithm } => {
let tokens =
Expand Down
10 changes: 10 additions & 0 deletions gix-diff/tests/diff/rewrites/tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ fn rename_by_id() -> crate::Result {
copies: None,
percentage: None,
limit,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
assert!(
Expand Down Expand Up @@ -80,6 +81,7 @@ fn copy_by_similarity_reports_limit_if_encountered() -> crate::Result {
}),
percentage: None,
limit: 1,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -132,6 +134,7 @@ fn copy_by_id() -> crate::Result {
}),
percentage: None,
limit,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -206,6 +209,7 @@ fn copy_by_id_search_in_all_sources() -> crate::Result {
}),
percentage: None,
limit,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -284,6 +288,7 @@ fn copy_by_50_percent_similarity() -> crate::Result {
}),
percentage: None,
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -363,6 +368,7 @@ fn copy_by_id_in_additions_only() -> crate::Result {
}),
percentage: None,
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -413,6 +419,7 @@ fn rename_by_similarity_reports_limit_if_encountered() -> crate::Result {
copies: None,
percentage: Some(0.5),
limit: 1,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -458,6 +465,7 @@ fn rename_by_50_percent_similarity() -> crate::Result {
copies: None,
percentage: Some(0.5),
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(rewrites);
let odb = util::add_retained_blobs(
Expand Down Expand Up @@ -547,6 +555,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result {
copies: None,
percentage: Some(0.5),
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(rename_by_similarity);
let tree_dst_id = 1;
Expand Down Expand Up @@ -638,6 +647,7 @@ fn simple_directory_rename_by_id() -> crate::Result {
copies: None,
percentage: None,
limit: 0,
track_empty: false,
};
let mut track = util::new_tracker(renames_by_identity);
let tree_dst_id = 1;
Expand Down
Loading
Loading