diff --git a/Cargo.lock b/Cargo.lock index dcfc0729ce5..21a56b4c908 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2114,6 +2114,7 @@ dependencies = [ "gix-filter", "gix-fs 0.12.0", "gix-hash 0.15.0", + "gix-index 0.36.0", "gix-object 0.45.0", "gix-odb", "gix-path 0.10.12", diff --git a/crate-status.md b/crate-status.md index 9dcf98f1248..0774895b67a 100644 --- a/crate-status.md +++ b/crate-status.md @@ -318,6 +318,8 @@ Check out the [performance discussion][gix-diff-performance] as well. * [x] find blobs by similarity check * [ ] heuristics to find best candidate * [ ] find by basename to support similarity check + - Not having it can lead to issues when files with the same or similar content are part of a move + as files can be lost that way. * [x] directory tracking - [x] by identity - [ ] by similarity @@ -349,8 +351,7 @@ Check out the [performance discussion][gix-diff-performance] as well. - [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change). - [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same) * [x] **tree**-diff-heuristics match Git for its test-cases - - [ ] a way to generate an index with stages - - *currently the data it provides won't generate index entries, and possibly can't be used for it yet* + - [x] a way to generate an index with stages, mostly conforming with Git. - [ ] submodule merges (*right now they count as conflicts if they differ*) * [x] **commits** - with handling of multiple merge bases by recursive merge-base merge * [x] API documentation diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index b8439ba982b..5f3f0740cf9 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -32,6 +32,7 @@ gix-quote = { version = "^0.4.13", path = "../gix-quote" } gix-revision = { version = "^0.30.0", path = "../gix-revision", default-features = false, features = ["merge_base"] } gix-revwalk = { version = "^0.16.0", path = "../gix-revwalk" } gix-diff = { version = "^0.47.0", path = "../gix-diff", default-features = false, features = ["blob"] } +gix-index = { version = "^0.36.0", path = "../gix-index" } thiserror = "2.0.0" imara-diff = { version = "0.1.7" } diff --git a/gix-merge/src/tree/function.rs b/gix-merge/src/tree/function.rs index eaa0ea933ba..ca131db27a7 100644 --- a/gix-merge/src/tree/function.rs +++ b/gix-merge/src/tree/function.rs @@ -3,7 +3,10 @@ use crate::tree::utils::{ to_components, track, unique_path_in_tree, ChangeList, ChangeListRef, PossibleConflict, TrackedChange, TreeNodes, }; use crate::tree::ConflictMapping::{Original, Swapped}; -use crate::tree::{Conflict, ConflictMapping, ContentMerge, Error, Options, Outcome, Resolution, ResolutionFailure}; +use crate::tree::{ + Conflict, ConflictIndexEntry, ConflictIndexEntryPathHint, ConflictMapping, ContentMerge, Error, Options, Outcome, + Resolution, ResolutionFailure, +}; use bstr::{BString, ByteSlice}; use gix_diff::tree::recorder::Location; use gix_diff::tree_with_rewrites::Change; @@ -190,11 +193,14 @@ where }) { None => { if let Some((rewritten_location, ours_idx)) = rewritten_location { + // `no_entry` to the index because that's not a conflict at all, + // but somewhat advanced rename tracking. if should_fail_on_conflict(Conflict::with_resolution( Resolution::SourceLocationAffectedByRename { final_location: rewritten_location.to_owned(), }, (&our_changes[*ours_idx].inner, theirs, Original, outer_side), + [None, None, None], )) { break 'outer; }; @@ -254,10 +260,12 @@ where if let Some(ours) = ours { gix_trace::debug!("Turning a case we could probably handle into a conflict for now. theirs: {theirs:#?} ours: {ours:#?} kind: {match_kind:?}"); if allow_resolution_failure - && should_fail_on_conflict(Conflict::without_resolution( - ResolutionFailure::Unknown, - (&ours.inner, theirs, Original, outer_side), - )) + && should_fail_on_conflict(Conflict::unknown(( + &ours.inner, + theirs, + Original, + outer_side, + ))) { break 'outer; }; @@ -328,7 +336,7 @@ where pick_our_changes(side, our_changes, their_changes), ); let renamed_without_change = their_source_id == their_id; - let (our_id, resolution) = if renamed_without_change { + let (merged_blob_id, resolution) = if renamed_without_change { (*our_id, None) } else { let (our_location, our_id, our_mode, their_location, their_id, their_mode) = @@ -373,18 +381,23 @@ where location: their_rewritten_location.unwrap_or_else(|| their_location.to_owned()), relation: None, entry_mode: merged_mode, - id: our_id, + id: merged_blob_id, }; if should_fail_on_conflict(Conflict::with_resolution( Resolution::OursModifiedTheirsRenamedAndChangedThenRename { merged_mode: (merged_mode != *their_mode).then_some(merged_mode), merged_blob: resolution.map(|resolution| ContentMerge { resolution, - merged_blob_id: our_id, + merged_blob_id, }), final_location, }, (ours, theirs, side, outer_side), + [ + index_entry(previous_entry_mode, previous_id), + index_entry(our_mode, our_id), + index_entry(their_mode, their_id), + ], )) { break 'outer; } @@ -401,6 +414,19 @@ where if should_fail_on_conflict(Conflict::without_resolution( ResolutionFailure::OursModifiedTheirsRenamedTypeMismatch, (ours, theirs, side, outer_side), + [ + index_entry_at_path( + previous_entry_mode, + previous_id, + ConflictIndexEntryPathHint::RenamedOrTheirs, + ), + None, + index_entry_at_path( + their_mode, + their_id, + ConflictIndexEntryPathHint::RenamedOrTheirs, + ), + ], )) { break 'outer; } @@ -421,7 +447,7 @@ where .. }, ) if !involves_submodule(our_mode, their_mode) - && our_mode.kind() == their_mode.kind() + && merge_modes(*our_mode, *their_mode).is_some() && our_id != their_id => { let (merged_blob_id, resolution) = perform_blob_merge( @@ -436,7 +462,11 @@ where (0, outer_side), &options, )?; - editor.upsert(toc(location), our_mode.kind(), merged_blob_id)?; + + let merged_mode = merge_modes_prev(*our_mode, *their_mode, *previous_entry_mode) + .expect("BUG: merge_modes() reports a valid mode, this one should do too"); + + editor.upsert(toc(location), merged_mode.kind(), merged_blob_id)?; if should_fail_on_conflict(Conflict::with_resolution( Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob: ContentMerge { @@ -445,6 +475,11 @@ where }, }, (ours, theirs, Original, outer_side), + [ + index_entry(previous_entry_mode, previous_id), + index_entry(our_mode, our_id), + index_entry(their_mode, their_id), + ], )) { break 'outer; }; @@ -489,27 +524,28 @@ where }, }, (ours, theirs, Original, outer_side), + [None, index_entry(our_mode, our_id), index_entry(their_mode, their_id)], )) } else if allow_resolution_failure { // Actually this has a preference, as symlinks are always left in place with the other side renamed. let ( logical_side, label_of_side_to_be_moved, - (our_mode, our_id), - (their_mode, their_id), + (our_mode, our_id, our_path_hint), + (their_mode, their_id, their_path_hint), ) = if matches!(our_mode.kind(), EntryKind::Link | EntryKind::Tree) { ( Original, labels.other.unwrap_or_default(), - (*our_mode, *our_id), - (*their_mode, *their_id), + (*our_mode, *our_id, ConflictIndexEntryPathHint::Current), + (*their_mode, *their_id, ConflictIndexEntryPathHint::RenamedOrTheirs), ) } else { ( Swapped, labels.current.unwrap_or_default(), - (*their_mode, *their_id), - (*our_mode, *our_id), + (*their_mode, *their_id, ConflictIndexEntryPathHint::RenamedOrTheirs), + (*our_mode, *our_id, ConflictIndexEntryPathHint::Current), ) }; let tree_with_rename = pick_our_tree(logical_side, their_tree, our_tree); @@ -525,6 +561,11 @@ where their_unique_location: renamed_location.clone(), }, (ours, theirs, logical_side, outer_side), + [ + None, + index_entry_at_path(&our_mode, &our_id, our_path_hint), + index_entry_at_path(&their_mode, &their_id, their_path_hint), + ], ); let new_change = Change::Addition { @@ -554,7 +595,8 @@ where location, entry_mode, id, - .. + previous_entry_mode, + previous_id, }, Change::Deletion { .. }, ) @@ -564,7 +606,8 @@ where location, entry_mode, id, - .. + previous_entry_mode, + previous_id, }, ) if allow_resolution_failure => { let (label_of_side_to_be_moved, side) = if matches!(ours, Change::Modification { .. }) { @@ -606,6 +649,11 @@ where renamed_unique_path_to_modified_blob: renamed_path, }, (ours, theirs, side, outer_side), + [ + index_entry(previous_entry_mode, previous_id), + index_entry(entry_mode, id), + None, + ], )); // Since we move *our* side, our tree needs to be modified. @@ -621,6 +669,11 @@ where let should_break = should_fail_on_conflict(Conflict::without_resolution( ResolutionFailure::OursModifiedTheirsDeleted, (ours, theirs, side, outer_side), + [ + index_entry(previous_entry_mode, previous_id), + index_entry(entry_mode, id), + None, + ], )); editor.upsert(toc(location), entry_mode.kind(), *id)?; if should_break { @@ -702,10 +755,9 @@ where // Pretend this is the end of the loop and keep this as conflict. // If this happens in the wild, we'd want to reproduce it. if allow_resolution_failure - && should_fail_on_conflict(Conflict::without_resolution( - ResolutionFailure::Unknown, - (ours, theirs, Original, outer_side), - )) + && should_fail_on_conflict(Conflict::unknown(( + ours, theirs, Original, outer_side, + ))) { break 'outer; }; @@ -738,6 +790,23 @@ where }), }, (ours, theirs, Original, outer_side), + [ + index_entry_at_path( + source_entry_mode, + source_id, + ConflictIndexEntryPathHint::Source, + ), + index_entry_at_path( + our_mode, + &merged_blob_id, + ConflictIndexEntryPathHint::Current, + ), + index_entry_at_path( + their_mode, + &merged_blob_id, + ConflictIndexEntryPathHint::RenamedOrTheirs, + ), + ], )) { break 'outer; }; @@ -767,6 +836,23 @@ where }, }, (ours, theirs, Original, outer_side), + [ + index_entry_at_path( + source_entry_mode, + source_id, + ConflictIndexEntryPathHint::Source, + ), + index_entry_at_path( + our_mode, + &merged_blob_id, + ConflictIndexEntryPathHint::Current, + ), + index_entry_at_path( + their_mode, + &merged_blob_id, + ConflictIndexEntryPathHint::RenamedOrTheirs, + ), + ], )) { break 'outer; }; @@ -824,6 +910,15 @@ where if should_fail_on_conflict(Conflict::without_resolution( ResolutionFailure::OursDeletedTheirsRenamed, (ours, theirs, side, outer_side), + [ + None, + None, + index_entry_at_path( + rewritten_mode, + rewritten_id, + ConflictIndexEntryPathHint::RenamedOrTheirs, + ), + ], )) { break 'outer; }; @@ -901,6 +996,7 @@ where }, }, (ours, theirs, Original, outer_side), + [None, index_entry(our_mode, our_id), index_entry(their_mode, their_id)], )) { break 'outer; }; @@ -916,21 +1012,21 @@ where let ( logical_side, label_of_side_to_be_moved, - (our_mode, our_id), - (their_mode, their_id), + (our_mode, our_id, our_path_hint), + (their_mode, their_id, their_path_hint), ) = if matches!(our_mode.kind(), EntryKind::Link | EntryKind::Tree) { ( Original, labels.other.unwrap_or_default(), - (*our_mode, *our_id), - (*their_mode, *their_id), + (*our_mode, *our_id, ConflictIndexEntryPathHint::Current), + (*their_mode, *their_id, ConflictIndexEntryPathHint::RenamedOrTheirs), ) } else { ( Swapped, labels.current.unwrap_or_default(), - (*their_mode, *their_id), - (*our_mode, *our_id), + (*their_mode, *their_id, ConflictIndexEntryPathHint::RenamedOrTheirs), + (*our_mode, *our_id, ConflictIndexEntryPathHint::Current), ) }; let tree_with_rename = pick_our_tree(logical_side, their_tree, our_tree); @@ -946,6 +1042,11 @@ where their_unique_location: renamed_location.clone(), }, (ours, theirs, side, outer_side), + [ + None, + index_entry_at_path(&our_mode, &our_id, our_path_hint), + index_entry_at_path(&their_mode, &their_id, their_path_hint), + ], ); let new_change_with_rename = Change::Addition { @@ -970,10 +1071,7 @@ where } _unknown => { if allow_resolution_failure - && should_fail_on_conflict(Conflict::without_resolution( - ResolutionFailure::Unknown, - (ours, theirs, Original, outer_side), - )) + && should_fail_on_conflict(Conflict::unknown((ours, theirs, Original, outer_side))) { break 'outer; }; @@ -1004,12 +1102,40 @@ fn involves_submodule(a: &EntryMode, b: &EntryMode) -> bool { a.is_commit() || b.is_commit() } -/// Allows equal modes or preferes executables bits in case of blobs +/// Allows equal modes or prefers executables bits in case of blobs +/// +/// Note that this is often not correct as the previous mode of each side should be taken into account so that: +/// +/// on | on = on +/// off | off = off +/// on | off || off | on = conflict fn merge_modes(a: EntryMode, b: EntryMode) -> Option { match (a.kind(), b.kind()) { + (_, _) if a == b => Some(a), (EntryKind::BlobExecutable, EntryKind::BlobExecutable | EntryKind::Blob) | (EntryKind::Blob, EntryKind::BlobExecutable) => Some(EntryKind::BlobExecutable.into()), + _ => None, + } +} + +/// Use this version if there is a single common `prev` value for both `a` and `b` to detect +/// if the mode was turned on or off. +fn merge_modes_prev(a: EntryMode, b: EntryMode, prev: EntryMode) -> Option { + match (a.kind(), b.kind()) { (_, _) if a == b => Some(a), + (a @ EntryKind::BlobExecutable, b @ (EntryKind::BlobExecutable | EntryKind::Blob)) + | (a @ EntryKind::Blob, b @ EntryKind::BlobExecutable) => { + let prev = prev.kind(); + let changed = if a == prev { b } else { a }; + Some( + match (prev, changed) { + (EntryKind::Blob, EntryKind::BlobExecutable) => EntryKind::BlobExecutable, + (EntryKind::BlobExecutable, EntryKind::Blob) => EntryKind::Blob, + _ => unreachable!("upper match already assured we only deal with blobs"), + } + .into(), + ) + } _ => None, } } @@ -1066,3 +1192,23 @@ fn pick_our_changes_mut<'a>( Swapped => theirs, } } + +fn index_entry(mode: &gix_object::tree::EntryMode, id: &gix_hash::ObjectId) -> Option { + Some(ConflictIndexEntry { + mode: *mode, + id: *id, + path_hint: None, + }) +} + +fn index_entry_at_path( + mode: &gix_object::tree::EntryMode, + id: &gix_hash::ObjectId, + hint: ConflictIndexEntryPathHint, +) -> Option { + Some(ConflictIndexEntry { + mode: *mode, + id: *id, + path_hint: Some(hint), + }) +} diff --git a/gix-merge/src/tree/mod.rs b/gix-merge/src/tree/mod.rs index 37a6453ea82..cd66d1ffa9a 100644 --- a/gix-merge/src/tree/mod.rs +++ b/gix-merge/src/tree/mod.rs @@ -81,6 +81,17 @@ impl Outcome<'_> { pub fn has_unresolved_conflicts(&self, how: TreatAsUnresolved) -> bool { self.conflicts.iter().any(|c| c.is_unresolved(how)) } + + /// Returns `true` if `index` changed as we applied conflicting stages to it, using `how` to determine if a + /// conflict should be considered unresolved. + /// It's important that `index` is at the state of [`Self::tree`]. + /// + /// Note that in practice, whenever there is a single [conflict](Conflict), this function will return `true`. + /// Also, the unconflicted stage of such entries will be removed merely by setting a flag, so the + /// in-memory entry is still present. + pub fn index_changed_after_applying_conflicts(&self, index: &mut gix_index::State, how: TreatAsUnresolved) -> bool { + apply_index_entries(&self.conflicts, how, index) + } } /// A description of a conflict (i.e. merge issue without an auto-resolution) as seen during a [tree-merge](crate::tree()). @@ -99,11 +110,45 @@ pub struct Conflict { pub ours: Change, /// The change representing *their* side. pub theirs: Change, + /// An array to store an entry for each stage of the conflict. + /// + /// * `entries[0]` => Base + /// * `entries[1]` => Ours + /// * `entries[2]` => Theirs + /// + /// Note that ours and theirs might be swapped, so one should access it through [`Self::entries()`] to compensate for that. + pub entries: [Option; 3], /// Determine how to interpret the `ours` and `theirs` fields. This is used to implement [`Self::changes_in_resolution()`] /// and [`Self::into_parts_by_resolution()`]. map: ConflictMapping, } +/// A conflicting entry for insertion into the index. +/// It will always be either on stage 1 (ancestor/base), 2 (ours) or 3 (theirs) +#[derive(Debug, Clone, Copy)] +pub struct ConflictIndexEntry { + /// The kind of object at this stage. + /// Note that it's possible that this is a directory, for instance if a directory was replaced with a file. + pub mode: gix_object::tree::EntryMode, + /// The id defining the state of the object. + pub id: gix_hash::ObjectId, + /// Hidden, maybe one day we can do without? + path_hint: Option, +} + +/// A hint for [`apply_index_entries()`] to know which paths to use for an entry. +/// This is only used when necessary. +#[derive(Debug, Clone, Copy)] +enum ConflictIndexEntryPathHint { + /// Use the previous path, i.e. rename source. + Source, + /// Use the current path as it is in the tree. + Current, + /// Use the path of the final destination, or *their* name. + /// It's definitely finicky, as we don't store the actual path and instead refer to it. + RenamedOrTheirs, +} + /// A utility to help define which side is what in the [`Conflict`] type. #[derive(Debug, Clone, Copy)] enum ConflictMapping { @@ -147,7 +192,11 @@ impl Conflict { TreatAsUnresolved::Renames | TreatAsUnresolved::RenamesAndAutoResolvedContent => match &self.resolution { Ok(success) => match success { Resolution::SourceLocationAffectedByRename { .. } => false, - Resolution::OursModifiedTheirsRenamedAndChangedThenRename { .. } => true, + Resolution::OursModifiedTheirsRenamedAndChangedThenRename { + merged_blob, + final_location, + .. + } => final_location.is_some() || merged_blob.as_ref().map_or(false, content_merge_matches), Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => { content_merge_matches(merged_blob) } @@ -178,6 +227,14 @@ impl Conflict { } } + /// Return the index entries for insertion into the index, to match with what's returned by [`Self::changes_in_resolution()`]. + pub fn entries(&self) -> [Option; 3] { + match self.map { + ConflictMapping::Original => self.entries, + ConflictMapping::Swapped => [self.entries[0], self.entries[2], self.entries[1]], + } + } + /// Return information about the content merge if it was performed. pub fn content_merge(&self) -> Option { match &self.resolution { @@ -308,3 +365,132 @@ pub struct Options { pub(super) mod function; mod utils; +pub mod apply_index_entries { + + pub(super) mod function { + use crate::tree::{Conflict, ConflictIndexEntryPathHint, Resolution, ResolutionFailure, TreatAsUnresolved}; + use bstr::{BStr, ByteSlice}; + use std::collections::{hash_map, HashMap}; + + /// Returns `true` if `index` changed as we applied conflicting stages to it, using `how` to determine if a + /// conflict should be considered unresolved. + /// Once a stage of a path conflicts, the unconflicting stage is removed even though it might be the one + /// that is currently checked out. + /// This removal, however, is only done by flagging it with [gix_index::entry::Flags::REMOVE], which means + /// these entries won't be written back to disk but will still be present in the index. + /// It's important that `index` matches the tree that was produced as part of the merge that also + /// brought about `conflicts`, or else this function will fail if it cannot find the path matching + /// the conflicting entries. + /// + /// Note that in practice, whenever there is a single [conflict](Conflict), this function will return `true`. + /// Errors can only occour if `index` isn't the one created from the merged tree that produced the `conflicts`. + pub fn apply_index_entries( + conflicts: &[Conflict], + how: TreatAsUnresolved, + index: &mut gix_index::State, + ) -> bool { + let len = index.entries().len(); + let mut idx_by_path_stage = HashMap::<(gix_index::entry::Stage, &BStr), usize>::default(); + for conflict in conflicts.iter().filter(|c| c.is_unresolved(how)) { + let (renamed_path, current_path): (Option<&BStr>, &BStr) = match &conflict.resolution { + Ok(success) => match success { + Resolution::SourceLocationAffectedByRename { final_location } => { + (Some(final_location.as_bstr()), final_location.as_bstr()) + } + Resolution::OursModifiedTheirsRenamedAndChangedThenRename { final_location, .. } => ( + final_location.as_ref().map(|p| p.as_bstr()), + conflict.changes_in_resolution().1.location(), + ), + Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { .. } => { + (None, conflict.ours.location()) + } + }, + Err(failure) => match failure { + ResolutionFailure::OursRenamedTheirsRenamedDifferently { .. } => { + (Some(conflict.theirs.location()), conflict.ours.location()) + } + ResolutionFailure::OursModifiedTheirsRenamedTypeMismatch + | ResolutionFailure::OursDeletedTheirsRenamed + | ResolutionFailure::OursModifiedTheirsDeleted + | ResolutionFailure::Unknown => (None, conflict.ours.location()), + ResolutionFailure::OursModifiedTheirsDirectoryThenOursRenamed { + renamed_unique_path_to_modified_blob, + } => ( + Some(renamed_unique_path_to_modified_blob.as_bstr()), + conflict.ours.location(), + ), + ResolutionFailure::OursAddedTheirsAddedTypeMismatch { their_unique_location } => { + (Some(their_unique_location.as_bstr()), conflict.ours.location()) + } + }, + }; + let source_path = conflict.ours.source_location(); + + let entries_with_stage = conflict.entries().into_iter().enumerate().filter_map(|(idx, entry)| { + entry.filter(|e| e.mode.is_no_tree()).map(|e| { + ( + match idx { + 0 => gix_index::entry::Stage::Base, + 1 => gix_index::entry::Stage::Ours, + 2 => gix_index::entry::Stage::Theirs, + _ => unreachable!("fixed size array with three items"), + }, + match e.path_hint { + None => renamed_path.unwrap_or(current_path), + Some(ConflictIndexEntryPathHint::Source) => source_path, + Some(ConflictIndexEntryPathHint::Current) => current_path, + Some(ConflictIndexEntryPathHint::RenamedOrTheirs) => { + renamed_path.unwrap_or_else(|| conflict.changes_in_resolution().1.location()) + } + }, + e, + ) + }) + }); + + if !entries_with_stage.clone().any(|(_, path, _)| { + index + .entry_index_by_path_and_stage_bounded(path, gix_index::entry::Stage::Unconflicted, len) + .is_some() + }) { + continue; + } + + for (stage, path, entry) in entries_with_stage { + if let Some(pos) = + index.entry_index_by_path_and_stage_bounded(path, gix_index::entry::Stage::Unconflicted, len) + { + index.entries_mut()[pos].flags.insert(gix_index::entry::Flags::REMOVE); + }; + match idx_by_path_stage.entry((stage, path)) { + hash_map::Entry::Occupied(map_entry) => { + // This can happen due to the way the algorithm works. + // The same happens in Git, but it stores the index-related data as part of its deduplicating tree. + // We store each conflict we encounter, which also may duplicate their index entries, sometimes, but + // with different values. The most recent value wins. + // Instead of trying to deduplicate the index entries when the merge runs, we put the cost + // to the tree-assembly - there is no way around it. + let index_entry = &mut index.entries_mut()[*map_entry.get()]; + index_entry.mode = entry.mode.into(); + index_entry.id = entry.id; + } + hash_map::Entry::Vacant(map_entry) => { + map_entry.insert(index.entries().len()); + index.dangerously_push_entry( + Default::default(), + entry.id, + stage.into(), + entry.mode.into(), + path, + ); + } + }; + } + } + + index.sort_entries(); + index.entries().len() != len + } + } +} +pub use apply_index_entries::function::apply_index_entries; diff --git a/gix-merge/src/tree/utils.rs b/gix-merge/src/tree/utils.rs index cd37f809e1e..318916ff981 100644 --- a/gix-merge/src/tree/utils.rs +++ b/gix-merge/src/tree/utils.rs @@ -7,7 +7,10 @@ //! contribute to finding a fix faster. use crate::blob::builtin_driver::binary::Pick; use crate::blob::ResourceKind; -use crate::tree::{Conflict, ConflictMapping, Error, Options, Resolution, ResolutionFailure}; +use crate::tree::{ + Conflict, ConflictIndexEntry, ConflictIndexEntryPathHint, ConflictMapping, Error, Options, Resolution, + ResolutionFailure, +}; use bstr::ByteSlice; use bstr::{BStr, BString, ByteVec}; use gix_diff::tree_with_rewrites::{Change, ChangeRef}; @@ -98,6 +101,14 @@ pub fn perform_blob_merge( where E: Into>, { + if our_id == their_id { + // This can happen if the merge modes are different. + debug_assert_ne!( + our_mode, their_mode, + "BUG: we must think anything has to be merged if the modes and the ids are the same" + ); + return Ok((their_id, crate::blob::Resolution::Complete)); + } if matches!(our_mode.kind(), EntryKind::Link) && matches!(their_mode.kind(), EntryKind::Link) { let (pick, resolution) = crate::blob::builtin_driver::binary(options.symlink_conflicts); let (our_id, their_id) = match outer_side { @@ -544,29 +555,57 @@ impl Conflict { pub(super) fn without_resolution( resolution: ResolutionFailure, changes: (&Change, &Change, ConflictMapping, ConflictMapping), + entries: [Option; 3], ) -> Self { - Conflict::maybe_resolved(Err(resolution), changes) + Conflict::maybe_resolved(Err(resolution), changes, entries) } pub(super) fn with_resolution( resolution: Resolution, changes: (&Change, &Change, ConflictMapping, ConflictMapping), + entries: [Option; 3], ) -> Self { - Conflict::maybe_resolved(Ok(resolution), changes) + Conflict::maybe_resolved(Ok(resolution), changes, entries) } - pub(super) fn maybe_resolved( + fn maybe_resolved( resolution: Result, (ours, theirs, map, outer_map): (&Change, &Change, ConflictMapping, ConflictMapping), + entries: [Option; 3], ) -> Self { Conflict { resolution, ours: ours.clone(), theirs: theirs.clone(), + entries, map: match outer_map { ConflictMapping::Original => map, ConflictMapping::Swapped => map.swapped(), }, } } + + pub(super) fn unknown(changes: (&Change, &Change, ConflictMapping, ConflictMapping)) -> Self { + let (source_mode, source_id) = changes.0.source_entry_mode_and_id(); + let (our_mode, our_id) = changes.0.entry_mode_and_id(); + let (their_mode, their_id) = changes.1.entry_mode_and_id(); + let entries = [ + Some(ConflictIndexEntry { + mode: source_mode, + id: source_id.into(), + path_hint: Some(ConflictIndexEntryPathHint::Source), + }), + Some(ConflictIndexEntry { + mode: our_mode, + id: our_id.into(), + path_hint: Some(ConflictIndexEntryPathHint::Current), + }), + Some(ConflictIndexEntry { + mode: their_mode, + id: their_id.into(), + path_hint: Some(ConflictIndexEntryPathHint::RenamedOrTheirs), + }), + ]; + Conflict::maybe_resolved(Err(ResolutionFailure::Unknown), changes, entries) + } } diff --git a/gix-merge/tests/fixtures/generated-archives/tree-baseline.tar b/gix-merge/tests/fixtures/generated-archives/tree-baseline.tar index bd50e785648..325533a59a8 100644 Binary files a/gix-merge/tests/fixtures/generated-archives/tree-baseline.tar and b/gix-merge/tests/fixtures/generated-archives/tree-baseline.tar differ diff --git a/gix-merge/tests/fixtures/tree-baseline.sh b/gix-merge/tests/fixtures/tree-baseline.sh index 69b103ae986..70394d1d1e2 100644 --- a/gix-merge/tests/fixtures/tree-baseline.sh +++ b/gix-merge/tests/fixtures/tree-baseline.sh @@ -31,6 +31,11 @@ function seq () { done } +function make_conflict_index() { + local identifier=${1:?The first argument is the name of the parent directory along with the output name} + cp .git/index .git/"${identifier}".index +} + function baseline () ( local dir=${1:?the directory to enter} local output_name=${2:?the basename of the output of the merge} @@ -172,6 +177,29 @@ git init rename-add git commit -m "rename foo to bar" ) +git init rename-add-exe-bit-conflict +(cd rename-add-exe-bit-conflict + touch a b + chmod +x a + git add --chmod=+x a + git add b + git commit -m "original" + + git branch A + git branch B + + git checkout A + chmod -x a + git update-index --chmod=-x a + git commit -m "-x a" + + git checkout B + git mv --force b a + chmod +x a + git update-index --chmod=+x a + git commit -m "mv b a; chmod +x a" +) + git init rename-add-symlink (cd rename-add-symlink write_lines original 1 2 3 4 5 >foo @@ -317,6 +345,34 @@ git init super-2 git add foo git mv foo olddir/bar git commit -m "Modify foo & rename foo -> olddir/bar" + + rm .git/index + git update-index --index-info <a/sub/y.f git mv a a-different git commit -am "changed all content, renamed a -> a-different" + +# Git only sees the files with content changes as conflicting, and somehow misses to add the +# bases of the files without content changes. After all, these also have been renamed into +# different places which must be a conflict just as much. + rm .git/index + git update-index --index-info <a/sub/y.f git mv a/sub a/sub-different git commit -am "changed all content, renamed a/sub -> a/sub-different" + +# Here it's the same as above, i.e. Git doesn't list files as conflicting if +# they didn't change, even though they have a conflicting rename. + rm .git/index + git update-index --index-info <a/x.f git mv a a-renamed git commit -am "Git, when branches are reversed, doesn't keep the +x flag on a/w so we specify our own expectation" + # Git sets +x and adds it as conflict, even though the merge is perfect, i.e. one side adds +x on top, perfectly additive. + make_conflict_index same-rename-different-mode-A-B + make_conflict_index same-rename-different-mode-A-B-reversed +) + +git init remove-executable-mode +(cd remove-executable-mode + touch w + chmod +x w + git add --chmod=+x w + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + chmod -x w + git update-index --chmod=-x w + git commit -am "remove executable bit from w" + + git checkout B + write_lines 1 2 3 4 5 >w + git commit -am "unrelated change to w" ) git init renamed-symlink-with-conflict @@ -614,6 +787,23 @@ git init submodule-both-modify git add sub tick git commit -m b + + # We cannot handle submodules yet and thus mark them as conflicted, always if they mismatch at least. + rm .git/index + git update-index --index-info <a/x.f git add . && git commit -m "original" @@ -678,8 +868,8 @@ git init big-file-merge git branch B git checkout A - seq 30 >a/x.f - git commit -am "turn normal file into big one (81 bytes)" + seq 37 >a/x.f + git commit -am "turn normal file into big one (102 bytes)" git branch expected git checkout B @@ -803,6 +993,9 @@ git init type-change-to-symlink +# TODO: Git does not detect the conflict (one turns exe off, the other turns it on), and we do exactly the same. +baseline rename-add-exe-bit-conflict A-B A B +baseline remove-executable-mode A-B A B baseline simple side-1-3-without-conflict side1 side3 baseline simple fast-forward side1 main baseline simple no-change main main @@ -838,7 +1031,6 @@ baseline conflicting-rename-2 A-B A B baseline conflicting-rename-complex A-B A B "Git has different rename tracking which is why a-renamed/w disappears - it's still close enough" baseline same-rename-different-mode A-B A B "Git works for the A/B case, but for B/A it forgets to set the executable bit" -baseline same-rename-different-mode A-B-diff3 A B "Git works for the A/B case, but for B/A it forgets to set the executable bit" baseline renamed-symlink-with-conflict A-B A B baseline added-file-changed-content-and-mode A-B A B "We improve on executable bit handling, but loose on diff quality as we are definitely missing some tweaks" diff --git a/gix-merge/tests/merge/tree/baseline.rs b/gix-merge/tests/merge/tree/baseline.rs index 18ba8aa0f06..1eddf0211a7 100644 --- a/gix-merge/tests/merge/tree/baseline.rs +++ b/gix-merge/tests/merge/tree/baseline.rs @@ -6,7 +6,7 @@ use gix_object::FindExt; use std::path::{Path, PathBuf}; /// An entry in the conflict -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub struct Entry { /// The relative path in the repository pub location: String, @@ -17,7 +17,7 @@ pub struct Entry { } /// Keep track of all the sides of a conflict. Some might not be set to indicate removal, including the ancestor. -#[derive(Default, Debug)] +#[derive(Default, Debug, Eq, PartialEq)] pub struct Conflict { pub ancestor: Option, pub ours: Option, @@ -129,7 +129,7 @@ impl Iterator for Expectations<'_> { let mut tokens = line.split(' '); let ( Some(subdir), - Some(conflict_style), + Some(conflict_style_name), Some(our_commit_id), Some(our_side_name), Some(their_commit_id), @@ -160,7 +160,7 @@ impl Iterator for Expectations<'_> { }); let subdir_path = self.root.join(subdir); - let conflict_style = match conflict_style { + let conflict_style = match conflict_style_name { "merge" => ConflictStyle::Merge, "diff3" => ConflictStyle::Diff3, unknown => unreachable!("Unknown conflict style: '{unknown}'"), @@ -221,6 +221,10 @@ fn parse_merge_info(content: String) -> MergeInfo { *field = Some(entry); } + if conflict.any_location().is_some() && conflicts.last() != Some(&conflict) { + conflicts.push(conflict); + } + while lines.peek().is_some() { out.information .push(parse_info(&mut lines).expect("if there are lines, it should be valid info")); @@ -285,6 +289,30 @@ fn parse_info<'a>(mut lines: impl Iterator) -> Option { + path: &'a BStr, + id: gix_hash::ObjectId, + mode: gix_index::entry::Mode, + stage: gix_index::entry::Stage, +} + +pub fn clear_entries(state: &gix_index::State) -> Vec> { + state + .entries() + .iter() + .map(|entry| { + let path = entry.path(state); + DebugIndexEntry { + path, + id: entry.id, + mode: entry.mode, + stage: entry.stage(), + } + }) + .collect() +} + pub fn visualize_tree( id: &gix_hash::oid, odb: &impl gix_object::Find, @@ -342,3 +370,35 @@ pub fn show_diff_and_fail( expected.information ); } + +pub(crate) fn apply_git_index_entries(conflicts: &[Conflict], state: &mut gix_index::State) { + let len = state.entries().len(); + for Conflict { ours, theirs, ancestor } in conflicts { + for (entry, stage) in [ + ancestor.as_ref().map(|e| (e, gix_index::entry::Stage::Base)), + ours.as_ref().map(|e| (e, gix_index::entry::Stage::Ours)), + theirs.as_ref().map(|e| (e, gix_index::entry::Stage::Theirs)), + ] + .into_iter() + .flatten() + { + if let Some(pos) = state.entry_index_by_path_and_stage_bounded( + entry.location.as_str().into(), + gix_index::entry::Stage::Unconflicted, + len, + ) { + state.entries_mut()[pos].flags.insert(gix_index::entry::Flags::REMOVE); + } + + state.dangerously_push_entry( + Default::default(), + entry.id, + stage.into(), + entry.mode.into(), + entry.location.as_str().into(), + ); + } + } + state.sort_entries(); + state.remove_entries(|_, _, e| e.flags.contains(gix_index::entry::Flags::REMOVE)); +} diff --git a/gix-merge/tests/merge/tree/mod.rs b/gix-merge/tests/merge/tree/mod.rs index 833166bc61a..662bcc67998 100644 --- a/gix-merge/tests/merge/tree/mod.rs +++ b/gix-merge/tests/merge/tree/mod.rs @@ -1,6 +1,7 @@ use crate::tree::baseline::Deviation; use gix_diff::Rewrites; use gix_merge::commit::Options; +use gix_merge::tree::TreatAsUnresolved; use gix_object::Write; use gix_worktree::stack::state::attributes; use std::path::Path; @@ -20,7 +21,7 @@ fn run_baseline() -> crate::Result { let root = gix_testtools::scripted_fixture_read_only("tree-baseline.sh")?; let cases = std::fs::read_to_string(root.join("baseline.cases"))?; let mut actual_cases = 0; - // let new_test = Some("simple-fast-forward"); + // let new_test = Some("rename-add-symlink-A-B"); let new_test = None; for baseline::Expectation { root, @@ -98,10 +99,58 @@ fn run_baseline() -> crate::Result { ); } } + + let mut actual_index = gix_index::State::from_tree(&actual_id, &odb, Default::default())?; + let expected_index = { + let derivative_index_path = root.join(".git").join(format!("{case_name}.index")); + if derivative_index_path.exists() { + gix_index::File::at( + derivative_index_path, + odb.store().object_hash(), + true, + Default::default(), + )? + .into() + } else { + let mut index = actual_index.clone(); + if let Some(conflicts) = &merge_info.conflicts { + baseline::apply_git_index_entries(conflicts, &mut index); + } + index + } + }; + let conflicts_like_in_git = TreatAsUnresolved::Renames; + let did_change = actual.index_changed_after_applying_conflicts(&mut actual_index, conflicts_like_in_git); + actual_index.remove_entries(|_, _, e| e.flags.contains(gix_index::entry::Flags::REMOVE)); + + pretty_assertions::assert_eq!( + baseline::clear_entries(&actual_index), + baseline::clear_entries(&expected_index), + "{case_name}: index mismatch\n{:#?}\n{:#?}", + actual.conflicts, + merge_info.conflicts + ); + // if case_name.starts_with("submodule-both-modify-A-B") { + if false { + assert!( + !did_change, + "{case_name}: We can't handle submodules, so there is no index change" + ); + assert!( + actual.has_unresolved_conflicts(conflicts_like_in_git), + "{case_name}: submodules currently result in an unresolved (unknown) conflict" + ); + } else { + assert_eq!( + did_change, + actual.has_unresolved_conflicts(conflicts_like_in_git), + "{case_name}: If there is any kind of conflict, the index should have been changed" + ); + } } assert_eq!( - actual_cases, 105, + actual_cases, 107, "BUG: update this number, and don't forget to remove a filter in the end" );