Skip to content

Commit

Permalink
feat: provide a way to record and apply index changes.
Browse files Browse the repository at this point in the history
These changes will then be applicable to an index that is created
from the written tree editor.
  • Loading branch information
Byron committed Nov 23, 2024
1 parent c6f1409 commit d803ff5
Show file tree
Hide file tree
Showing 10 changed files with 725 additions and 50 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions crate-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,8 @@ Check out the [performance discussion][gix-diff-performance] as well.
* [x] find blobs by similarity check
* [ ] heuristics to find best candidate
* [ ] find by basename to support similarity check
- Not having it can lead to issues when files with the same or similar content are part of a move
as files can be lost that way.
* [x] directory tracking
- [x] by identity
- [ ] by similarity
Expand Down Expand Up @@ -349,8 +351,7 @@ Check out the [performance discussion][gix-diff-performance] as well.
- [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change).
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
* [x] **tree**-diff-heuristics match Git for its test-cases
- [ ] a way to generate an index with stages
- *currently the data it provides won't generate index entries, and possibly can't be used for it yet*
- [x] a way to generate an index with stages, mostly conforming with Git.
- [ ] submodule merges (*right now they count as conflicts if they differ*)
* [x] **commits** - with handling of multiple merge bases by recursive merge-base merge
* [x] API documentation
Expand Down
1 change: 1 addition & 0 deletions gix-merge/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ gix-quote = { version = "^0.4.13", path = "../gix-quote" }
gix-revision = { version = "^0.30.0", path = "../gix-revision", default-features = false, features = ["merge_base"] }
gix-revwalk = { version = "^0.16.0", path = "../gix-revwalk" }
gix-diff = { version = "^0.47.0", path = "../gix-diff", default-features = false, features = ["blob"] }
gix-index = { version = "^0.36.0", path = "../gix-index" }

thiserror = "2.0.0"
imara-diff = { version = "0.1.7" }
Expand Down
212 changes: 179 additions & 33 deletions gix-merge/src/tree/function.rs

Large diffs are not rendered by default.

188 changes: 187 additions & 1 deletion gix-merge/src/tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ impl Outcome<'_> {
pub fn has_unresolved_conflicts(&self, how: TreatAsUnresolved) -> bool {
// A single conflict that counts as unresolved under `how` is enough to answer `true`.
self.conflicts.iter().any(|c| c.is_unresolved(how))
}

/// Apply the conflicting stages of all conflicts of this outcome to `index` and return `true` if `index`
/// changed, using `how` to determine if a conflict should be considered unresolved.
/// It's important that `index` is at the state of [`Self::tree`].
///
/// This is a convenience wrapper which passes the conflicts of this outcome to [`apply_index_entries()`].
///
/// Note that in practice, whenever there is even a single [conflict](Conflict), this function will return `true`.
/// Also, the unconflicted stage of such entries will be removed merely by setting a flag
/// ([`gix_index::entry::Flags::REMOVE`]), so the in-memory entry is still present.
pub fn index_changed_after_applying_conflicts(&self, index: &mut gix_index::State, how: TreatAsUnresolved) -> bool {
apply_index_entries(&self.conflicts, how, index)
}
}

/// A description of a conflict (i.e. merge issue without an auto-resolution) as seen during a [tree-merge](crate::tree()).
Expand All @@ -99,11 +110,45 @@ pub struct Conflict {
pub ours: Change,
/// The change representing *their* side.
pub theirs: Change,
/// An array to store an entry for each stage of the conflict.
///
/// * `entries[0]` => Base
/// * `entries[1]` => Ours
/// * `entries[2]` => Theirs
///
/// Note that ours and theirs might be swapped, so one should access it through [`Self::entries()`] to compensate for that.
pub entries: [Option<ConflictIndexEntry>; 3],
/// Determine how to interpret the `ours` and `theirs` fields. This is used to implement [`Self::changes_in_resolution()`]
/// and [`Self::into_parts_by_resolution()`].
map: ConflictMapping,
}

/// A conflicting entry for insertion into the index.
/// It will always be either on stage 1 (ancestor/base), 2 (ours) or 3 (theirs).
///
/// The stage itself isn't stored in this type - it's implied by the position of the entry
/// in the array returned by [`Conflict::entries()`].
#[derive(Debug, Clone, Copy)]
pub struct ConflictIndexEntry {
/// The kind of object at this stage.
/// Note that it's possible that this is a directory, for instance if a directory was replaced with a file.
pub mode: gix_object::tree::EntryMode,
/// The id defining the state of the object.
pub id: gix_hash::ObjectId,
/// An optional hint for [`apply_index_entries()`] to select the path under which this entry is inserted.
/// Without a hint, the renamed path is used if there is one, and the current path otherwise.
///
/// Hidden, maybe one day we can do without?
path_hint: Option<ConflictIndexEntryPathHint>,
}

/// A hint for [`apply_index_entries()`] to know which paths to use for an entry.
/// This is only used when necessary, entries without a hint use the renamed path if available,
/// or the current path otherwise.
#[derive(Debug, Clone, Copy)]
enum ConflictIndexEntryPathHint {
/// Use the previous path, i.e. rename source.
/// It's obtained from the source location of *our* change.
Source,
/// Use the current path as it is in the tree.
Current,
/// Use the path of the final destination, or *their* name.
/// It's definitely finicky, as we don't store the actual path and instead refer to it.
/// If there is no renamed path, the location of the second change returned by
/// [`Conflict::changes_in_resolution()`] is used instead.
RenamedOrTheirs,
}

/// A utility to help define which side is what in the [`Conflict`] type.
#[derive(Debug, Clone, Copy)]
enum ConflictMapping {
Expand Down Expand Up @@ -147,7 +192,11 @@ impl Conflict {
TreatAsUnresolved::Renames | TreatAsUnresolved::RenamesAndAutoResolvedContent => match &self.resolution {
Ok(success) => match success {
Resolution::SourceLocationAffectedByRename { .. } => false,
Resolution::OursModifiedTheirsRenamedAndChangedThenRename { .. } => true,
Resolution::OursModifiedTheirsRenamedAndChangedThenRename {
merged_blob,
final_location,
..
} => final_location.is_some() || merged_blob.as_ref().map_or(false, content_merge_matches),
Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => {
content_merge_matches(merged_blob)
}
Expand Down Expand Up @@ -178,6 +227,14 @@ impl Conflict {
}
}

/// Provide the conflicting index entries as `[base, ours, theirs]`, ready for insertion into the index.
///
/// The order of *ours* and *theirs* is adjusted to be consistent with the changes returned
/// by [`Self::changes_in_resolution()`], which is why this accessor should be preferred over
/// reading the `entries` field directly.
pub fn entries(&self) -> [Option<ConflictIndexEntry>; 3] {
    // The entries are `Copy`, so taking the array apart is cheap and leaves `self` untouched.
    let [base, first, second] = self.entries;
    match self.map {
        ConflictMapping::Original => [base, first, second],
        ConflictMapping::Swapped => [base, second, first],
    }
}

/// Return information about the content merge if it was performed.
pub fn content_merge(&self) -> Option<ContentMerge> {
match &self.resolution {
Expand Down Expand Up @@ -308,3 +365,132 @@ pub struct Options {

pub(super) mod function;
mod utils;
pub mod apply_index_entries {

pub(super) mod function {
use crate::tree::{Conflict, ConflictIndexEntryPathHint, Resolution, ResolutionFailure, TreatAsUnresolved};
use bstr::{BStr, ByteSlice};
use std::collections::{hash_map, HashMap};

/// Returns `true` if `index` changed as we applied conflicting stages to it, using `how` to determine if a
/// conflict should be considered unresolved.
/// Once a stage of a path conflicts, the unconflicting stage is removed even though it might be the one
/// that is currently checked out.
/// This removal, however, is only done by flagging it with [gix_index::entry::Flags::REMOVE], which means
/// these entries won't be written back to disk but will still be present in the index.
/// It's important that `index` matches the tree that was produced as part of the merge that also
/// brought about `conflicts`, or else conflicts are skipped silently if none of the paths of their
/// entries can be found in `index` at the unconflicted stage.
///
/// The return value is determined by comparing the amount of entries before and after the operation,
/// so it is `true` only if at least one conflicting entry was added.
///
/// Note that in practice, whenever there is a single [conflict](Conflict), this function will return `true`.
/// This function itself cannot fail, entries that can't be matched to `index` are merely not applied.
pub fn apply_index_entries(
conflicts: &[Conflict],
how: TreatAsUnresolved,
index: &mut gix_index::State,
) -> bool {
// The original amount of entries is used as bound for lookups and to detect changes at the end.
// NOTE(review): presumably the bound restricts lookups to the entries present before we started
// pushing new ones, as those are appended unsorted - confirm against `gix-index`.
let len = index.entries().len();
// Maps `(stage, path)` to the position of the entry we pushed for it, to update duplicates in place.
let mut idx_by_path_stage = HashMap::<(gix_index::entry::Stage, &BStr), usize>::default();
for conflict in conflicts.iter().filter(|c| c.is_unresolved(how)) {
// Figure out the path after a rename (if there is one) and the current path for each kind of (failed) resolution.
let (renamed_path, current_path): (Option<&BStr>, &BStr) = match &conflict.resolution {
Ok(success) => match success {
Resolution::SourceLocationAffectedByRename { final_location } => {
(Some(final_location.as_bstr()), final_location.as_bstr())
}
Resolution::OursModifiedTheirsRenamedAndChangedThenRename { final_location, .. } => (
final_location.as_ref().map(|p| p.as_bstr()),
conflict.changes_in_resolution().1.location(),
),
Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { .. } => {
(None, conflict.ours.location())
}
},
Err(failure) => match failure {
ResolutionFailure::OursRenamedTheirsRenamedDifferently { .. } => {
(Some(conflict.theirs.location()), conflict.ours.location())
}
ResolutionFailure::OursModifiedTheirsRenamedTypeMismatch
| ResolutionFailure::OursDeletedTheirsRenamed
| ResolutionFailure::OursModifiedTheirsDeleted
| ResolutionFailure::Unknown => (None, conflict.ours.location()),
ResolutionFailure::OursModifiedTheirsDirectoryThenOursRenamed {
renamed_unique_path_to_modified_blob,
} => (
Some(renamed_unique_path_to_modified_blob.as_bstr()),
conflict.ours.location(),
),
ResolutionFailure::OursAddedTheirsAddedTypeMismatch { their_unique_location } => {
(Some(their_unique_location.as_bstr()), conflict.ours.location())
}
},
};
// Only used by entries that carry the `Source` path hint.
let source_path = conflict.ours.source_location();

// Lazily associate each entry that passes the `is_no_tree()` filter with its stage, which is
// implied by its position in the array, and with the path to insert it at, which is selected
// by its path hint.
let entries_with_stage = conflict.entries().into_iter().enumerate().filter_map(|(idx, entry)| {
entry.filter(|e| e.mode.is_no_tree()).map(|e| {
(
match idx {
0 => gix_index::entry::Stage::Base,
1 => gix_index::entry::Stage::Ours,
2 => gix_index::entry::Stage::Theirs,
_ => unreachable!("fixed size array with three items"),
},
match e.path_hint {
None => renamed_path.unwrap_or(current_path),
Some(ConflictIndexEntryPathHint::Source) => source_path,
Some(ConflictIndexEntryPathHint::Current) => current_path,
Some(ConflictIndexEntryPathHint::RenamedOrTheirs) => {
renamed_path.unwrap_or_else(|| conflict.changes_in_resolution().1.location())
}
},
e,
)
})
});

// Skip the conflict entirely if not a single one of its paths is present in `index` as unconflicted entry.
// The iterator is cloned as it's needed again for the actual insertion below.
if !entries_with_stage.clone().any(|(_, path, _)| {
index
.entry_index_by_path_and_stage_bounded(path, gix_index::entry::Stage::Unconflicted, len)
.is_some()
}) {
continue;
}

for (stage, path, entry) in entries_with_stage {
// Mark the unconflicted entry at the same path for removal, as it's superseded by conflicting stages.
if let Some(pos) =
index.entry_index_by_path_and_stage_bounded(path, gix_index::entry::Stage::Unconflicted, len)
{
index.entries_mut()[pos].flags.insert(gix_index::entry::Flags::REMOVE);
};
match idx_by_path_stage.entry((stage, path)) {
hash_map::Entry::Occupied(map_entry) => {
// This can happen due to the way the algorithm works.
// The same happens in Git, but it stores the index-related data as part of its deduplicating tree.
// We store each conflict we encounter, which also may duplicate their index entries, sometimes, but
// with different values. The most recent value wins.
// Instead of trying to deduplicate the index entries when the merge runs, we put the cost
// to the tree-assembly - there is no way around it.
let index_entry = &mut index.entries_mut()[*map_entry.get()];
index_entry.mode = entry.mode.into();
index_entry.id = entry.id;
}
hash_map::Entry::Vacant(map_entry) => {
// Remember the position the new entry will have once it was pushed right below.
map_entry.insert(index.entries().len());
index.dangerously_push_entry(
Default::default(),
entry.id,
stage.into(),
entry.mode.into(),
path,
);
}
};
}
}

// New entries were appended at the end, hence the index has to be sorted again.
index.sort_entries();
index.entries().len() != len
}
}
}
pub use apply_index_entries::function::apply_index_entries;
47 changes: 43 additions & 4 deletions gix-merge/src/tree/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
//! contribute to finding a fix faster.
use crate::blob::builtin_driver::binary::Pick;
use crate::blob::ResourceKind;
use crate::tree::{Conflict, ConflictMapping, Error, Options, Resolution, ResolutionFailure};
use crate::tree::{
Conflict, ConflictIndexEntry, ConflictIndexEntryPathHint, ConflictMapping, Error, Options, Resolution,
ResolutionFailure,
};
use bstr::ByteSlice;
use bstr::{BStr, BString, ByteVec};
use gix_diff::tree_with_rewrites::{Change, ChangeRef};
Expand Down Expand Up @@ -98,6 +101,14 @@ pub fn perform_blob_merge<E>(
where
E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
{
if our_id == their_id {
// This can happen if the merge modes are different.
debug_assert_ne!(
our_mode, their_mode,
"BUG: we must think anything has to be merged if the modes and the ids are the same"
);
return Ok((their_id, crate::blob::Resolution::Complete));
}
if matches!(our_mode.kind(), EntryKind::Link) && matches!(their_mode.kind(), EntryKind::Link) {
let (pick, resolution) = crate::blob::builtin_driver::binary(options.symlink_conflicts);
let (our_id, their_id) = match outer_side {
Expand Down Expand Up @@ -544,29 +555,57 @@ impl Conflict {
pub(super) fn without_resolution(
resolution: ResolutionFailure,
changes: (&Change, &Change, ConflictMapping, ConflictMapping),
entries: [Option<ConflictIndexEntry>; 3],
) -> Self {
Conflict::maybe_resolved(Err(resolution), changes)
Conflict::maybe_resolved(Err(resolution), changes, entries)
}

pub(super) fn with_resolution(
resolution: Resolution,
changes: (&Change, &Change, ConflictMapping, ConflictMapping),
entries: [Option<ConflictIndexEntry>; 3],
) -> Self {
Conflict::maybe_resolved(Ok(resolution), changes)
Conflict::maybe_resolved(Ok(resolution), changes, entries)
}

pub(super) fn maybe_resolved(
fn maybe_resolved(
resolution: Result<Resolution, ResolutionFailure>,
(ours, theirs, map, outer_map): (&Change, &Change, ConflictMapping, ConflictMapping),
entries: [Option<ConflictIndexEntry>; 3],
) -> Self {
Conflict {
resolution,
ours: ours.clone(),
theirs: theirs.clone(),
entries,
map: match outer_map {
ConflictMapping::Original => map,
ConflictMapping::Swapped => map.swapped(),
},
}
}

/// Create a conflict that failed with [`ResolutionFailure::Unknown`], with index entries for all
/// three stages derived from `changes`.
///
/// * The base entry uses the source mode and id of the first change, placed at the source path.
/// * *Our* entry uses the mode and id of the first change, placed at the current path.
/// * *Their* entry uses the mode and id of the second change, placed at the renamed path or their path.
pub(super) fn unknown(changes: (&Change, &Change, ConflictMapping, ConflictMapping)) -> Self {
    let (first, second) = (changes.0, changes.1);
    // One row per stage, in the order base, ours, theirs.
    let entries = [
        (first.source_entry_mode_and_id(), ConflictIndexEntryPathHint::Source),
        (first.entry_mode_and_id(), ConflictIndexEntryPathHint::Current),
        (second.entry_mode_and_id(), ConflictIndexEntryPathHint::RenamedOrTheirs),
    ]
    .map(|((mode, id), hint)| {
        Some(ConflictIndexEntry {
            mode,
            id: id.into(),
            path_hint: Some(hint),
        })
    });
    Conflict::maybe_resolved(Err(ResolutionFailure::Unknown), changes, entries)
}
}
Binary file modified gix-merge/tests/fixtures/generated-archives/tree-baseline.tar
Binary file not shown.
Loading

0 comments on commit d803ff5

Please sign in to comment.