From 6ac0514fe80e09acbd382fb57016de559dd338a1 Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Sun, 24 Nov 2024 15:20:18 -0600 Subject: [PATCH 1/3] conflicts: refactor conflict marker writing and parsing These changes make the code a bit more readable, and they will make it easier to have conflict markers of different lengths in the next commit. --- lib/src/conflicts.rs | 282 ++++++++++++++++++++++++++----------------- 1 file changed, 168 insertions(+), 114 deletions(-) diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 32997363c1..ef937b051b 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -28,8 +28,6 @@ use futures::StreamExt; use futures::TryStreamExt; use itertools::Itertools; use pollster::FutureExt; -use regex::bytes::Regex; -use regex::bytes::RegexBuilder; use crate::backend::BackendError; use crate::backend::BackendResult; @@ -51,49 +49,25 @@ use crate::merge::MergedTreeValue; use crate::repo_path::RepoPath; use crate::store::Store; -const CONFLICT_START_LINE: &str = "<<<<<<<"; -const CONFLICT_END_LINE: &str = ">>>>>>>"; -const CONFLICT_DIFF_LINE: &str = "%%%%%%%"; -const CONFLICT_MINUS_LINE: &str = "-------"; -const CONFLICT_PLUS_LINE: &str = "+++++++"; -const CONFLICT_GIT_ANCESTOR_LINE: &str = "|||||||"; -const CONFLICT_GIT_SEPARATOR_LINE: &str = "======="; -const CONFLICT_START_LINE_CHAR: u8 = CONFLICT_START_LINE.as_bytes()[0]; -const CONFLICT_END_LINE_CHAR: u8 = CONFLICT_END_LINE.as_bytes()[0]; -const CONFLICT_DIFF_LINE_CHAR: u8 = CONFLICT_DIFF_LINE.as_bytes()[0]; -const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0]; -const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0]; -const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0]; -const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0]; - -/// A conflict marker is one of the separators, optionally followed by a space -/// and some text. -// TODO: All the `{7}` could be replaced with `{7,}` to allow longer -// separators. This could be useful to make it possible to allow conflict -// markers inside the text of the conflicts. -static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { - RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$") - .multi_line(true) - .build() - .unwrap() -}); +/// Length of conflict markers. +pub const CONFLICT_MARKER_LEN: usize = 7; fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> { for hunk in hunks { match hunk.kind { DiffHunkKind::Matching => { debug_assert!(hunk.contents.iter().all_equal()); - for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[0].lines_with_terminator() { file.write_all(b" ")?; file.write_all(line)?; } } DiffHunkKind::Different => { - for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[0].lines_with_terminator() { file.write_all(b"-")?; file.write_all(line)?; } - for line in hunk.contents[1].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[1].lines_with_terminator() { file.write_all(b"+")?; file.write_all(line)?; } @@ -250,6 +224,77 @@ pub enum ConflictMarkerStyle { Git, } +/// Characters which can be repeated to form a conflict marker line when +/// materializing and parsing conflicts. +#[derive(Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +enum ConflictMarkerLineChar { + ConflictStart = b'<', + ConflictEnd = b'>', + Add = b'+', + Remove = b'-', + Diff = b'%', + GitAncestor = b'|', + GitSeparator = b'=', +} + +impl ConflictMarkerLineChar { + /// Get the ASCII byte used for this conflict marker. + fn to_byte(self) -> u8 { + self as u8 + } + + /// Parse a byte to see if it corresponds with any kind of conflict marker. + fn parse_byte(byte: u8) -> Option { + match byte { + b'<' => Some(Self::ConflictStart), + b'>' => Some(Self::ConflictEnd), + b'+' => Some(Self::Add), + b'-' => Some(Self::Remove), + b'%' => Some(Self::Diff), + b'|' => Some(Self::GitAncestor), + b'=' => Some(Self::GitSeparator), + _ => None, + } + } +} + +/// Write a conflict marker to an output file. +fn write_conflict_marker( + output: &mut dyn Write, + kind: ConflictMarkerLineChar, + suffix_text: &str, +) -> io::Result<()> { + let conflict_marker = BString::new(vec![kind.to_byte(); CONFLICT_MARKER_LEN]); + + if suffix_text.is_empty() { + writeln!(output, "{conflict_marker}") + } else { + writeln!(output, "{conflict_marker} {suffix_text}") + } +} + +/// Parse a conflict marker from a line of a file. The conflict marker must have +/// the correct length (CONFLICT_MARKER_LEN). +fn parse_conflict_marker(line: &[u8]) -> Option { + let first_byte = *line.first()?; + let kind = ConflictMarkerLineChar::parse_byte(first_byte)?; + let len = line.iter().take_while(|&&b| b == first_byte).count(); + + if len != CONFLICT_MARKER_LEN { + return None; + } + + if let Some(next_byte) = line.get(len) { + // If there is a character after the marker, it must be ASCII whitespace + if !next_byte.is_ascii_whitespace() { + return None; + } + } + + Some(kind) +} + pub fn materialize_merge_result>( single_hunk: &Merge, conflict_marker_style: ConflictMarkerStyle, @@ -323,14 +368,22 @@ fn materialize_git_style_conflict( conflict_info: &str, output: &mut dyn Write, ) -> io::Result<()> { - writeln!(output, "{CONFLICT_START_LINE} Side #1 ({conflict_info})")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::ConflictStart, + &format!("Side #1 ({conflict_info})"), + )?; output.write_all(left)?; - writeln!(output, "{CONFLICT_GIT_ANCESTOR_LINE} Base")?; + write_conflict_marker(output, ConflictMarkerLineChar::GitAncestor, "Base")?; output.write_all(base)?; // VS Code doesn't seem to support any trailing text on the separator line - writeln!(output, "{CONFLICT_GIT_SEPARATOR_LINE}")?; + write_conflict_marker(output, ConflictMarkerLineChar::GitSeparator, "")?; output.write_all(right)?; - writeln!(output, "{CONFLICT_END_LINE} Side #2 ({conflict_info} ends)")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::ConflictEnd, + &format!("Side #2 ({conflict_info} ends)"), + )?; Ok(()) } @@ -343,17 +396,21 @@ fn materialize_jj_style_conflict( ) -> io::Result<()> { // Write a positive snapshot (side) of a conflict fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> { - writeln!( + write_conflict_marker( output, - "{CONFLICT_PLUS_LINE} Contents of side #{}", - add_index + 1 + ConflictMarkerLineChar::Add, + &format!("Contents of side #{}", add_index + 1), )?; output.write_all(data) } // Write a negative snapshot (base) of a conflict fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> { - writeln!(output, "{CONFLICT_MINUS_LINE} Contents of {base_str}")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::Remove, + &format!("Contents of {base_str}"), + )?; output.write_all(data) } @@ -364,15 +421,15 @@ fn materialize_jj_style_conflict( diff: &[DiffHunk], output: &mut dyn Write, ) -> io::Result<()> { - writeln!( + write_conflict_marker( output, - "{CONFLICT_DIFF_LINE} Changes from {base_str} to side #{}", - add_index + 1 + ConflictMarkerLineChar::Diff, + &format!("Changes from {base_str} to side #{}", add_index + 1), )?; write_diff_hunks(diff, output) } - writeln!(output, "{CONFLICT_START_LINE} {conflict_info}")?; + write_conflict_marker(output, ConflictMarkerLineChar::ConflictStart, conflict_info)?; let mut add_index = 0; for (base_index, left) in hunk.removes().enumerate() { // The vast majority of conflicts one actually tries to resolve manually have 1 @@ -422,7 +479,11 @@ fn materialize_jj_style_conflict( for (add_index, slice) in hunk.adds().enumerate().skip(add_index) { write_side(add_index, slice, output)?; } - writeln!(output, "{CONFLICT_END_LINE} {conflict_info} ends")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::ConflictEnd, + &format!("{conflict_info} ends"), + )?; Ok(()) } @@ -480,24 +541,27 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option { conflict_start = Some(pos); conflict_start_len = line.len(); - } else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR { - let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos]; - let hunk = parse_conflict_hunk(conflict_body); - if hunk.num_sides() == num_sides { - let resolved_slice = &input[resolved_start..conflict_start.unwrap()]; - if !resolved_slice.is_empty() { - hunks.push(Merge::resolved(BString::from(resolved_slice))); + } + Some(ConflictMarkerLineChar::ConflictEnd) => { + if let Some(conflict_start_index) = conflict_start.take() { + let conflict_body = &input[conflict_start_index + conflict_start_len..pos]; + let hunk = parse_conflict_hunk(conflict_body); + if hunk.num_sides() == num_sides { + let resolved_slice = &input[resolved_start..conflict_start_index]; + if !resolved_slice.is_empty() { + hunks.push(Merge::resolved(BString::from(resolved_slice))); + } + hunks.push(hunk); + resolved_start = pos + line.len(); } - hunks.push(hunk); - resolved_start = pos + line.len(); } - conflict_start = None; } + _ => {} } pos += line.len(); } @@ -519,20 +583,21 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option Merge { // If the hunk starts with a conflict marker, find its first character - let initial_conflict_marker_char = input + let initial_conflict_marker = input .lines_with_terminator() .next() - .filter(|line| is_conflict_marker_line(line)) - .map(|line| line[0]); + .and_then(parse_conflict_marker); - match initial_conflict_marker_char { + match initial_conflict_marker { // JJ-style conflicts must start with one of these 3 conflict marker lines - Some(CONFLICT_DIFF_LINE_CHAR | CONFLICT_MINUS_LINE_CHAR | CONFLICT_PLUS_LINE_CHAR) => { - parse_jj_style_conflict_hunk(input) - } + Some( + ConflictMarkerLineChar::Diff + | ConflictMarkerLineChar::Remove + | ConflictMarkerLineChar::Add, + ) => parse_jj_style_conflict_hunk(input), // Git-style conflicts either must not start with a conflict marker line, or must start with // the "|||||||" conflict marker line (if the first side was empty) - None | Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => parse_git_style_conflict_hunk(input), + None | Some(ConflictMarkerLineChar::GitAncestor) => parse_git_style_conflict_hunk(input), // No other conflict markers are allowed at the start of a hunk Some(_) => Merge::resolved(BString::new(vec![])), } @@ -541,34 +606,32 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge { fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { enum State { Diff, - Minus, - Plus, + Remove, + Add, Unknown, } let mut state = State::Unknown; let mut removes = vec![]; let mut adds = vec![]; for line in input.lines_with_terminator() { - if is_conflict_marker_line(line) { - match line[0] { - CONFLICT_DIFF_LINE_CHAR => { - state = State::Diff; - removes.push(BString::new(vec![])); - adds.push(BString::new(vec![])); - continue; - } - CONFLICT_MINUS_LINE_CHAR => { - state = State::Minus; - removes.push(BString::new(vec![])); - continue; - } - CONFLICT_PLUS_LINE_CHAR => { - state = State::Plus; - adds.push(BString::new(vec![])); - continue; - } - _ => {} + match parse_conflict_marker(line) { + Some(ConflictMarkerLineChar::Diff) => { + state = State::Diff; + removes.push(BString::new(vec![])); + adds.push(BString::new(vec![])); + continue; } + Some(ConflictMarkerLineChar::Remove) => { + state = State::Remove; + removes.push(BString::new(vec![])); + continue; + } + Some(ConflictMarkerLineChar::Add) => { + state = State::Add; + adds.push(BString::new(vec![])); + continue; + } + _ => {} } match state { State::Diff => { @@ -590,10 +653,10 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { return Merge::resolved(BString::new(vec![])); } } - State::Minus => { + State::Remove => { removes.last_mut().unwrap().extend_from_slice(line); } - State::Plus => { + State::Add => { adds.last_mut().unwrap().extend_from_slice(line); } State::Unknown => { @@ -623,28 +686,26 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { let mut base = BString::new(vec![]); let mut right = BString::new(vec![]); for line in input.lines_with_terminator() { - if is_conflict_marker_line(line) { - match line[0] { - CONFLICT_GIT_ANCESTOR_LINE_CHAR => { - if state == State::Left { - state = State::Base; - continue; - } else { - // Base must come after left - return Merge::resolved(BString::new(vec![])); - } + match parse_conflict_marker(line) { + Some(ConflictMarkerLineChar::GitAncestor) => { + if state == State::Left { + state = State::Base; + continue; + } else { + // Base must come after left + return Merge::resolved(BString::new(vec![])); } - CONFLICT_GIT_SEPARATOR_LINE_CHAR => { - if state == State::Base { - state = State::Right; - continue; - } else { - // Right must come after base - return Merge::resolved(BString::new(vec![])); - } + } + Some(ConflictMarkerLineChar::GitSeparator) => { + if state == State::Base { + state = State::Right; + continue; + } else { + // Right must come after base + return Merge::resolved(BString::new(vec![])); } - _ => {} } + _ => {} } match state { State::Left => left.extend_from_slice(line), @@ -661,13 +722,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { } } -/// Check whether a line is a conflict marker. Removes trailing whitespace -/// before checking against regex to ensure it parses CRLF endings correctly. -fn is_conflict_marker_line(line: &[u8]) -> bool { - let line = line.trim_end_with(|ch| ch.is_ascii_whitespace()); - CONFLICT_MARKER_REGEX.is_match_at(line, 0) -} - /// Parses conflict markers in `content` and returns an updated version of /// `file_ids` with the new contents. If no (valid) conflict markers remain, a /// single resolves `FileId` will be returned. From 10b8069f17aa51fc7056467760e5bd4473a38ba8 Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Sun, 24 Nov 2024 15:20:18 -0600 Subject: [PATCH 2/3] conflicts: escape conflict markers by making them longer If a file contains lines which look like conflict markers, then we need to make the real conflict markers longer so that the materialized conflicts can be parsed unambiguously. When parsing the conflict, we require that the conflict markers are at least as long as the materialized conflict markers based on the current tree. This can lead to some unintuitive edge cases which will be solved in the next commit. For instance, if we have a file explaining the differences between Jujutsu's conflict markers and Git's conflict markers, it could produce a conflict with long markers like this: ``` <<<<<<<<<<< Conflict 1 of 1 %%%%%%%%%%% Changes from base to side #1 Jujutsu uses different conflict markers than Git, which just shows the -sides of a conflict without a diff. +sides of a conflict without a diff: + +<<<<<<< +left +||||||| +base +======= +right +>>>>>>> +++++++++++ Contents of side #2 Jujutsu uses different conflict markers than Git: <<<<<<< %%%%%%% -base +left +++++++ right >>>>>>> >>>>>>>>>>> Conflict 1 of 1 ends ``` We should support these options for "git" conflict marker style as well, since Git actually does support producing longer conflict markers in some cases through .gitattributes: https://git-scm.com/docs/gitattributes#_conflict_marker_size We may also want to support passing the conflict marker length to merge tools as well in the future, since Git supports a "%L" parameter to pass the conflict marker length to merge drivers: https://git-scm.com/docs/gitattributes#_defining_a_custom_merge_driver --- CHANGELOG.md | 4 + lib/src/conflicts.rs | 203 ++++++++++++----- lib/tests/test_conflicts.rs | 420 +++++++++++++++++++++++++++++++++--- 3 files changed, 548 insertions(+), 79 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c94e2cbe4c..e862858f7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * A new Email template type is added. `Signature.email()` now returns an Email template type instead of a String. +* Conflict markers are now allowed to be longer than 7 characters, allowing + conflicts to be materialized and parsed correctly in files which already + contain lines which look like conflict markers. + ### Fixed bugs * The `$NO_COLOR` environment variable must now be non-empty to be respected. diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index ef937b051b..0438f802ac 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -49,8 +49,14 @@ use crate::merge::MergedTreeValue; use crate::repo_path::RepoPath; use crate::store::Store; -/// Length of conflict markers. -pub const CONFLICT_MARKER_LEN: usize = 7; +/// Minimum length of conflict markers. +pub const MIN_CONFLICT_MARKER_LEN: usize = 7; + +/// If a file already contains lines which look like conflict markers of length +/// N, then the conflict markers we add will be of length (N + increment). This +/// number is chosen to make the conflict markers noticeably longer than the +/// existing markers. +const CONFLICT_MARKER_LEN_INCREMENT: usize = 4; fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> { for hunk in hunks { @@ -259,13 +265,21 @@ impl ConflictMarkerLineChar { } } +/// Represents a conflict marker line parsed from the file. Conflict marker +/// lines consist of a single ASCII character repeated for a certain length. +struct ConflictMarkerLine { + kind: ConflictMarkerLineChar, + len: usize, +} + /// Write a conflict marker to an output file. fn write_conflict_marker( output: &mut dyn Write, kind: ConflictMarkerLineChar, + len: usize, suffix_text: &str, ) -> io::Result<()> { - let conflict_marker = BString::new(vec![kind.to_byte(); CONFLICT_MARKER_LEN]); + let conflict_marker = BString::new(vec![kind.to_byte(); len]); if suffix_text.is_empty() { writeln!(output, "{conflict_marker}") @@ -274,17 +288,13 @@ fn write_conflict_marker( } } -/// Parse a conflict marker from a line of a file. The conflict marker must have -/// the correct length (CONFLICT_MARKER_LEN). -fn parse_conflict_marker(line: &[u8]) -> Option { +/// Parse a conflict marker from a line of a file. The conflict marker may have +/// any length (even less than MIN_CONFLICT_MARKER_LEN). +fn parse_conflict_marker_any_len(line: &[u8]) -> Option { let first_byte = *line.first()?; let kind = ConflictMarkerLineChar::parse_byte(first_byte)?; let len = line.iter().take_while(|&&b| b == first_byte).count(); - if len != CONFLICT_MARKER_LEN { - return None; - } - if let Some(next_byte) = line.get(len) { // If there is a character after the marker, it must be ASCII whitespace if !next_byte.is_ascii_whitespace() { @@ -292,7 +302,31 @@ fn parse_conflict_marker(line: &[u8]) -> Option { } } - Some(kind) + Some(ConflictMarkerLine { kind, len }) +} + +/// Parse a conflict marker, expecting it to be at least a certain length. Any +/// shorter conflict markers are ignored. +fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option { + parse_conflict_marker_any_len(line) + .filter(|marker| marker.len >= expected_len) + .map(|marker| marker.kind) +} + +/// Given a Merge of files, choose the conflict marker length to use when +/// materializing conflicts. +pub fn choose_materialized_conflict_marker_len>(single_hunk: &Merge) -> usize { + let max_existing_marker_len = single_hunk + .iter() + .flat_map(|file| file.as_ref().lines_with_terminator()) + .filter_map(parse_conflict_marker_any_len) + .map(|marker| marker.len) + .max() + .unwrap_or_default(); + + max_existing_marker_len + .saturating_add(CONFLICT_MARKER_LEN_INCREMENT) + .max(MIN_CONFLICT_MARKER_LEN) } pub fn materialize_merge_result>( @@ -304,7 +338,23 @@ pub fn materialize_merge_result>( match &merge_result { MergeResult::Resolved(content) => output.write_all(content), MergeResult::Conflict(hunks) => { - materialize_conflict_hunks(hunks, conflict_marker_style, output) + let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk); + materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output) + } + } +} + +fn materialize_merge_result_with_marker_len>( + single_hunk: &Merge, + conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, + output: &mut dyn Write, +) -> io::Result<()> { + let merge_result = files::merge(single_hunk); + match &merge_result { + MergeResult::Resolved(content) => output.write_all(content), + MergeResult::Conflict(hunks) => { + materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output) } } } @@ -317,9 +367,15 @@ pub fn materialize_merge_result_to_bytes>( match merge_result { MergeResult::Resolved(content) => content, MergeResult::Conflict(hunks) => { + let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk); let mut output = Vec::new(); - materialize_conflict_hunks(&hunks, conflict_marker_style, &mut output) - .expect("writing to an in-memory buffer should never fail"); + materialize_conflict_hunks( + &hunks, + conflict_marker_style, + conflict_marker_len, + &mut output, + ) + .expect("writing to an in-memory buffer should never fail"); output.into() } } @@ -328,6 +384,7 @@ pub fn materialize_merge_result_to_bytes>( fn materialize_conflict_hunks( hunks: &[Merge], conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { let num_conflicts = hunks @@ -345,13 +402,21 @@ fn materialize_conflict_hunks( match (conflict_marker_style, hunk.as_slice()) { // 2-sided conflicts can use Git-style conflict markers (ConflictMarkerStyle::Git, [left, base, right]) => { - materialize_git_style_conflict(left, base, right, &conflict_info, output)?; + materialize_git_style_conflict( + left, + base, + right, + &conflict_info, + conflict_marker_len, + output, + )?; } _ => { materialize_jj_style_conflict( hunk, &conflict_info, conflict_marker_style, + conflict_marker_len, output, )?; } @@ -366,22 +431,35 @@ fn materialize_git_style_conflict( base: &[u8], right: &[u8], conflict_info: &str, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { write_conflict_marker( output, ConflictMarkerLineChar::ConflictStart, + conflict_marker_len, &format!("Side #1 ({conflict_info})"), )?; output.write_all(left)?; - write_conflict_marker(output, ConflictMarkerLineChar::GitAncestor, "Base")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::GitAncestor, + conflict_marker_len, + "Base", + )?; output.write_all(base)?; // VS Code doesn't seem to support any trailing text on the separator line - write_conflict_marker(output, ConflictMarkerLineChar::GitSeparator, "")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::GitSeparator, + conflict_marker_len, + "", + )?; output.write_all(right)?; write_conflict_marker( output, ConflictMarkerLineChar::ConflictEnd, + conflict_marker_len, &format!("Side #2 ({conflict_info} ends)"), )?; @@ -392,44 +470,49 @@ fn materialize_jj_style_conflict( hunk: &Merge, conflict_info: &str, conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { // Write a positive snapshot (side) of a conflict - fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> { + let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| { write_conflict_marker( output, ConflictMarkerLineChar::Add, + conflict_marker_len, &format!("Contents of side #{}", add_index + 1), )?; output.write_all(data) - } + }; // Write a negative snapshot (base) of a conflict - fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> { + let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| { write_conflict_marker( output, ConflictMarkerLineChar::Remove, + conflict_marker_len, &format!("Contents of {base_str}"), )?; output.write_all(data) - } + }; // Write a diff from a negative term to a positive term - fn write_diff( - base_str: &str, - add_index: usize, - diff: &[DiffHunk], - output: &mut dyn Write, - ) -> io::Result<()> { - write_conflict_marker( - output, - ConflictMarkerLineChar::Diff, - &format!("Changes from {base_str} to side #{}", add_index + 1), - )?; - write_diff_hunks(diff, output) - } + let write_diff = + |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| { + write_conflict_marker( + output, + ConflictMarkerLineChar::Diff, + conflict_marker_len, + &format!("Changes from {base_str} to side #{}", add_index + 1), + )?; + write_diff_hunks(diff, output) + }; - write_conflict_marker(output, ConflictMarkerLineChar::ConflictStart, conflict_info)?; + write_conflict_marker( + output, + ConflictMarkerLineChar::ConflictStart, + conflict_marker_len, + conflict_info, + )?; let mut add_index = 0; for (base_index, left) in hunk.removes().enumerate() { // The vast majority of conflicts one actually tries to resolve manually have 1 @@ -482,6 +565,7 @@ fn materialize_jj_style_conflict( write_conflict_marker( output, ConflictMarkerLineChar::ConflictEnd, + conflict_marker_len, &format!("{conflict_info} ends"), )?; Ok(()) @@ -530,9 +614,16 @@ pub fn materialized_diff_stream<'a>( /// has to provide the expected number of merge sides (adds). Conflict /// markers that are otherwise valid will be considered invalid if /// they don't have the expected arity. +/// +/// All conflict markers in the file must be at least as long as the expected +/// length. Any shorter conflict markers will be ignored. // TODO: "parse" is not usually the opposite of "materialize", so maybe we // should rename them to "serialize" and "deserialize"? -pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option>> { +pub fn parse_conflict( + input: &[u8], + num_sides: usize, + expected_marker_len: usize, +) -> Option>> { if input.is_empty() { return None; } @@ -542,7 +633,7 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option { conflict_start = Some(pos); conflict_start_len = line.len(); @@ -550,7 +641,7 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option { if let Some(conflict_start_index) = conflict_start.take() { let conflict_body = &input[conflict_start_index + conflict_start_len..pos]; - let hunk = parse_conflict_hunk(conflict_body); + let hunk = parse_conflict_hunk(conflict_body, expected_marker_len); if hunk.num_sides() == num_sides { let resolved_slice = &input[resolved_start..conflict_start_index]; if !resolved_slice.is_empty() { @@ -581,12 +672,12 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option Merge { +fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge { // If the hunk starts with a conflict marker, find its first character let initial_conflict_marker = input .lines_with_terminator() .next() - .and_then(parse_conflict_marker); + .and_then(|line| parse_conflict_marker(line, expected_marker_len)); match initial_conflict_marker { // JJ-style conflicts must start with one of these 3 conflict marker lines @@ -594,16 +685,18 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge { ConflictMarkerLineChar::Diff | ConflictMarkerLineChar::Remove | ConflictMarkerLineChar::Add, - ) => parse_jj_style_conflict_hunk(input), + ) => parse_jj_style_conflict_hunk(input, expected_marker_len), // Git-style conflicts either must not start with a conflict marker line, or must start with // the "|||||||" conflict marker line (if the first side was empty) - None | Some(ConflictMarkerLineChar::GitAncestor) => parse_git_style_conflict_hunk(input), + None | Some(ConflictMarkerLineChar::GitAncestor) => { + parse_git_style_conflict_hunk(input, expected_marker_len) + } // No other conflict markers are allowed at the start of a hunk Some(_) => Merge::resolved(BString::new(vec![])), } } -fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { +fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge { enum State { Diff, Remove, @@ -614,7 +707,7 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { let mut removes = vec![]; let mut adds = vec![]; for line in input.lines_with_terminator() { - match parse_conflict_marker(line) { + match parse_conflict_marker(line, expected_marker_len) { Some(ConflictMarkerLineChar::Diff) => { state = State::Diff; removes.push(BString::new(vec![])); @@ -674,7 +767,7 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { } } -fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { +fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge { #[derive(PartialEq, Eq)] enum State { Left, @@ -686,7 +779,7 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { let mut base = BString::new(vec![]); let mut right = BString::new(vec![]); for line in input.lines_with_terminator() { - match parse_conflict_marker(line) { + match parse_conflict_marker(line, expected_marker_len) { Some(ConflictMarkerLineChar::GitAncestor) => { if state == State::Left { state = State::Base; @@ -742,7 +835,14 @@ pub async fn update_from_content( // copy. let mut old_content = Vec::with_capacity(content.len()); let merge_hunk = extract_as_single_hunk(simplified_file_ids, store, path).await?; - materialize_merge_result(&merge_hunk, conflict_marker_style, &mut old_content).unwrap(); + let conflict_marker_len = choose_materialized_conflict_marker_len(&merge_hunk); + materialize_merge_result_with_marker_len( + &merge_hunk, + conflict_marker_style, + conflict_marker_len, + &mut old_content, + ) + .unwrap(); if content == old_content { return Ok(file_ids.clone()); } @@ -752,11 +852,16 @@ pub async fn update_from_content( // the arity of the unsimplified conflicts since such a conflict may be // present in the working copy if written by an earlier version of jj. let (used_file_ids, hunks) = 'hunks: { - if let Some(hunks) = parse_conflict(content, simplified_file_ids.num_sides()) { + if let Some(hunks) = parse_conflict( + content, + simplified_file_ids.num_sides(), + conflict_marker_len, + ) { break 'hunks (simplified_file_ids, hunks); }; if simplified_file_ids.num_sides() != file_ids.num_sides() { - if let Some(hunks) = parse_conflict(content, file_ids.num_sides()) { + if let Some(hunks) = parse_conflict(content, file_ids.num_sides(), conflict_marker_len) + { break 'hunks (file_ids, hunks); }; }; diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index e3c4a00f6f..d62f8e1eac 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -13,12 +13,14 @@ // limitations under the License. use indoc::indoc; +use itertools::Itertools; use jj_lib::backend::FileId; use jj_lib::conflicts::extract_as_single_hunk; use jj_lib::conflicts::materialize_merge_result_to_bytes; use jj_lib::conflicts::parse_conflict; use jj_lib::conflicts::update_from_content; use jj_lib::conflicts::ConflictMarkerStyle; +use jj_lib::conflicts::MIN_CONFLICT_MARKER_LEN; use jj_lib::merge::Merge; use jj_lib::repo::Repo; use jj_lib::repo_path::RepoPath; @@ -505,7 +507,7 @@ fn test_materialize_parse_roundtrip() { // The first add should always be from the left side insta::assert_debug_snapshot!( - parse_conflict(materialized.as_bytes(), conflict.num_sides()), + parse_conflict(materialized.as_bytes(), conflict.num_sides(), MIN_CONFLICT_MARKER_LEN), @r###" Some( [ @@ -628,7 +630,8 @@ fn test_materialize_conflict_no_newlines_at_eof() { // BUG(#3968): These conflict markers cannot be parsed insta::assert_debug_snapshot!(parse_conflict( materialized.as_bytes(), - conflict.num_sides() + conflict.num_sides(), + MIN_CONFLICT_MARKER_LEN ),@"None"); } @@ -795,7 +798,8 @@ line 3 line 4 line 5 "}, - 2 + 2, + 7 ), None ); @@ -817,7 +821,8 @@ fn test_parse_conflict_simple() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), @r###" Some( @@ -853,7 +858,8 @@ fn test_parse_conflict_simple() { >>>>>>> More and more text line 5 "}, - 2 + 2, + 7 ), @r###" Some( @@ -892,7 +898,8 @@ fn test_parse_conflict_simple() { >>>>>>> Random text line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -931,7 +938,8 @@ fn test_parse_conflict_simple() { >>>>>>> Random text line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -969,7 +977,8 @@ fn test_parse_conflict_simple() { >>>>>>> End line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1005,7 +1014,8 @@ fn test_parse_conflict_simple() { >>>>>>> End line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1027,8 +1037,8 @@ fn test_parse_conflict_simple() { ) "# ); - // The conflict markers are too long and shouldn't parse (though we may - // decide to change this in the future) + // The conflict markers are longer than the originally materialized markers, but + // we allow them to parse anyway insta::assert_debug_snapshot!( parse_conflict(indoc! {b" line 1 @@ -1043,9 +1053,28 @@ fn test_parse_conflict_simple() { >>>>>>>>>>> line 5 "}, - 2 + 2, + 7 ), - @"None" + @r#" + Some( + [ + Resolved( + "line 1\n", + ), + Conflicted( + [ + "line 2\nleft\nline 4\n", + "line 2\nline 3\nline 4\n", + "right\n", + ], + ), + Resolved( + "line 5\n", + ), + ], + ) + "# ); } @@ -1071,7 +1100,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), @r###" Some( @@ -1114,7 +1144,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> Random text line 5 "}, - 3 + 3, + 7 ), @r###" Some( @@ -1160,7 +1191,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> Random text line 5 "}, - 3 + 3, + 7 ), @r#" Some( @@ -1203,7 +1235,8 @@ fn test_parse_conflict_crlf_markers() { >>>>>>>\r line 5\r "}, - 2 + 2, + 7 ), @r#" Some( @@ -1248,7 +1281,8 @@ fn test_parse_conflict_diff_stripped_whitespace() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1290,7 +1324,8 @@ fn test_parse_conflict_wrong_arity() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), None ); @@ -1311,7 +1346,8 @@ fn test_parse_conflict_malformed_missing_removes() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1334,7 +1370,8 @@ fn test_parse_conflict_malformed_marker() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1358,7 +1395,8 @@ fn test_parse_conflict_malformed_diff() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1380,7 +1418,8 @@ fn test_parse_conflict_snapshot_missing_header() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1400,7 +1439,8 @@ fn test_parse_conflict_wrong_git_style() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1422,7 +1462,8 @@ fn test_parse_conflict_git_reordered_headers() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1448,7 +1489,8 @@ fn test_parse_conflict_git_too_many_sides() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), None ); @@ -1471,7 +1513,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1489,7 +1532,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1506,7 +1550,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> Conflict 1 of 1 ends line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1541,7 +1586,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> Side #2 (Conflict 1 of 1 ends) line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1810,6 +1856,320 @@ fn test_update_conflict_from_content_simplified_conflict() { ); } +#[test] +fn test_update_conflict_from_content_with_long_markers() { + let test_repo = TestRepo::init(); + let store = test_repo.repo.store(); + + // Create conflicts which contain conflict markers of varying lengths + let path = RepoPath::from_internal_string("dir/file"); + let base_file_id = testutils::write_file( + store, + path, + indoc! {" + line 1 + line 2 + line 3 + "}, + ); + let left_file_id = testutils::write_file( + store, + path, + indoc! {" + <<<< left 1 + line 2 + <<<<<<<<<<<< left 3 + "}, + ); + let right_file_id = testutils::write_file( + store, + path, + indoc! {" + >>>>>>> right 1 + line 2 + >>>>>>>>>>>> right 3 + "}, + ); + let conflict = Merge::from_removes_adds( + vec![Some(base_file_id.clone())], + vec![Some(left_file_id.clone()), Some(right_file_id.clone())], + ); + + // The conflict should be materialized using long conflict markers + let materialized = + materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Snapshot); + insta::assert_snapshot!(materialized, @r##" + <<<<<<<<<<<<<<<< Conflict 1 of 2 + ++++++++++++++++ Contents of side #1 + <<<< left 1 + ---------------- Contents of base + line 1 + ++++++++++++++++ Contents of side #2 + >>>>>>> right 1 + >>>>>>>>>>>>>>>> Conflict 1 of 2 ends + line 2 + <<<<<<<<<<<<<<<< Conflict 2 of 2 + ++++++++++++++++ Contents of side #1 + <<<<<<<<<<<< left 3 + ---------------- Contents of base + line 3 + ++++++++++++++++ Contents of side #2 + >>>>>>>>>>>> right 3 + >>>>>>>>>>>>>>>> Conflict 2 of 2 ends + "## + ); + + // Parse the conflict markers using a different conflict marker style. This is + // to avoid the two versions of the file being obviously identical, so that we + // can test the actual parsing logic. + let parse = |conflict, content| { + update_from_content(conflict, store, path, content, ConflictMarkerStyle::Diff) + .block_on() + .unwrap() + }; + assert_eq!(parse(&conflict, materialized.as_bytes()), conflict); + + // Test resolving the conflict, leaving some fake conflict markers which should + // not be parsed since they are too short + let resolved_file_contents = indoc! {" + <<<<<<<<<<<< not a real conflict! + ++++++++++++ + left + ------------ + base + ++++++++++++ + right + >>>>>>>>>>>> + "}; + let resolved_file_id = testutils::write_file(store, path, resolved_file_contents); + assert_eq!( + parse(&conflict, resolved_file_contents.as_bytes()), + Merge::normal(resolved_file_id) + ); + + // Resolve one of the conflicts, decreasing the minimum conflict marker length + let new_conflict_contents = indoc! {" + <<<<<<<<<<<<<<<< Conflict 1 of 2 + ++++++++++++++++ Contents of side #1 + <<<< left 1 + ---------------- Contents of base + line 1 + ++++++++++++++++ Contents of side #2 + >>>>>>> right 1 + >>>>>>>>>>>>>>>> Conflict 1 of 2 ends + line 2 + line 3 + "}; + + // Confirm that the new conflict parsed correctly + let new_conflict = parse(&conflict, new_conflict_contents.as_bytes()); + assert_eq!(new_conflict.num_sides(), 2); + let new_conflict_terms = new_conflict + .iter() + .map(|id| { + let mut file = store.read_file(path, id.as_ref().unwrap()).unwrap(); + let mut buf = String::new(); + file.read_to_string(&mut buf).unwrap(); + buf + }) + .collect_vec(); + let [new_left_side, new_base, new_right_side] = new_conflict_terms.as_slice() else { + unreachable!() + }; + insta::assert_snapshot!(new_left_side, @r#" + <<<< left 1 + line 2 + line 3 + "#); + insta::assert_snapshot!(new_base, @r#" + line 1 + line 2 + line 3 + "#); + insta::assert_snapshot!(new_right_side, @r#" + >>>>>>> right 1 + line 2 + line 3 + "#); + + // The conflict markers should still parse in future snapshots even though + // they're now longer than necessary + assert_eq!( + parse(&new_conflict, new_conflict_contents.as_bytes()), + new_conflict + ); + + // If the new conflict is materialized again, it should have shorter + // conflict markers now + insta::assert_snapshot!( + materialize_conflict_string(store, path, &new_conflict, ConflictMarkerStyle::Snapshot), + @r##" + <<<<<<<<<<< Conflict 1 of 1 + +++++++++++ Contents of side #1 + <<<< left 1 + ----------- Contents of base + line 1 + +++++++++++ Contents of side #2 + >>>>>>> right 1 + >>>>>>>>>>> Conflict 1 of 1 ends + line 2 + line 3 + "## + ); +} + +#[test] +fn test_update_from_content_malformed_conflict() { + let test_repo = TestRepo::init(); + let store = test_repo.repo.store(); + + let path = RepoPath::from_internal_string("dir/file"); + let base_file_id = testutils::write_file( + store, + path, + indoc! {" + line 1 + line 2 + line 3 + line 4 + line 5 + "}, + ); + let left_file_id = testutils::write_file( + store, + path, + indoc! {" + line 1 + line 2 left + line 3 + line 4 left + line 5 + "}, + ); + let right_file_id = testutils::write_file( + store, + path, + indoc! {" + line 1 + line 2 right + line 3 + line 4 right + line 5 + "}, + ); + let conflict = Merge::from_removes_adds( + vec![Some(base_file_id.clone())], + vec![Some(left_file_id.clone()), Some(right_file_id.clone())], + ); + + let materialized = + materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Diff); + insta::assert_snapshot!(materialized, @r##" + line 1 + <<<<<<< Conflict 1 of 2 + %%%%%%% Changes from base to side #1 + -line 2 + +line 2 left + +++++++ Contents of side #2 + line 2 right + >>>>>>> Conflict 1 of 2 ends + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + +++++++ Contents of side #2 + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "## + ); + + let parse = |conflict, content| { + update_from_content(conflict, store, path, content, ConflictMarkerStyle::Diff) + .block_on() + .unwrap() + }; + assert_eq!(parse(&conflict, materialized.as_bytes()), conflict); + + // Make a change to the second conflict that causes it to become invalid + let new_conflict_contents = indoc! {" + line 1 + <<<<<<< Conflict 1 of 2 + %%%%%%% Changes from base to side #1 + -line 2 + +line 2 left + +++++++ Contents of side #2 + line 2 right + >>>>>>> Conflict 1 of 2 ends + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "}; + // On the first snapshot, it will parse as a conflict containing conflict + // markers as text + let new_conflict = parse(&conflict, new_conflict_contents.as_bytes()); + assert_eq!(new_conflict.num_sides(), 2); + let new_conflict_terms = new_conflict + .iter() + .map(|id| { + let mut file = store.read_file(path, id.as_ref().unwrap()).unwrap(); + let mut buf = String::new(); + file.read_to_string(&mut buf).unwrap(); + buf + }) + .collect_vec(); + let [new_left_side, new_base, new_right_side] = new_conflict_terms.as_slice() else { + unreachable!() + }; + insta::assert_snapshot!(new_left_side, @r##" + line 1 + line 2 left + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "##); + insta::assert_snapshot!(new_base, @r##" + line 1 + line 2 + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "##); + insta::assert_snapshot!(new_right_side, @r##" + line 1 + line 2 right + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "##); + + // Snapshotting again will cause the conflict to appear resolved + let second_snapshot = parse(&new_conflict, new_conflict_contents.as_bytes()); + assert_ne!(second_snapshot, new_conflict); + assert!(second_snapshot.is_resolved()); +} + fn materialize_conflict_string( store: &Store, path: &RepoPath, From f2d6c289956a095e316a4b6fa572d33187a878b7 Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Wed, 27 Nov 2024 14:15:49 -0600 Subject: [PATCH 3/3] local_working_copy: store materialized conflict marker length Storing the conflict marker length in the working copy makes conflict parsing more consistent, and it allows us to parse valid conflict hunks even if the user left some invalid conflict markers in the file while resolving the conflicts. --- cli/src/commands/debug/local_working_copy.rs | 3 +- cli/src/merge_tools/external.rs | 20 ++- cli/tests/test_working_copy.rs | 180 +++++++++++++++++++ lib/src/conflicts.rs | 26 ++- lib/src/local_working_copy.rs | 106 +++++++++-- lib/src/protos/working_copy.proto | 6 + lib/src/protos/working_copy.rs | 9 + lib/tests/test_conflicts.rs | 105 ++++++++--- 8 files changed, 412 insertions(+), 43 deletions(-) diff --git a/cli/src/commands/debug/local_working_copy.rs b/cli/src/commands/debug/local_working_copy.rs index 98408799b2..eee9a8de51 100644 --- a/cli/src/commands/debug/local_working_copy.rs +++ b/cli/src/commands/debug/local_working_copy.rs @@ -40,10 +40,11 @@ pub fn cmd_debug_local_working_copy( for (file, state) in wc.file_states()? { writeln!( ui.stdout(), - "{:?} {:13?} {:10?} {:?}", + "{:?} {:13?} {:10?} {:?} {:?}", state.file_type, state.size, state.mtime.0, + state.materialized_conflict_data, file )?; } diff --git a/cli/src/merge_tools/external.rs b/cli/src/merge_tools/external.rs index ff4db30619..5f388976c9 100644 --- a/cli/src/merge_tools/external.rs +++ b/cli/src/merge_tools/external.rs @@ -12,8 +12,10 @@ use jj_lib::backend::FileId; use jj_lib::backend::MergedTreeId; use jj_lib::backend::TreeValue; use jj_lib::conflicts; -use jj_lib::conflicts::materialize_merge_result_to_bytes; +use jj_lib::conflicts::choose_materialized_conflict_marker_len; +use jj_lib::conflicts::materialize_merge_result_to_bytes_with_marker_len; use jj_lib::conflicts::ConflictMarkerStyle; +use jj_lib::conflicts::MIN_CONFLICT_MARKER_LEN; use jj_lib::gitignore::GitIgnoreFile; use jj_lib::matchers::Matcher; use jj_lib::merge::Merge; @@ -181,8 +183,21 @@ pub fn run_mergetool_external( .conflict_marker_style .unwrap_or(default_conflict_marker_style); + // If the merge tool doesn't get conflict markers pre-populated in the output + // file, we should default to accepting MIN_CONFLICT_MARKER_LEN since the + // merge tool is unlikely to know about our rules for conflict marker length. + // In the future, we may want to add a "$markerLength" variable. + let conflict_marker_len = if editor.merge_tool_edits_conflict_markers { + choose_materialized_conflict_marker_len(&content) + } else { + MIN_CONFLICT_MARKER_LEN + }; let initial_output_content = if editor.merge_tool_edits_conflict_markers { - materialize_merge_result_to_bytes(&content, conflict_marker_style) + materialize_merge_result_to_bytes_with_marker_len( + &content, + conflict_marker_style, + conflict_marker_len, + ) } else { BString::default() }; @@ -257,6 +272,7 @@ pub fn run_mergetool_external( repo_path, output_file_contents.as_slice(), conflict_marker_style, + conflict_marker_len, ) .block_on()? } else { diff --git a/cli/tests/test_working_copy.rs b/cli/tests/test_working_copy.rs index c037675cf9..43407df29d 100644 --- a/cli/tests/test_working_copy.rs +++ b/cli/tests/test_working_copy.rs @@ -13,6 +13,7 @@ // limitations under the License. use indoc::indoc; +use regex::Regex; use crate::common::TestEnvironment; @@ -249,3 +250,182 @@ fn test_snapshot_invalid_ignore_pattern() { 2: invalid utf-8 sequence of 1 bytes from index 0 "##); } + +#[test] +fn test_conflict_marker_length_stored_in_working_copy() { + let test_env = TestEnvironment::default(); + test_env.jj_cmd_ok(test_env.env_root(), &["git", "init", "repo"]); + let repo_path = test_env.env_root().join("repo"); + + // Create a conflict in the working copy with long markers on one side + let conflict_file = repo_path.join("file"); + std::fs::write( + &conflict_file, + indoc! {" + line 1 + line 2 + line 3 + "}, + ) + .unwrap(); + test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "base"]); + std::fs::write( + &conflict_file, + indoc! {" + line 1 + line 2 - left + line 3 - left + "}, + ) + .unwrap(); + test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "side-a"]); + test_env.jj_cmd_ok(&repo_path, &["new", "description(base)", "-m", "side-b"]); + std::fs::write( + &conflict_file, + indoc! {" + line 1 + ======= fake marker + line 2 - right + ======= fake marker + line 3 + "}, + ) + .unwrap(); + test_env.jj_cmd_ok( + &repo_path, + &["new", "description(side-a)", "description(side-b)"], + ); + + // File should be materialized with long conflict markers + insta::assert_snapshot!(std::fs::read_to_string(&conflict_file).unwrap(), @r##" + line 1 + <<<<<<<<<<< Conflict 1 of 1 + %%%%%%%%%%% Changes from base to side #1 + -line 2 + -line 3 + +line 2 - left + +line 3 - left + +++++++++++ Contents of side #2 + ======= fake marker + line 2 - right + ======= fake marker + line 3 + >>>>>>>>>>> Conflict 1 of 1 ends + "##); + + // The timestamps in the `jj debug local-working-copy` output change, so we want + // to remove them before asserting the snapshot + let timestamp_regex = Regex::new(r"\b\d{10,}\b").unwrap(); + // On Windows, executable is always `()`, but on Unix-like systems, it's `true` + // or `false`, so we want to remove it from the output as well + let executable_regex = Regex::new("executable: [^ ]+").unwrap(); + + let redact_output = |output: &str| { + let output = timestamp_regex.replace_all(output, ""); + let output = executable_regex.replace_all(&output, ""); + output.into_owned() + }; + + // Working copy should contain conflict marker length + let stdout = test_env.jj_cmd_success(&repo_path, &["debug", "local-working-copy"]); + insta::assert_snapshot!(redact_output(&stdout), @r#" + Current operation: OperationId("6feb53603f9f7324085d2d89dca19a6dac93fef6795cfd5d57090ff803d404ab1196b45d5b97faa641f6a78302ac0fbd149f5e5a880d1fd64d6520c31beab213") + Current tree: Merge(Conflicted([TreeId("381273b50cf73f8c81b3f1502ee89e9bbd6c1518"), TreeId("771f3d31c4588ea40a8864b2a981749888e596c2"), TreeId("f56b8223da0dab22b03b8323ced4946329aeb4e0")])) + Normal { } 249 Some(MaterializedConflictData { conflict_marker_len: 11 }) "file" + "#); + + // Update the conflict with more fake markers, and it should still parse + // correctly (the markers should be ignored) + std::fs::write( + &conflict_file, + indoc! {" + line 1 + <<<<<<<<<<< Conflict 1 of 1 + %%%%%%%%%%% Changes from base to side #1 + -line 2 + -line 3 + +line 2 - left + +line 3 - left + +++++++++++ Contents of side #2 + <<<<<<< fake marker + ||||||| fake marker + line 2 - right + ======= fake marker + line 3 + >>>>>>> fake marker + >>>>>>>>>>> Conflict 1 of 1 ends + "}, + ) + .unwrap(); + + // The file should still be conflicted, and the new content should be saved + let stdout = test_env.jj_cmd_success(&repo_path, &["st"]); + insta::assert_snapshot!(stdout, @r#" + Working copy changes: + M file + There are unresolved conflicts at these paths: + file 2-sided conflict + Working copy : mzvwutvl 3a981880 (conflict) (no description set) + Parent commit: rlvkpnrz ce613b49 side-a + Parent commit: zsuskuln 7b2b03ab side-b + "#); + insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["diff", "--git"]), @r##" + diff --git a/file b/file + --- a/file + +++ b/file + @@ -6,8 +6,10 @@ + +line 2 - left + +line 3 - left + +++++++++++ Contents of side #2 + -======= fake marker + +<<<<<<< fake marker + +||||||| fake marker + line 2 - right + ======= fake marker + line 3 + +>>>>>>> fake marker + >>>>>>>>>>> Conflict 1 of 1 ends + "##); + + // Working copy should still contain conflict marker length + let stdout = test_env.jj_cmd_success(&repo_path, &["debug", "local-working-copy"]); + insta::assert_snapshot!(redact_output(&stdout), @r#" + Current operation: OperationId("205bc702428a522e0b175938a51c51b59741c854a609ba63c89de76ffda6e5eff6fcc00725328b1a91f448401769773cefcff01fac3448190d2cea4e137d2166") + Current tree: Merge(Conflicted([TreeId("381273b50cf73f8c81b3f1502ee89e9bbd6c1518"), TreeId("771f3d31c4588ea40a8864b2a981749888e596c2"), TreeId("3329c18c95f7b7a55c278c2259e9c4ce711fae59")])) + Normal { } 289 Some(MaterializedConflictData { conflict_marker_len: 11 }) "file" + "#); + + // Resolve the conflict + std::fs::write( + &conflict_file, + indoc! {" + line 1 + <<<<<<< fake marker + ||||||| fake marker + line 2 - left + line 2 - right + ======= fake marker + line 3 - left + >>>>>>> fake marker + "}, + ) + .unwrap(); + + let stdout = test_env.jj_cmd_success(&repo_path, &["st"]); + insta::assert_snapshot!(stdout, @r#" + Working copy changes: + M file + Working copy : mzvwutvl 1aefd866 (no description set) + Parent commit: rlvkpnrz ce613b49 side-a + Parent commit: zsuskuln 7b2b03ab side-b + "#); + + // When the file is resolved, the conflict marker length is removed from the + // working copy + let stdout = test_env.jj_cmd_success(&repo_path, &["debug", "local-working-copy"]); + insta::assert_snapshot!(redact_output(&stdout), @r#" + Current operation: OperationId("2206ce3c108b1573df0841138c226bba1ab3cff900a5899ed31ac69162c7d6f30d37fb5ab43da60dba88047b8ab22d453887fff688f26dfcf04f2c99420a5563") + Current tree: Merge(Resolved(TreeId("6120567b3cb2472d549753ed3e4b84183d52a650"))) + Normal { } 130 None "file" + "#); +} diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 0438f802ac..1a081c43fe 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -344,7 +344,7 @@ pub fn materialize_merge_result>( } } -fn materialize_merge_result_with_marker_len>( +pub fn materialize_merge_result_with_marker_len>( single_hunk: &Merge, conflict_marker_style: ConflictMarkerStyle, conflict_marker_len: usize, @@ -381,6 +381,28 @@ pub fn materialize_merge_result_to_bytes>( } } +pub fn materialize_merge_result_to_bytes_with_marker_len>( + single_hunk: &Merge, + conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, +) -> BString { + let merge_result = files::merge(single_hunk); + match merge_result { + MergeResult::Resolved(content) => content, + MergeResult::Conflict(hunks) => { + let mut output = Vec::new(); + materialize_conflict_hunks( + &hunks, + conflict_marker_style, + conflict_marker_len, + &mut output, + ) + .expect("writing to an in-memory buffer should never fail"); + output.into() + } + } +} + fn materialize_conflict_hunks( hunks: &[Merge], conflict_marker_style: ConflictMarkerStyle, @@ -824,6 +846,7 @@ pub async fn update_from_content( path: &RepoPath, content: &[u8], conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, ) -> BackendResult>> { let simplified_file_ids = file_ids.clone().simplify(); let simplified_file_ids = &simplified_file_ids; @@ -835,7 +858,6 @@ pub async fn update_from_content( // copy. let mut old_content = Vec::with_capacity(content.len()); let merge_hunk = extract_as_single_hunk(simplified_file_ids, store, path).await?; - let conflict_marker_len = choose_materialized_conflict_marker_len(&merge_hunk); materialize_merge_result_with_marker_len( &merge_hunk, conflict_marker_style, diff --git a/lib/src/local_working_copy.rs b/lib/src/local_working_copy.rs index f07af6d0ee..5fa8251abd 100644 --- a/lib/src/local_working_copy.rs +++ b/lib/src/local_working_copy.rs @@ -66,10 +66,12 @@ use crate::backend::TreeId; use crate::backend::TreeValue; use crate::commit::Commit; use crate::conflicts; -use crate::conflicts::materialize_merge_result_to_bytes; +use crate::conflicts::choose_materialized_conflict_marker_len; +use crate::conflicts::materialize_merge_result_to_bytes_with_marker_len; use crate::conflicts::materialize_tree_value; use crate::conflicts::ConflictMarkerStyle; use crate::conflicts::MaterializedTreeValue; +use crate::conflicts::MIN_CONFLICT_MARKER_LEN; use crate::file_util::check_symlink_support; use crate::file_util::try_symlink; #[cfg(feature = "watchman")] @@ -125,11 +127,17 @@ pub enum FileType { GitSubmodule, } +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct MaterializedConflictData { + pub conflict_marker_len: u32, +} + #[derive(Debug, PartialEq, Eq, Clone)] pub struct FileState { pub file_type: FileType, pub mtime: MillisSinceEpoch, pub size: u64, + pub materialized_conflict_data: Option, /* TODO: What else do we need here? Git stores a lot of fields. * TODO: Could possibly handle case-insensitive file systems keeping an * Option with the actual path here. */ @@ -147,10 +155,16 @@ impl FileState { file_type: FileType::Normal { executable }, mtime: MillisSinceEpoch(0), size: 0, + materialized_conflict_data: None, } } - fn for_file(executable: bool, size: u64, metadata: &Metadata) -> Self { + fn for_file( + executable: bool, + size: u64, + metadata: &Metadata, + materialized_conflict_data: Option, + ) -> Self { #[cfg(windows)] let executable = { // Windows doesn't support executable bit. @@ -160,6 +174,7 @@ impl FileState { file_type: FileType::Normal { executable }, mtime: mtime_from_metadata(metadata), size, + materialized_conflict_data, } } @@ -171,6 +186,7 @@ impl FileState { file_type: FileType::Symlink, mtime: mtime_from_metadata(metadata), size: metadata.len(), + materialized_conflict_data: None, } } @@ -179,6 +195,7 @@ impl FileState { file_type: FileType::GitSubmodule, mtime: MillisSinceEpoch(0), size: 0, + materialized_conflict_data: None, } } } @@ -418,6 +435,11 @@ fn file_state_from_proto(proto: &crate::protos::working_copy::FileState) -> File file_type, mtime: MillisSinceEpoch(proto.mtime_millis_since_epoch), size: proto.size, + materialized_conflict_data: proto.materialized_conflict_data.as_ref().map(|data| { + MaterializedConflictData { + conflict_marker_len: data.conflict_marker_len, + } + }), } } @@ -436,6 +458,11 @@ fn file_state_to_proto(file_state: &FileState) -> crate::protos::working_copy::F proto.file_type = file_type as i32; proto.mtime_millis_since_epoch = file_state.mtime.0; proto.size = file_state.size; + proto.materialized_conflict_data = file_state.materialized_conflict_data.map(|data| { + crate::protos::working_copy::MaterializedConflictData { + conflict_marker_len: data.conflict_marker_len, + } + }); proto } @@ -651,7 +678,10 @@ fn mtime_from_metadata(metadata: &Metadata) -> MillisSinceEpoch { ) } -fn file_state(metadata: &Metadata) -> Option { +fn file_state( + metadata: &Metadata, + materialized_conflict_data: Option, +) -> Option { let metadata_file_type = metadata.file_type(); let file_type = if metadata_file_type.is_dir() { None @@ -676,6 +706,7 @@ fn file_state(metadata: &Metadata) -> Option { file_type, mtime, size, + materialized_conflict_data, } }) } @@ -1250,8 +1281,12 @@ impl FileSnapshotter<'_> { max_size: self.max_new_file_size, }; self.untracked_paths_tx.send((path, reason)).ok(); - Ok(None) - } else if let Some(new_file_state) = file_state(&metadata) { + return Ok(None); + } + let materialized_conflict_data = maybe_current_file_state + .as_ref() + .and_then(|state| state.materialized_conflict_data); + if let Some(new_file_state) = file_state(&metadata, materialized_conflict_data) { self.process_present_file( path, &entry.path(), @@ -1286,7 +1321,9 @@ impl FileSnapshotter<'_> { }); } }; - if let Some(new_file_state) = metadata.as_ref().and_then(file_state) { + if let Some(new_file_state) = metadata.as_ref().and_then(|metadata| { + file_state(metadata, current_file_state.materialized_conflict_data) + }) { self.process_present_file( tracked_path.to_owned(), &disk_path, @@ -1305,13 +1342,13 @@ impl FileSnapshotter<'_> { path: RepoPathBuf, disk_path: &Path, maybe_current_file_state: Option<&FileState>, - new_file_state: FileState, + mut new_file_state: FileState, ) -> Result<(), SnapshotError> { let update = self.get_updated_tree_value( &path, disk_path, maybe_current_file_state, - &new_file_state, + &mut new_file_state, )?; if let Some(tree_value) = update { self.tree_entries_tx.send((path.clone(), tree_value)).ok(); @@ -1360,7 +1397,7 @@ impl FileSnapshotter<'_> { repo_path: &RepoPath, disk_path: &Path, maybe_current_file_state: Option<&FileState>, - new_file_state: &FileState, + new_file_state: &mut FileState, ) -> Result, SnapshotError> { let clean = match maybe_current_file_state { None => { @@ -1391,7 +1428,13 @@ impl FileSnapshotter<'_> { }; let new_tree_values = match new_file_type { FileType::Normal { executable } => self - .write_path_to_store(repo_path, disk_path, ¤t_tree_values, executable) + .write_path_to_store( + repo_path, + disk_path, + ¤t_tree_values, + new_file_state, + executable, + ) .block_on()?, FileType::Symlink => { let id = self @@ -1418,6 +1461,7 @@ impl FileSnapshotter<'_> { repo_path: &RepoPath, disk_path: &Path, current_tree_values: &MergedTreeValue, + new_file_state: &mut FileState, executable: FileExecutableFlag, ) -> Result { if let Some(current_tree_value) = current_tree_values.as_resolved() { @@ -1449,6 +1493,11 @@ impl FileSnapshotter<'_> { repo_path, &content, self.conflict_marker_style, + new_file_state + .materialized_conflict_data + .map_or(MIN_CONFLICT_MARKER_LEN, |data| { + data.conflict_marker_len as usize + }), ) .block_on()?; match new_file_ids.into_resolved() { @@ -1463,6 +1512,8 @@ impl FileSnapshotter<'_> { false } }; + // Clear materialized conflict data if the file was resolved + new_file_state.materialized_conflict_data = None; Ok(Merge::normal(TreeValue::File { id: file_id.unwrap(), executable, @@ -1552,7 +1603,7 @@ impl TreeState { let metadata = file .metadata() .map_err(|err| checkout_error_for_stat_error(err, disk_path))?; - Ok(FileState::for_file(executable, size, &metadata)) + Ok(FileState::for_file(executable, size, &metadata, None)) } fn write_symlink(&self, disk_path: &Path, target: String) -> Result { @@ -1576,6 +1627,7 @@ impl TreeState { disk_path: &Path, conflict_data: Vec, executable: bool, + materialized_conflict_data: Option, ) -> Result { let mut file = OpenOptions::new() .write(true) @@ -1595,7 +1647,12 @@ impl TreeState { let metadata = file .metadata() .map_err(|err| checkout_error_for_stat_error(err, disk_path))?; - Ok(FileState::for_file(executable, size, &metadata)) + Ok(FileState::for_file( + executable, + size, + &metadata, + materialized_conflict_data, + )) } #[cfg_attr(windows, allow(unused_variables))] @@ -1786,16 +1843,29 @@ impl TreeState { contents, executable, } => { - let data = - materialize_merge_result_to_bytes(&contents, conflict_marker_style).into(); - self.write_conflict(&disk_path, data, executable)? + let conflict_marker_len = choose_materialized_conflict_marker_len(&contents); + let data = materialize_merge_result_to_bytes_with_marker_len( + &contents, + conflict_marker_style, + conflict_marker_len, + ) + .into(); + let materialized_conflict_data = MaterializedConflictData { + conflict_marker_len: conflict_marker_len.try_into().unwrap_or(u32::MAX), + }; + self.write_conflict( + &disk_path, + data, + executable, + Some(materialized_conflict_data), + )? } MaterializedTreeValue::OtherConflict { id } => { // Unless all terms are regular files, we can't do much // better than trying to describe the merge. let data = id.describe().into_bytes(); let executable = false; - self.write_conflict(&disk_path, data, executable)? + self.write_conflict(&disk_path, data, executable, None)? } }; changed_file_states.push((path, file_state)); @@ -1851,6 +1921,7 @@ impl TreeState { file_type, mtime: MillisSinceEpoch(0), size: 0, + materialized_conflict_data: None, }; changed_file_states.push((path, file_state)); } @@ -2310,6 +2381,7 @@ mod tests { }, mtime: MillisSinceEpoch(0), size, + materialized_conflict_data: None, }; let new_static_entry = |path: &'static str, size| (repo_path(path), new_state(size)); let new_owned_entry = |path: &str, size| (repo_path(path).to_owned(), new_state(size)); @@ -2358,6 +2430,7 @@ mod tests { }, mtime: MillisSinceEpoch(0), size, + materialized_conflict_data: None, }; let new_proto_entry = |path: &str, size| { file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size)) @@ -2422,6 +2495,7 @@ mod tests { }, mtime: MillisSinceEpoch(0), size, + materialized_conflict_data: None, }; let new_proto_entry = |path: &str, size| { file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size)) diff --git a/lib/src/protos/working_copy.proto b/lib/src/protos/working_copy.proto index 634c25da2e..7ed5c21a9e 100644 --- a/lib/src/protos/working_copy.proto +++ b/lib/src/protos/working_copy.proto @@ -24,12 +24,18 @@ enum FileType { GitSubmodule = 4; } +message MaterializedConflictData { + // TODO: maybe we should store num_sides here as well + uint32 conflict_marker_len = 1; +} + message FileState { int64 mtime_millis_since_epoch = 1; uint64 size = 2; FileType file_type = 3; // Set only if file_type is Conflict bytes conflict_id = 4 [deprecated = true]; + MaterializedConflictData materialized_conflict_data = 5; } message FileStateEntry { diff --git a/lib/src/protos/working_copy.rs b/lib/src/protos/working_copy.rs index 90c1bfe676..50621c7164 100644 --- a/lib/src/protos/working_copy.rs +++ b/lib/src/protos/working_copy.rs @@ -1,6 +1,13 @@ // This file is @generated by prost-build. #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct MaterializedConflictData { + /// TODO: maybe we should store num_sides here as well + #[prost(uint32, tag = "1")] + pub conflict_marker_len: u32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct FileState { #[prost(int64, tag = "1")] pub mtime_millis_since_epoch: i64, @@ -12,6 +19,8 @@ pub struct FileState { #[deprecated] #[prost(bytes = "vec", tag = "4")] pub conflict_id: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "5")] + pub materialized_conflict_data: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index d62f8e1eac..cd2ace3e6d 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -15,6 +15,7 @@ use indoc::indoc; use itertools::Itertools; use jj_lib::backend::FileId; +use jj_lib::conflicts::choose_materialized_conflict_marker_len; use jj_lib::conflicts::extract_as_single_hunk; use jj_lib::conflicts::materialize_merge_result_to_bytes; use jj_lib::conflicts::parse_conflict; @@ -590,10 +591,16 @@ fn test_materialize_parse_roundtrip_different_markers() { for materialize_style in all_styles { let materialized = materialize_conflict_string(store, path, &conflict, materialize_style); for parse_style in all_styles { - let parsed = - update_from_content(&conflict, store, path, materialized.as_bytes(), parse_style) - .block_on() - .unwrap(); + let parsed = update_from_content( + &conflict, + store, + path, + materialized.as_bytes(), + parse_style, + MIN_CONFLICT_MARKER_LEN, + ) + .block_on() + .unwrap(); assert_eq!( parsed, conflict, @@ -1630,9 +1637,16 @@ fn test_update_conflict_from_content() { let materialized = materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Diff); let parse = |content| { - update_from_content(&conflict, store, path, content, ConflictMarkerStyle::Diff) - .block_on() - .unwrap() + update_from_content( + &conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + MIN_CONFLICT_MARKER_LEN, + ) + .block_on() + .unwrap() }; assert_eq!(parse(materialized.as_bytes()), conflict); @@ -1680,9 +1694,16 @@ fn test_update_conflict_from_content_modify_delete() { let materialized = materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Diff); let parse = |content| { - update_from_content(&conflict, store, path, content, ConflictMarkerStyle::Diff) - .block_on() - .unwrap() + update_from_content( + &conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + MIN_CONFLICT_MARKER_LEN, + ) + .block_on() + .unwrap() }; assert_eq!(parse(materialized.as_bytes()), conflict); @@ -1736,9 +1757,16 @@ fn test_update_conflict_from_content_simplified_conflict() { let materialized_simplified = materialize_conflict_string(store, path, &simplified_conflict, ConflictMarkerStyle::Diff); let parse = |content| { - update_from_content(&conflict, store, path, content, ConflictMarkerStyle::Diff) - .block_on() - .unwrap() + update_from_content( + &conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + MIN_CONFLICT_MARKER_LEN, + ) + .block_on() + .unwrap() }; insta::assert_snapshot!( materialized, @@ -1896,6 +1924,12 @@ fn test_update_conflict_from_content_with_long_markers() { ); // The conflict should be materialized using long conflict markers + let materialized_marker_len = choose_materialized_conflict_marker_len( + &extract_as_single_hunk(&conflict, store, path) + .block_on() + .unwrap(), + ); + assert!(materialized_marker_len > MIN_CONFLICT_MARKER_LEN); let materialized = materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Snapshot); insta::assert_snapshot!(materialized, @r##" @@ -1923,9 +1957,16 @@ fn test_update_conflict_from_content_with_long_markers() { // to avoid the two versions of the file being obviously identical, so that we // can test the actual parsing logic. let parse = |conflict, content| { - update_from_content(conflict, store, path, content, ConflictMarkerStyle::Diff) - .block_on() - .unwrap() + update_from_content( + conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + materialized_marker_len, + ) + .block_on() + .unwrap() }; assert_eq!(parse(&conflict, materialized.as_bytes()), conflict); @@ -1999,6 +2040,11 @@ fn test_update_conflict_from_content_with_long_markers() { new_conflict ); + // If we add back the second conflict, it should still be parsed correctly + // (the fake conflict markers shouldn't be interpreted as conflict markers + // still, since they aren't the longest ones in the file). + assert_eq!(parse(&new_conflict, materialized.as_bytes()), conflict); + // If the new conflict is materialized again, it should have shorter // conflict markers now insta::assert_snapshot!( @@ -2062,6 +2108,14 @@ fn test_update_from_content_malformed_conflict() { vec![Some(left_file_id.clone()), Some(right_file_id.clone())], ); + // The conflict should be materialized with normal markers + let materialized_marker_len = choose_materialized_conflict_marker_len( + &extract_as_single_hunk(&conflict, store, path) + .block_on() + .unwrap(), + ); + assert!(materialized_marker_len == MIN_CONFLICT_MARKER_LEN); + let materialized = materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Diff); insta::assert_snapshot!(materialized, @r##" @@ -2086,9 +2140,16 @@ fn test_update_from_content_malformed_conflict() { ); let parse = |conflict, content| { - update_from_content(conflict, store, path, content, ConflictMarkerStyle::Diff) - .block_on() - .unwrap() + update_from_content( + conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + materialized_marker_len, + ) + .block_on() + .unwrap() }; assert_eq!(parse(&conflict, materialized.as_bytes()), conflict); @@ -2164,10 +2225,10 @@ fn test_update_from_content_malformed_conflict() { line 5 "##); - // Snapshotting again will cause the conflict to appear resolved + // Even though the file now contains markers of length 7, the materialized + // markers of length 7 are still parsed let second_snapshot = parse(&new_conflict, new_conflict_contents.as_bytes()); - assert_ne!(second_snapshot, new_conflict); - assert!(second_snapshot.is_resolved()); + assert_eq!(second_snapshot, new_conflict); } fn materialize_conflict_string(