diff --git a/CHANGELOG.md b/CHANGELOG.md index c94e2cbe4c..e862858f7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * A new Email template type is added. `Signature.email()` now returns an Email template type instead of a String. +* Conflict markers are now allowed to be longer than 7 characters, allowing + conflicts to be materialized and parsed correctly in files which already + contain lines which look like conflict markers. + ### Fixed bugs * The `$NO_COLOR` environment variable must now be non-empty to be respected. diff --git a/cli/src/commands/debug/local_working_copy.rs b/cli/src/commands/debug/local_working_copy.rs index 98408799b2..eee9a8de51 100644 --- a/cli/src/commands/debug/local_working_copy.rs +++ b/cli/src/commands/debug/local_working_copy.rs @@ -40,10 +40,11 @@ pub fn cmd_debug_local_working_copy( for (file, state) in wc.file_states()? { writeln!( ui.stdout(), - "{:?} {:13?} {:10?} {:?}", + "{:?} {:13?} {:10?} {:?} {:?}", state.file_type, state.size, state.mtime.0, + state.materialized_conflict_data, file )?; } diff --git a/cli/src/merge_tools/external.rs b/cli/src/merge_tools/external.rs index ff4db30619..5f388976c9 100644 --- a/cli/src/merge_tools/external.rs +++ b/cli/src/merge_tools/external.rs @@ -12,8 +12,10 @@ use jj_lib::backend::FileId; use jj_lib::backend::MergedTreeId; use jj_lib::backend::TreeValue; use jj_lib::conflicts; -use jj_lib::conflicts::materialize_merge_result_to_bytes; +use jj_lib::conflicts::choose_materialized_conflict_marker_len; +use jj_lib::conflicts::materialize_merge_result_to_bytes_with_marker_len; use jj_lib::conflicts::ConflictMarkerStyle; +use jj_lib::conflicts::MIN_CONFLICT_MARKER_LEN; use jj_lib::gitignore::GitIgnoreFile; use jj_lib::matchers::Matcher; use jj_lib::merge::Merge; @@ -181,8 +183,21 @@ pub fn run_mergetool_external( .conflict_marker_style .unwrap_or(default_conflict_marker_style); + // If the merge tool doesn't get conflict markers pre-populated in the output + // file, we should default to accepting MIN_CONFLICT_MARKER_LEN since the + // merge tool is unlikely to know about our rules for conflict marker length. + // In the future, we may want to add a "$markerLength" variable. + let conflict_marker_len = if editor.merge_tool_edits_conflict_markers { + choose_materialized_conflict_marker_len(&content) + } else { + MIN_CONFLICT_MARKER_LEN + }; let initial_output_content = if editor.merge_tool_edits_conflict_markers { - materialize_merge_result_to_bytes(&content, conflict_marker_style) + materialize_merge_result_to_bytes_with_marker_len( + &content, + conflict_marker_style, + conflict_marker_len, + ) } else { BString::default() }; @@ -257,6 +272,7 @@ pub fn run_mergetool_external( repo_path, output_file_contents.as_slice(), conflict_marker_style, + conflict_marker_len, ) .block_on()? } else { diff --git a/cli/tests/test_working_copy.rs b/cli/tests/test_working_copy.rs index c037675cf9..43407df29d 100644 --- a/cli/tests/test_working_copy.rs +++ b/cli/tests/test_working_copy.rs @@ -13,6 +13,7 @@ // limitations under the License. use indoc::indoc; +use regex::Regex; use crate::common::TestEnvironment; @@ -249,3 +250,182 @@ fn test_snapshot_invalid_ignore_pattern() { 2: invalid utf-8 sequence of 1 bytes from index 0 "##); } + +#[test] +fn test_conflict_marker_length_stored_in_working_copy() { + let test_env = TestEnvironment::default(); + test_env.jj_cmd_ok(test_env.env_root(), &["git", "init", "repo"]); + let repo_path = test_env.env_root().join("repo"); + + // Create a conflict in the working copy with long markers on one side + let conflict_file = repo_path.join("file"); + std::fs::write( + &conflict_file, + indoc! {" + line 1 + line 2 + line 3 + "}, + ) + .unwrap(); + test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "base"]); + std::fs::write( + &conflict_file, + indoc! {" + line 1 + line 2 - left + line 3 - left + "}, + ) + .unwrap(); + test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "side-a"]); + test_env.jj_cmd_ok(&repo_path, &["new", "description(base)", "-m", "side-b"]); + std::fs::write( + &conflict_file, + indoc! {" + line 1 + ======= fake marker + line 2 - right + ======= fake marker + line 3 + "}, + ) + .unwrap(); + test_env.jj_cmd_ok( + &repo_path, + &["new", "description(side-a)", "description(side-b)"], + ); + + // File should be materialized with long conflict markers + insta::assert_snapshot!(std::fs::read_to_string(&conflict_file).unwrap(), @r##" + line 1 + <<<<<<<<<<< Conflict 1 of 1 + %%%%%%%%%%% Changes from base to side #1 + -line 2 + -line 3 + +line 2 - left + +line 3 - left + +++++++++++ Contents of side #2 + ======= fake marker + line 2 - right + ======= fake marker + line 3 + >>>>>>>>>>> Conflict 1 of 1 ends + "##); + + // The timestamps in the `jj debug local-working-copy` output change, so we want + // to remove them before asserting the snapshot + let timestamp_regex = Regex::new(r"\b\d{10,}\b").unwrap(); + // On Windows, executable is always `()`, but on Unix-like systems, it's `true` + // or `false`, so we want to remove it from the output as well + let executable_regex = Regex::new("executable: [^ ]+").unwrap(); + + let redact_output = |output: &str| { + let output = timestamp_regex.replace_all(output, ""); + let output = executable_regex.replace_all(&output, ""); + output.into_owned() + }; + + // Working copy should contain conflict marker length + let stdout = test_env.jj_cmd_success(&repo_path, &["debug", "local-working-copy"]); + insta::assert_snapshot!(redact_output(&stdout), @r#" + Current operation: OperationId("6feb53603f9f7324085d2d89dca19a6dac93fef6795cfd5d57090ff803d404ab1196b45d5b97faa641f6a78302ac0fbd149f5e5a880d1fd64d6520c31beab213") + Current tree: Merge(Conflicted([TreeId("381273b50cf73f8c81b3f1502ee89e9bbd6c1518"), TreeId("771f3d31c4588ea40a8864b2a981749888e596c2"), TreeId("f56b8223da0dab22b03b8323ced4946329aeb4e0")])) + Normal { } 249 Some(MaterializedConflictData { conflict_marker_len: 11 }) "file" + "#); + + // Update the conflict with more fake markers, and it should still parse + // correctly (the markers should be ignored) + std::fs::write( + &conflict_file, + indoc! {" + line 1 + <<<<<<<<<<< Conflict 1 of 1 + %%%%%%%%%%% Changes from base to side #1 + -line 2 + -line 3 + +line 2 - left + +line 3 - left + +++++++++++ Contents of side #2 + <<<<<<< fake marker + ||||||| fake marker + line 2 - right + ======= fake marker + line 3 + >>>>>>> fake marker + >>>>>>>>>>> Conflict 1 of 1 ends + "}, + ) + .unwrap(); + + // The file should still be conflicted, and the new content should be saved + let stdout = test_env.jj_cmd_success(&repo_path, &["st"]); + insta::assert_snapshot!(stdout, @r#" + Working copy changes: + M file + There are unresolved conflicts at these paths: + file 2-sided conflict + Working copy : mzvwutvl 3a981880 (conflict) (no description set) + Parent commit: rlvkpnrz ce613b49 side-a + Parent commit: zsuskuln 7b2b03ab side-b + "#); + insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["diff", "--git"]), @r##" + diff --git a/file b/file + --- a/file + +++ b/file + @@ -6,8 +6,10 @@ + +line 2 - left + +line 3 - left + +++++++++++ Contents of side #2 + -======= fake marker + +<<<<<<< fake marker + +||||||| fake marker + line 2 - right + ======= fake marker + line 3 + +>>>>>>> fake marker + >>>>>>>>>>> Conflict 1 of 1 ends + "##); + + // Working copy should still contain conflict marker length + let stdout = test_env.jj_cmd_success(&repo_path, &["debug", "local-working-copy"]); + insta::assert_snapshot!(redact_output(&stdout), @r#" + Current operation: OperationId("205bc702428a522e0b175938a51c51b59741c854a609ba63c89de76ffda6e5eff6fcc00725328b1a91f448401769773cefcff01fac3448190d2cea4e137d2166") + Current tree: Merge(Conflicted([TreeId("381273b50cf73f8c81b3f1502ee89e9bbd6c1518"), TreeId("771f3d31c4588ea40a8864b2a981749888e596c2"), TreeId("3329c18c95f7b7a55c278c2259e9c4ce711fae59")])) + Normal { } 289 Some(MaterializedConflictData { conflict_marker_len: 11 }) "file" + "#); + + // Resolve the conflict + std::fs::write( + &conflict_file, + indoc! {" + line 1 + <<<<<<< fake marker + ||||||| fake marker + line 2 - left + line 2 - right + ======= fake marker + line 3 - left + >>>>>>> fake marker + "}, + ) + .unwrap(); + + let stdout = test_env.jj_cmd_success(&repo_path, &["st"]); + insta::assert_snapshot!(stdout, @r#" + Working copy changes: + M file + Working copy : mzvwutvl 1aefd866 (no description set) + Parent commit: rlvkpnrz ce613b49 side-a + Parent commit: zsuskuln 7b2b03ab side-b + "#); + + // When the file is resolved, the conflict marker length is removed from the + // working copy + let stdout = test_env.jj_cmd_success(&repo_path, &["debug", "local-working-copy"]); + insta::assert_snapshot!(redact_output(&stdout), @r#" + Current operation: OperationId("2206ce3c108b1573df0841138c226bba1ab3cff900a5899ed31ac69162c7d6f30d37fb5ab43da60dba88047b8ab22d453887fff688f26dfcf04f2c99420a5563") + Current tree: Merge(Resolved(TreeId("6120567b3cb2472d549753ed3e4b84183d52a650"))) + Normal { } 130 None "file" + "#); +} diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 32997363c1..1a081c43fe 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -28,8 +28,6 @@ use futures::StreamExt; use futures::TryStreamExt; use itertools::Itertools; use pollster::FutureExt; -use regex::bytes::Regex; -use regex::bytes::RegexBuilder; use crate::backend::BackendError; use crate::backend::BackendResult; @@ -51,49 +49,31 @@ use crate::merge::MergedTreeValue; use crate::repo_path::RepoPath; use crate::store::Store; -const CONFLICT_START_LINE: &str = "<<<<<<<"; -const CONFLICT_END_LINE: &str = ">>>>>>>"; -const CONFLICT_DIFF_LINE: &str = "%%%%%%%"; -const CONFLICT_MINUS_LINE: &str = "-------"; -const CONFLICT_PLUS_LINE: &str = "+++++++"; -const CONFLICT_GIT_ANCESTOR_LINE: &str = "|||||||"; -const CONFLICT_GIT_SEPARATOR_LINE: &str = "======="; -const CONFLICT_START_LINE_CHAR: u8 = CONFLICT_START_LINE.as_bytes()[0]; -const CONFLICT_END_LINE_CHAR: u8 = CONFLICT_END_LINE.as_bytes()[0]; -const CONFLICT_DIFF_LINE_CHAR: u8 = CONFLICT_DIFF_LINE.as_bytes()[0]; -const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0]; -const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0]; -const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0]; -const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0]; - -/// A conflict marker is one of the separators, optionally followed by a space -/// and some text. -// TODO: All the `{7}` could be replaced with `{7,}` to allow longer -// separators. This could be useful to make it possible to allow conflict -// markers inside the text of the conflicts. -static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { - RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$") - .multi_line(true) - .build() - .unwrap() -}); +/// Minimum length of conflict markers. +pub const MIN_CONFLICT_MARKER_LEN: usize = 7; + +/// If a file already contains lines which look like conflict markers of length +/// N, then the conflict markers we add will be of length (N + increment). This +/// number is chosen to make the conflict markers noticeably longer than the +/// existing markers. +const CONFLICT_MARKER_LEN_INCREMENT: usize = 4; fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> { for hunk in hunks { match hunk.kind { DiffHunkKind::Matching => { debug_assert!(hunk.contents.iter().all_equal()); - for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[0].lines_with_terminator() { file.write_all(b" ")?; file.write_all(line)?; } } DiffHunkKind::Different => { - for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[0].lines_with_terminator() { file.write_all(b"-")?; file.write_all(line)?; } - for line in hunk.contents[1].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[1].lines_with_terminator() { file.write_all(b"+")?; file.write_all(line)?; } @@ -250,6 +230,105 @@ pub enum ConflictMarkerStyle { Git, } +/// Characters which can be repeated to form a conflict marker line when +/// materializing and parsing conflicts. +#[derive(Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +enum ConflictMarkerLineChar { + ConflictStart = b'<', + ConflictEnd = b'>', + Add = b'+', + Remove = b'-', + Diff = b'%', + GitAncestor = b'|', + GitSeparator = b'=', +} + +impl ConflictMarkerLineChar { + /// Get the ASCII byte used for this conflict marker. + fn to_byte(self) -> u8 { + self as u8 + } + + /// Parse a byte to see if it corresponds with any kind of conflict marker. + fn parse_byte(byte: u8) -> Option { + match byte { + b'<' => Some(Self::ConflictStart), + b'>' => Some(Self::ConflictEnd), + b'+' => Some(Self::Add), + b'-' => Some(Self::Remove), + b'%' => Some(Self::Diff), + b'|' => Some(Self::GitAncestor), + b'=' => Some(Self::GitSeparator), + _ => None, + } + } +} + +/// Represents a conflict marker line parsed from the file. Conflict marker +/// lines consist of a single ASCII character repeated for a certain length. +struct ConflictMarkerLine { + kind: ConflictMarkerLineChar, + len: usize, +} + +/// Write a conflict marker to an output file. +fn write_conflict_marker( + output: &mut dyn Write, + kind: ConflictMarkerLineChar, + len: usize, + suffix_text: &str, +) -> io::Result<()> { + let conflict_marker = BString::new(vec![kind.to_byte(); len]); + + if suffix_text.is_empty() { + writeln!(output, "{conflict_marker}") + } else { + writeln!(output, "{conflict_marker} {suffix_text}") + } +} + +/// Parse a conflict marker from a line of a file. The conflict marker may have +/// any length (even less than MIN_CONFLICT_MARKER_LEN). +fn parse_conflict_marker_any_len(line: &[u8]) -> Option { + let first_byte = *line.first()?; + let kind = ConflictMarkerLineChar::parse_byte(first_byte)?; + let len = line.iter().take_while(|&&b| b == first_byte).count(); + + if let Some(next_byte) = line.get(len) { + // If there is a character after the marker, it must be ASCII whitespace + if !next_byte.is_ascii_whitespace() { + return None; + } + } + + Some(ConflictMarkerLine { kind, len }) +} + +/// Parse a conflict marker, expecting it to be at least a certain length. Any +/// shorter conflict markers are ignored. +fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option { + parse_conflict_marker_any_len(line) + .filter(|marker| marker.len >= expected_len) + .map(|marker| marker.kind) +} + +/// Given a Merge of files, choose the conflict marker length to use when +/// materializing conflicts. +pub fn choose_materialized_conflict_marker_len>(single_hunk: &Merge) -> usize { + let max_existing_marker_len = single_hunk + .iter() + .flat_map(|file| file.as_ref().lines_with_terminator()) + .filter_map(parse_conflict_marker_any_len) + .map(|marker| marker.len) + .max() + .unwrap_or_default(); + + max_existing_marker_len + .saturating_add(CONFLICT_MARKER_LEN_INCREMENT) + .max(MIN_CONFLICT_MARKER_LEN) +} + pub fn materialize_merge_result>( single_hunk: &Merge, conflict_marker_style: ConflictMarkerStyle, @@ -259,7 +338,23 @@ pub fn materialize_merge_result>( match &merge_result { MergeResult::Resolved(content) => output.write_all(content), MergeResult::Conflict(hunks) => { - materialize_conflict_hunks(hunks, conflict_marker_style, output) + let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk); + materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output) + } + } +} + +pub fn materialize_merge_result_with_marker_len>( + single_hunk: &Merge, + conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, + output: &mut dyn Write, +) -> io::Result<()> { + let merge_result = files::merge(single_hunk); + match &merge_result { + MergeResult::Resolved(content) => output.write_all(content), + MergeResult::Conflict(hunks) => { + materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output) } } } @@ -272,9 +367,37 @@ pub fn materialize_merge_result_to_bytes>( match merge_result { MergeResult::Resolved(content) => content, MergeResult::Conflict(hunks) => { + let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk); let mut output = Vec::new(); - materialize_conflict_hunks(&hunks, conflict_marker_style, &mut output) - .expect("writing to an in-memory buffer should never fail"); + materialize_conflict_hunks( + &hunks, + conflict_marker_style, + conflict_marker_len, + &mut output, + ) + .expect("writing to an in-memory buffer should never fail"); + output.into() + } + } +} + +pub fn materialize_merge_result_to_bytes_with_marker_len>( + single_hunk: &Merge, + conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, +) -> BString { + let merge_result = files::merge(single_hunk); + match merge_result { + MergeResult::Resolved(content) => content, + MergeResult::Conflict(hunks) => { + let mut output = Vec::new(); + materialize_conflict_hunks( + &hunks, + conflict_marker_style, + conflict_marker_len, + &mut output, + ) + .expect("writing to an in-memory buffer should never fail"); output.into() } } @@ -283,6 +406,7 @@ pub fn materialize_merge_result_to_bytes>( fn materialize_conflict_hunks( hunks: &[Merge], conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { let num_conflicts = hunks @@ -300,13 +424,21 @@ fn materialize_conflict_hunks( match (conflict_marker_style, hunk.as_slice()) { // 2-sided conflicts can use Git-style conflict markers (ConflictMarkerStyle::Git, [left, base, right]) => { - materialize_git_style_conflict(left, base, right, &conflict_info, output)?; + materialize_git_style_conflict( + left, + base, + right, + &conflict_info, + conflict_marker_len, + output, + )?; } _ => { materialize_jj_style_conflict( hunk, &conflict_info, conflict_marker_style, + conflict_marker_len, output, )?; } @@ -321,16 +453,37 @@ fn materialize_git_style_conflict( base: &[u8], right: &[u8], conflict_info: &str, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { - writeln!(output, "{CONFLICT_START_LINE} Side #1 ({conflict_info})")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::ConflictStart, + conflict_marker_len, + &format!("Side #1 ({conflict_info})"), + )?; output.write_all(left)?; - writeln!(output, "{CONFLICT_GIT_ANCESTOR_LINE} Base")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::GitAncestor, + conflict_marker_len, + "Base", + )?; output.write_all(base)?; // VS Code doesn't seem to support any trailing text on the separator line - writeln!(output, "{CONFLICT_GIT_SEPARATOR_LINE}")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::GitSeparator, + conflict_marker_len, + "", + )?; output.write_all(right)?; - writeln!(output, "{CONFLICT_END_LINE} Side #2 ({conflict_info} ends)")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::ConflictEnd, + conflict_marker_len, + &format!("Side #2 ({conflict_info} ends)"), + )?; Ok(()) } @@ -339,40 +492,49 @@ fn materialize_jj_style_conflict( hunk: &Merge, conflict_info: &str, conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { // Write a positive snapshot (side) of a conflict - fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> { - writeln!( + let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| { + write_conflict_marker( output, - "{CONFLICT_PLUS_LINE} Contents of side #{}", - add_index + 1 + ConflictMarkerLineChar::Add, + conflict_marker_len, + &format!("Contents of side #{}", add_index + 1), )?; output.write_all(data) - } + }; // Write a negative snapshot (base) of a conflict - fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> { - writeln!(output, "{CONFLICT_MINUS_LINE} Contents of {base_str}")?; + let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| { + write_conflict_marker( + output, + ConflictMarkerLineChar::Remove, + conflict_marker_len, + &format!("Contents of {base_str}"), + )?; output.write_all(data) - } + }; // Write a diff from a negative term to a positive term - fn write_diff( - base_str: &str, - add_index: usize, - diff: &[DiffHunk], - output: &mut dyn Write, - ) -> io::Result<()> { - writeln!( - output, - "{CONFLICT_DIFF_LINE} Changes from {base_str} to side #{}", - add_index + 1 - )?; - write_diff_hunks(diff, output) - } + let write_diff = + |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| { + write_conflict_marker( + output, + ConflictMarkerLineChar::Diff, + conflict_marker_len, + &format!("Changes from {base_str} to side #{}", add_index + 1), + )?; + write_diff_hunks(diff, output) + }; - writeln!(output, "{CONFLICT_START_LINE} {conflict_info}")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::ConflictStart, + conflict_marker_len, + conflict_info, + )?; let mut add_index = 0; for (base_index, left) in hunk.removes().enumerate() { // The vast majority of conflicts one actually tries to resolve manually have 1 @@ -422,7 +584,12 @@ fn materialize_jj_style_conflict( for (add_index, slice) in hunk.adds().enumerate().skip(add_index) { write_side(add_index, slice, output)?; } - writeln!(output, "{CONFLICT_END_LINE} {conflict_info} ends")?; + write_conflict_marker( + output, + ConflictMarkerLineChar::ConflictEnd, + conflict_marker_len, + &format!("{conflict_info} ends"), + )?; Ok(()) } @@ -469,9 +636,16 @@ pub fn materialized_diff_stream<'a>( /// has to provide the expected number of merge sides (adds). Conflict /// markers that are otherwise valid will be considered invalid if /// they don't have the expected arity. +/// +/// All conflict markers in the file must be at least as long as the expected +/// length. Any shorter conflict markers will be ignored. // TODO: "parse" is not usually the opposite of "materialize", so maybe we // should rename them to "serialize" and "deserialize"? -pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option>> { +pub fn parse_conflict( + input: &[u8], + num_sides: usize, + expected_marker_len: usize, +) -> Option>> { if input.is_empty() { return None; } @@ -480,24 +654,27 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option { conflict_start = Some(pos); conflict_start_len = line.len(); - } else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR { - let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos]; - let hunk = parse_conflict_hunk(conflict_body); - if hunk.num_sides() == num_sides { - let resolved_slice = &input[resolved_start..conflict_start.unwrap()]; - if !resolved_slice.is_empty() { - hunks.push(Merge::resolved(BString::from(resolved_slice))); + } + Some(ConflictMarkerLineChar::ConflictEnd) => { + if let Some(conflict_start_index) = conflict_start.take() { + let conflict_body = &input[conflict_start_index + conflict_start_len..pos]; + let hunk = parse_conflict_hunk(conflict_body, expected_marker_len); + if hunk.num_sides() == num_sides { + let resolved_slice = &input[resolved_start..conflict_start_index]; + if !resolved_slice.is_empty() { + hunks.push(Merge::resolved(BString::from(resolved_slice))); + } + hunks.push(hunk); + resolved_start = pos + line.len(); } - hunks.push(hunk); - resolved_start = pos + line.len(); } - conflict_start = None; } + _ => {} } pos += line.len(); } @@ -517,58 +694,59 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option Merge { +fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge { // If the hunk starts with a conflict marker, find its first character - let initial_conflict_marker_char = input + let initial_conflict_marker = input .lines_with_terminator() .next() - .filter(|line| is_conflict_marker_line(line)) - .map(|line| line[0]); + .and_then(|line| parse_conflict_marker(line, expected_marker_len)); - match initial_conflict_marker_char { + match initial_conflict_marker { // JJ-style conflicts must start with one of these 3 conflict marker lines - Some(CONFLICT_DIFF_LINE_CHAR | CONFLICT_MINUS_LINE_CHAR | CONFLICT_PLUS_LINE_CHAR) => { - parse_jj_style_conflict_hunk(input) - } + Some( + ConflictMarkerLineChar::Diff + | ConflictMarkerLineChar::Remove + | ConflictMarkerLineChar::Add, + ) => parse_jj_style_conflict_hunk(input, expected_marker_len), // Git-style conflicts either must not start with a conflict marker line, or must start with // the "|||||||" conflict marker line (if the first side was empty) - None | Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => parse_git_style_conflict_hunk(input), + None | Some(ConflictMarkerLineChar::GitAncestor) => { + parse_git_style_conflict_hunk(input, expected_marker_len) + } // No other conflict markers are allowed at the start of a hunk Some(_) => Merge::resolved(BString::new(vec![])), } } -fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { +fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge { enum State { Diff, - Minus, - Plus, + Remove, + Add, Unknown, } let mut state = State::Unknown; let mut removes = vec![]; let mut adds = vec![]; for line in input.lines_with_terminator() { - if is_conflict_marker_line(line) { - match line[0] { - CONFLICT_DIFF_LINE_CHAR => { - state = State::Diff; - removes.push(BString::new(vec![])); - adds.push(BString::new(vec![])); - continue; - } - CONFLICT_MINUS_LINE_CHAR => { - state = State::Minus; - removes.push(BString::new(vec![])); - continue; - } - CONFLICT_PLUS_LINE_CHAR => { - state = State::Plus; - adds.push(BString::new(vec![])); - continue; - } - _ => {} + match parse_conflict_marker(line, expected_marker_len) { + Some(ConflictMarkerLineChar::Diff) => { + state = State::Diff; + removes.push(BString::new(vec![])); + adds.push(BString::new(vec![])); + continue; + } + Some(ConflictMarkerLineChar::Remove) => { + state = State::Remove; + removes.push(BString::new(vec![])); + continue; + } + Some(ConflictMarkerLineChar::Add) => { + state = State::Add; + adds.push(BString::new(vec![])); + continue; } + _ => {} } match state { State::Diff => { @@ -590,10 +768,10 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { return Merge::resolved(BString::new(vec![])); } } - State::Minus => { + State::Remove => { removes.last_mut().unwrap().extend_from_slice(line); } - State::Plus => { + State::Add => { adds.last_mut().unwrap().extend_from_slice(line); } State::Unknown => { @@ -611,7 +789,7 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { } } -fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { +fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge { #[derive(PartialEq, Eq)] enum State { Left, @@ -623,28 +801,26 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { let mut base = BString::new(vec![]); let mut right = BString::new(vec![]); for line in input.lines_with_terminator() { - if is_conflict_marker_line(line) { - match line[0] { - CONFLICT_GIT_ANCESTOR_LINE_CHAR => { - if state == State::Left { - state = State::Base; - continue; - } else { - // Base must come after left - return Merge::resolved(BString::new(vec![])); - } + match parse_conflict_marker(line, expected_marker_len) { + Some(ConflictMarkerLineChar::GitAncestor) => { + if state == State::Left { + state = State::Base; + continue; + } else { + // Base must come after left + return Merge::resolved(BString::new(vec![])); } - CONFLICT_GIT_SEPARATOR_LINE_CHAR => { - if state == State::Base { - state = State::Right; - continue; - } else { - // Right must come after base - return Merge::resolved(BString::new(vec![])); - } + } + Some(ConflictMarkerLineChar::GitSeparator) => { + if state == State::Base { + state = State::Right; + continue; + } else { + // Right must come after base + return Merge::resolved(BString::new(vec![])); } - _ => {} } + _ => {} } match state { State::Left => left.extend_from_slice(line), @@ -661,13 +837,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { } } -/// Check whether a line is a conflict marker. Removes trailing whitespace -/// before checking against regex to ensure it parses CRLF endings correctly. -fn is_conflict_marker_line(line: &[u8]) -> bool { - let line = line.trim_end_with(|ch| ch.is_ascii_whitespace()); - CONFLICT_MARKER_REGEX.is_match_at(line, 0) -} - /// Parses conflict markers in `content` and returns an updated version of /// `file_ids` with the new contents. If no (valid) conflict markers remain, a /// single resolves `FileId` will be returned. @@ -677,6 +846,7 @@ pub async fn update_from_content( path: &RepoPath, content: &[u8], conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, ) -> BackendResult>> { let simplified_file_ids = file_ids.clone().simplify(); let simplified_file_ids = &simplified_file_ids; @@ -688,7 +858,13 @@ pub async fn update_from_content( // copy. let mut old_content = Vec::with_capacity(content.len()); let merge_hunk = extract_as_single_hunk(simplified_file_ids, store, path).await?; - materialize_merge_result(&merge_hunk, conflict_marker_style, &mut old_content).unwrap(); + materialize_merge_result_with_marker_len( + &merge_hunk, + conflict_marker_style, + conflict_marker_len, + &mut old_content, + ) + .unwrap(); if content == old_content { return Ok(file_ids.clone()); } @@ -698,11 +874,16 @@ pub async fn update_from_content( // the arity of the unsimplified conflicts since such a conflict may be // present in the working copy if written by an earlier version of jj. let (used_file_ids, hunks) = 'hunks: { - if let Some(hunks) = parse_conflict(content, simplified_file_ids.num_sides()) { + if let Some(hunks) = parse_conflict( + content, + simplified_file_ids.num_sides(), + conflict_marker_len, + ) { break 'hunks (simplified_file_ids, hunks); }; if simplified_file_ids.num_sides() != file_ids.num_sides() { - if let Some(hunks) = parse_conflict(content, file_ids.num_sides()) { + if let Some(hunks) = parse_conflict(content, file_ids.num_sides(), conflict_marker_len) + { break 'hunks (file_ids, hunks); }; }; diff --git a/lib/src/local_working_copy.rs b/lib/src/local_working_copy.rs index f07af6d0ee..5fa8251abd 100644 --- a/lib/src/local_working_copy.rs +++ b/lib/src/local_working_copy.rs @@ -66,10 +66,12 @@ use crate::backend::TreeId; use crate::backend::TreeValue; use crate::commit::Commit; use crate::conflicts; -use crate::conflicts::materialize_merge_result_to_bytes; +use crate::conflicts::choose_materialized_conflict_marker_len; +use crate::conflicts::materialize_merge_result_to_bytes_with_marker_len; use crate::conflicts::materialize_tree_value; use crate::conflicts::ConflictMarkerStyle; use crate::conflicts::MaterializedTreeValue; +use crate::conflicts::MIN_CONFLICT_MARKER_LEN; use crate::file_util::check_symlink_support; use crate::file_util::try_symlink; #[cfg(feature = "watchman")] @@ -125,11 +127,17 @@ pub enum FileType { GitSubmodule, } +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct MaterializedConflictData { + pub conflict_marker_len: u32, +} + #[derive(Debug, PartialEq, Eq, Clone)] pub struct FileState { pub file_type: FileType, pub mtime: MillisSinceEpoch, pub size: u64, + pub materialized_conflict_data: Option, /* TODO: What else do we need here? Git stores a lot of fields. * TODO: Could possibly handle case-insensitive file systems keeping an * Option with the actual path here. */ @@ -147,10 +155,16 @@ impl FileState { file_type: FileType::Normal { executable }, mtime: MillisSinceEpoch(0), size: 0, + materialized_conflict_data: None, } } - fn for_file(executable: bool, size: u64, metadata: &Metadata) -> Self { + fn for_file( + executable: bool, + size: u64, + metadata: &Metadata, + materialized_conflict_data: Option, + ) -> Self { #[cfg(windows)] let executable = { // Windows doesn't support executable bit. @@ -160,6 +174,7 @@ impl FileState { file_type: FileType::Normal { executable }, mtime: mtime_from_metadata(metadata), size, + materialized_conflict_data, } } @@ -171,6 +186,7 @@ impl FileState { file_type: FileType::Symlink, mtime: mtime_from_metadata(metadata), size: metadata.len(), + materialized_conflict_data: None, } } @@ -179,6 +195,7 @@ impl FileState { file_type: FileType::GitSubmodule, mtime: MillisSinceEpoch(0), size: 0, + materialized_conflict_data: None, } } } @@ -418,6 +435,11 @@ fn file_state_from_proto(proto: &crate::protos::working_copy::FileState) -> File file_type, mtime: MillisSinceEpoch(proto.mtime_millis_since_epoch), size: proto.size, + materialized_conflict_data: proto.materialized_conflict_data.as_ref().map(|data| { + MaterializedConflictData { + conflict_marker_len: data.conflict_marker_len, + } + }), } } @@ -436,6 +458,11 @@ fn file_state_to_proto(file_state: &FileState) -> crate::protos::working_copy::F proto.file_type = file_type as i32; proto.mtime_millis_since_epoch = file_state.mtime.0; proto.size = file_state.size; + proto.materialized_conflict_data = file_state.materialized_conflict_data.map(|data| { + crate::protos::working_copy::MaterializedConflictData { + conflict_marker_len: data.conflict_marker_len, + } + }); proto } @@ -651,7 +678,10 @@ fn mtime_from_metadata(metadata: &Metadata) -> MillisSinceEpoch { ) } -fn file_state(metadata: &Metadata) -> Option { +fn file_state( + metadata: &Metadata, + materialized_conflict_data: Option, +) -> Option { let metadata_file_type = metadata.file_type(); let file_type = if metadata_file_type.is_dir() { None @@ -676,6 +706,7 @@ fn file_state(metadata: &Metadata) -> Option { file_type, mtime, size, + materialized_conflict_data, } }) } @@ -1250,8 +1281,12 @@ impl FileSnapshotter<'_> { max_size: self.max_new_file_size, }; self.untracked_paths_tx.send((path, reason)).ok(); - Ok(None) - } else if let Some(new_file_state) = file_state(&metadata) { + return Ok(None); + } + let materialized_conflict_data = maybe_current_file_state + .as_ref() + .and_then(|state| state.materialized_conflict_data); + if let Some(new_file_state) = file_state(&metadata, materialized_conflict_data) { self.process_present_file( path, &entry.path(), @@ -1286,7 +1321,9 @@ impl FileSnapshotter<'_> { }); } }; - if let Some(new_file_state) = metadata.as_ref().and_then(file_state) { + if let Some(new_file_state) = metadata.as_ref().and_then(|metadata| { + file_state(metadata, current_file_state.materialized_conflict_data) + }) { self.process_present_file( tracked_path.to_owned(), &disk_path, @@ -1305,13 +1342,13 @@ impl FileSnapshotter<'_> { path: RepoPathBuf, disk_path: &Path, maybe_current_file_state: Option<&FileState>, - new_file_state: FileState, + mut new_file_state: FileState, ) -> Result<(), SnapshotError> { let update = self.get_updated_tree_value( &path, disk_path, maybe_current_file_state, - &new_file_state, + &mut new_file_state, )?; if let Some(tree_value) = update { self.tree_entries_tx.send((path.clone(), tree_value)).ok(); @@ -1360,7 +1397,7 @@ impl FileSnapshotter<'_> { repo_path: &RepoPath, disk_path: &Path, maybe_current_file_state: Option<&FileState>, - new_file_state: &FileState, + new_file_state: &mut FileState, ) -> Result, SnapshotError> { let clean = match maybe_current_file_state { None => { @@ -1391,7 +1428,13 @@ impl FileSnapshotter<'_> { }; let new_tree_values = match new_file_type { FileType::Normal { executable } => self - .write_path_to_store(repo_path, disk_path, ¤t_tree_values, executable) + .write_path_to_store( + repo_path, + disk_path, + ¤t_tree_values, + new_file_state, + executable, + ) .block_on()?, FileType::Symlink => { let id = self @@ -1418,6 +1461,7 @@ impl FileSnapshotter<'_> { repo_path: &RepoPath, disk_path: &Path, current_tree_values: &MergedTreeValue, + new_file_state: &mut FileState, executable: FileExecutableFlag, ) -> Result { if let Some(current_tree_value) = current_tree_values.as_resolved() { @@ -1449,6 +1493,11 @@ impl FileSnapshotter<'_> { repo_path, &content, self.conflict_marker_style, + new_file_state + .materialized_conflict_data + .map_or(MIN_CONFLICT_MARKER_LEN, |data| { + data.conflict_marker_len as usize + }), ) .block_on()?; match new_file_ids.into_resolved() { @@ -1463,6 +1512,8 @@ impl FileSnapshotter<'_> { false } }; + // Clear materialized conflict data if the file was resolved + new_file_state.materialized_conflict_data = None; Ok(Merge::normal(TreeValue::File { id: file_id.unwrap(), executable, @@ -1552,7 +1603,7 @@ impl TreeState { let metadata = file .metadata() .map_err(|err| checkout_error_for_stat_error(err, disk_path))?; - Ok(FileState::for_file(executable, size, &metadata)) + Ok(FileState::for_file(executable, size, &metadata, None)) } fn write_symlink(&self, disk_path: &Path, target: String) -> Result { @@ -1576,6 +1627,7 @@ impl TreeState { disk_path: &Path, conflict_data: Vec, executable: bool, + materialized_conflict_data: Option, ) -> Result { let mut file = OpenOptions::new() .write(true) @@ -1595,7 +1647,12 @@ impl TreeState { let metadata = file .metadata() .map_err(|err| checkout_error_for_stat_error(err, disk_path))?; - Ok(FileState::for_file(executable, size, &metadata)) + Ok(FileState::for_file( + executable, + size, + &metadata, + materialized_conflict_data, + )) } #[cfg_attr(windows, allow(unused_variables))] @@ -1786,16 +1843,29 @@ impl TreeState { contents, executable, } => { - let data = - materialize_merge_result_to_bytes(&contents, conflict_marker_style).into(); - self.write_conflict(&disk_path, data, executable)? + let conflict_marker_len = choose_materialized_conflict_marker_len(&contents); + let data = materialize_merge_result_to_bytes_with_marker_len( + &contents, + conflict_marker_style, + conflict_marker_len, + ) + .into(); + let materialized_conflict_data = MaterializedConflictData { + conflict_marker_len: conflict_marker_len.try_into().unwrap_or(u32::MAX), + }; + self.write_conflict( + &disk_path, + data, + executable, + Some(materialized_conflict_data), + )? } MaterializedTreeValue::OtherConflict { id } => { // Unless all terms are regular files, we can't do much // better than trying to describe the merge. let data = id.describe().into_bytes(); let executable = false; - self.write_conflict(&disk_path, data, executable)? + self.write_conflict(&disk_path, data, executable, None)? } }; changed_file_states.push((path, file_state)); @@ -1851,6 +1921,7 @@ impl TreeState { file_type, mtime: MillisSinceEpoch(0), size: 0, + materialized_conflict_data: None, }; changed_file_states.push((path, file_state)); } @@ -2310,6 +2381,7 @@ mod tests { }, mtime: MillisSinceEpoch(0), size, + materialized_conflict_data: None, }; let new_static_entry = |path: &'static str, size| (repo_path(path), new_state(size)); let new_owned_entry = |path: &str, size| (repo_path(path).to_owned(), new_state(size)); @@ -2358,6 +2430,7 @@ mod tests { }, mtime: MillisSinceEpoch(0), size, + materialized_conflict_data: None, }; let new_proto_entry = |path: &str, size| { file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size)) @@ -2422,6 +2495,7 @@ mod tests { }, mtime: MillisSinceEpoch(0), size, + materialized_conflict_data: None, }; let new_proto_entry = |path: &str, size| { file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size)) diff --git a/lib/src/protos/working_copy.proto b/lib/src/protos/working_copy.proto index 634c25da2e..7ed5c21a9e 100644 --- a/lib/src/protos/working_copy.proto +++ b/lib/src/protos/working_copy.proto @@ -24,12 +24,18 @@ enum FileType { GitSubmodule = 4; } +message MaterializedConflictData { + // TODO: maybe we should store num_sides here as well + uint32 conflict_marker_len = 1; +} + message FileState { int64 mtime_millis_since_epoch = 1; uint64 size = 2; FileType file_type = 3; // Set only if file_type is Conflict bytes conflict_id = 4 [deprecated = true]; + MaterializedConflictData materialized_conflict_data = 5; } message FileStateEntry { diff --git a/lib/src/protos/working_copy.rs b/lib/src/protos/working_copy.rs index 90c1bfe676..50621c7164 100644 --- a/lib/src/protos/working_copy.rs +++ b/lib/src/protos/working_copy.rs @@ -1,6 +1,13 @@ // This file is @generated by prost-build. #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct MaterializedConflictData { + /// TODO: maybe we should store num_sides here as well + #[prost(uint32, tag = "1")] + pub conflict_marker_len: u32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct FileState { #[prost(int64, tag = "1")] pub mtime_millis_since_epoch: i64, @@ -12,6 +19,8 @@ pub struct FileState { #[deprecated] #[prost(bytes = "vec", tag = "4")] pub conflict_id: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "5")] + pub materialized_conflict_data: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index e3c4a00f6f..cd2ace3e6d 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -13,12 +13,15 @@ // limitations under the License. use indoc::indoc; +use itertools::Itertools; use jj_lib::backend::FileId; +use jj_lib::conflicts::choose_materialized_conflict_marker_len; use jj_lib::conflicts::extract_as_single_hunk; use jj_lib::conflicts::materialize_merge_result_to_bytes; use jj_lib::conflicts::parse_conflict; use jj_lib::conflicts::update_from_content; use jj_lib::conflicts::ConflictMarkerStyle; +use jj_lib::conflicts::MIN_CONFLICT_MARKER_LEN; use jj_lib::merge::Merge; use jj_lib::repo::Repo; use jj_lib::repo_path::RepoPath; @@ -505,7 +508,7 @@ fn test_materialize_parse_roundtrip() { // The first add should always be from the left side insta::assert_debug_snapshot!( - parse_conflict(materialized.as_bytes(), conflict.num_sides()), + parse_conflict(materialized.as_bytes(), conflict.num_sides(), MIN_CONFLICT_MARKER_LEN), @r###" Some( [ @@ -588,10 +591,16 @@ fn test_materialize_parse_roundtrip_different_markers() { for materialize_style in all_styles { let materialized = materialize_conflict_string(store, path, &conflict, materialize_style); for parse_style in all_styles { - let parsed = - update_from_content(&conflict, store, path, materialized.as_bytes(), parse_style) - .block_on() - .unwrap(); + let parsed = update_from_content( + &conflict, + store, + path, + materialized.as_bytes(), + parse_style, + MIN_CONFLICT_MARKER_LEN, + ) + .block_on() + .unwrap(); assert_eq!( parsed, conflict, @@ -628,7 +637,8 @@ fn test_materialize_conflict_no_newlines_at_eof() { // BUG(#3968): These conflict markers cannot be parsed insta::assert_debug_snapshot!(parse_conflict( materialized.as_bytes(), - conflict.num_sides() + conflict.num_sides(), + MIN_CONFLICT_MARKER_LEN ),@"None"); } @@ -795,7 +805,8 @@ line 3 line 4 line 5 "}, - 2 + 2, + 7 ), None ); @@ -817,7 +828,8 @@ fn test_parse_conflict_simple() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), @r###" Some( @@ -853,7 +865,8 @@ fn test_parse_conflict_simple() { >>>>>>> More and more text line 5 "}, - 2 + 2, + 7 ), @r###" Some( @@ -892,7 +905,8 @@ fn test_parse_conflict_simple() { >>>>>>> Random text line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -931,7 +945,8 @@ fn test_parse_conflict_simple() { >>>>>>> Random text line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -969,7 +984,8 @@ fn test_parse_conflict_simple() { >>>>>>> End line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1005,7 +1021,8 @@ fn test_parse_conflict_simple() { >>>>>>> End line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1027,8 +1044,8 @@ fn test_parse_conflict_simple() { ) "# ); - // The conflict markers are too long and shouldn't parse (though we may - // decide to change this in the future) + // The conflict markers are longer than the originally materialized markers, but + // we allow them to parse anyway insta::assert_debug_snapshot!( parse_conflict(indoc! {b" line 1 @@ -1043,9 +1060,28 @@ fn test_parse_conflict_simple() { >>>>>>>>>>> line 5 "}, - 2 + 2, + 7 ), - @"None" + @r#" + Some( + [ + Resolved( + "line 1\n", + ), + Conflicted( + [ + "line 2\nleft\nline 4\n", + "line 2\nline 3\nline 4\n", + "right\n", + ], + ), + Resolved( + "line 5\n", + ), + ], + ) + "# ); } @@ -1071,7 +1107,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), @r###" Some( @@ -1114,7 +1151,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> Random text line 5 "}, - 3 + 3, + 7 ), @r###" Some( @@ -1160,7 +1198,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> Random text line 5 "}, - 3 + 3, + 7 ), @r#" Some( @@ -1203,7 +1242,8 @@ fn test_parse_conflict_crlf_markers() { >>>>>>>\r line 5\r "}, - 2 + 2, + 7 ), @r#" Some( @@ -1248,7 +1288,8 @@ fn test_parse_conflict_diff_stripped_whitespace() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1290,7 +1331,8 @@ fn test_parse_conflict_wrong_arity() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), None ); @@ -1311,7 +1353,8 @@ fn test_parse_conflict_malformed_missing_removes() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1334,7 +1377,8 @@ fn test_parse_conflict_malformed_marker() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1358,7 +1402,8 @@ fn test_parse_conflict_malformed_diff() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1380,7 +1425,8 @@ fn test_parse_conflict_snapshot_missing_header() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1400,7 +1446,8 @@ fn test_parse_conflict_wrong_git_style() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1422,7 +1469,8 @@ fn test_parse_conflict_git_reordered_headers() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1448,7 +1496,8 @@ fn test_parse_conflict_git_too_many_sides() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), None ); @@ -1471,7 +1520,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1489,7 +1539,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1506,7 +1557,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> Conflict 1 of 1 ends line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1541,7 +1593,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> Side #2 (Conflict 1 of 1 ends) line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1584,9 +1637,16 @@ fn test_update_conflict_from_content() { let materialized = materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Diff); let parse = |content| { - update_from_content(&conflict, store, path, content, ConflictMarkerStyle::Diff) - .block_on() - .unwrap() + update_from_content( + &conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + MIN_CONFLICT_MARKER_LEN, + ) + .block_on() + .unwrap() }; assert_eq!(parse(materialized.as_bytes()), conflict); @@ -1634,9 +1694,16 @@ fn test_update_conflict_from_content_modify_delete() { let materialized = materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Diff); let parse = |content| { - update_from_content(&conflict, store, path, content, ConflictMarkerStyle::Diff) - .block_on() - .unwrap() + update_from_content( + &conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + MIN_CONFLICT_MARKER_LEN, + ) + .block_on() + .unwrap() }; assert_eq!(parse(materialized.as_bytes()), conflict); @@ -1690,9 +1757,16 @@ fn test_update_conflict_from_content_simplified_conflict() { let materialized_simplified = materialize_conflict_string(store, path, &simplified_conflict, ConflictMarkerStyle::Diff); let parse = |content| { - update_from_content(&conflict, store, path, content, ConflictMarkerStyle::Diff) - .block_on() - .unwrap() + update_from_content( + &conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + MIN_CONFLICT_MARKER_LEN, + ) + .block_on() + .unwrap() }; insta::assert_snapshot!( materialized, @@ -1810,6 +1884,353 @@ fn test_update_conflict_from_content_simplified_conflict() { ); } +#[test] +fn test_update_conflict_from_content_with_long_markers() { + let test_repo = TestRepo::init(); + let store = test_repo.repo.store(); + + // Create conflicts which contain conflict markers of varying lengths + let path = RepoPath::from_internal_string("dir/file"); + let base_file_id = testutils::write_file( + store, + path, + indoc! {" + line 1 + line 2 + line 3 + "}, + ); + let left_file_id = testutils::write_file( + store, + path, + indoc! {" + <<<< left 1 + line 2 + <<<<<<<<<<<< left 3 + "}, + ); + let right_file_id = testutils::write_file( + store, + path, + indoc! {" + >>>>>>> right 1 + line 2 + >>>>>>>>>>>> right 3 + "}, + ); + let conflict = Merge::from_removes_adds( + vec![Some(base_file_id.clone())], + vec![Some(left_file_id.clone()), Some(right_file_id.clone())], + ); + + // The conflict should be materialized using long conflict markers + let materialized_marker_len = choose_materialized_conflict_marker_len( + &extract_as_single_hunk(&conflict, store, path) + .block_on() + .unwrap(), + ); + assert!(materialized_marker_len > MIN_CONFLICT_MARKER_LEN); + let materialized = + materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Snapshot); + insta::assert_snapshot!(materialized, @r##" + <<<<<<<<<<<<<<<< Conflict 1 of 2 + ++++++++++++++++ Contents of side #1 + <<<< left 1 + ---------------- Contents of base + line 1 + ++++++++++++++++ Contents of side #2 + >>>>>>> right 1 + >>>>>>>>>>>>>>>> Conflict 1 of 2 ends + line 2 + <<<<<<<<<<<<<<<< Conflict 2 of 2 + ++++++++++++++++ Contents of side #1 + <<<<<<<<<<<< left 3 + ---------------- Contents of base + line 3 + ++++++++++++++++ Contents of side #2 + >>>>>>>>>>>> right 3 + >>>>>>>>>>>>>>>> Conflict 2 of 2 ends + "## + ); + + // Parse the conflict markers using a different conflict marker style. This is + // to avoid the two versions of the file being obviously identical, so that we + // can test the actual parsing logic. + let parse = |conflict, content| { + update_from_content( + conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + materialized_marker_len, + ) + .block_on() + .unwrap() + }; + assert_eq!(parse(&conflict, materialized.as_bytes()), conflict); + + // Test resolving the conflict, leaving some fake conflict markers which should + // not be parsed since they are too short + let resolved_file_contents = indoc! {" + <<<<<<<<<<<< not a real conflict! + ++++++++++++ + left + ------------ + base + ++++++++++++ + right + >>>>>>>>>>>> + "}; + let resolved_file_id = testutils::write_file(store, path, resolved_file_contents); + assert_eq!( + parse(&conflict, resolved_file_contents.as_bytes()), + Merge::normal(resolved_file_id) + ); + + // Resolve one of the conflicts, decreasing the minimum conflict marker length + let new_conflict_contents = indoc! {" + <<<<<<<<<<<<<<<< Conflict 1 of 2 + ++++++++++++++++ Contents of side #1 + <<<< left 1 + ---------------- Contents of base + line 1 + ++++++++++++++++ Contents of side #2 + >>>>>>> right 1 + >>>>>>>>>>>>>>>> Conflict 1 of 2 ends + line 2 + line 3 + "}; + + // Confirm that the new conflict parsed correctly + let new_conflict = parse(&conflict, new_conflict_contents.as_bytes()); + assert_eq!(new_conflict.num_sides(), 2); + let new_conflict_terms = new_conflict + .iter() + .map(|id| { + let mut file = store.read_file(path, id.as_ref().unwrap()).unwrap(); + let mut buf = String::new(); + file.read_to_string(&mut buf).unwrap(); + buf + }) + .collect_vec(); + let [new_left_side, new_base, new_right_side] = new_conflict_terms.as_slice() else { + unreachable!() + }; + insta::assert_snapshot!(new_left_side, @r#" + <<<< left 1 + line 2 + line 3 + "#); + insta::assert_snapshot!(new_base, @r#" + line 1 + line 2 + line 3 + "#); + insta::assert_snapshot!(new_right_side, @r#" + >>>>>>> right 1 + line 2 + line 3 + "#); + + // The conflict markers should still parse in future snapshots even though + // they're now longer than necessary + assert_eq!( + parse(&new_conflict, new_conflict_contents.as_bytes()), + new_conflict + ); + + // If we add back the second conflict, it should still be parsed correctly + // (the fake conflict markers shouldn't be interpreted as conflict markers + // still, since they aren't the longest ones in the file). + assert_eq!(parse(&new_conflict, materialized.as_bytes()), conflict); + + // If the new conflict is materialized again, it should have shorter + // conflict markers now + insta::assert_snapshot!( + materialize_conflict_string(store, path, &new_conflict, ConflictMarkerStyle::Snapshot), + @r##" + <<<<<<<<<<< Conflict 1 of 1 + +++++++++++ Contents of side #1 + <<<< left 1 + ----------- Contents of base + line 1 + +++++++++++ Contents of side #2 + >>>>>>> right 1 + >>>>>>>>>>> Conflict 1 of 1 ends + line 2 + line 3 + "## + ); +} + +#[test] +fn test_update_from_content_malformed_conflict() { + let test_repo = TestRepo::init(); + let store = test_repo.repo.store(); + + let path = RepoPath::from_internal_string("dir/file"); + let base_file_id = testutils::write_file( + store, + path, + indoc! {" + line 1 + line 2 + line 3 + line 4 + line 5 + "}, + ); + let left_file_id = testutils::write_file( + store, + path, + indoc! {" + line 1 + line 2 left + line 3 + line 4 left + line 5 + "}, + ); + let right_file_id = testutils::write_file( + store, + path, + indoc! {" + line 1 + line 2 right + line 3 + line 4 right + line 5 + "}, + ); + let conflict = Merge::from_removes_adds( + vec![Some(base_file_id.clone())], + vec![Some(left_file_id.clone()), Some(right_file_id.clone())], + ); + + // The conflict should be materialized with normal markers + let materialized_marker_len = choose_materialized_conflict_marker_len( + &extract_as_single_hunk(&conflict, store, path) + .block_on() + .unwrap(), + ); + assert!(materialized_marker_len == MIN_CONFLICT_MARKER_LEN); + + let materialized = + materialize_conflict_string(store, path, &conflict, ConflictMarkerStyle::Diff); + insta::assert_snapshot!(materialized, @r##" + line 1 + <<<<<<< Conflict 1 of 2 + %%%%%%% Changes from base to side #1 + -line 2 + +line 2 left + +++++++ Contents of side #2 + line 2 right + >>>>>>> Conflict 1 of 2 ends + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + +++++++ Contents of side #2 + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "## + ); + + let parse = |conflict, content| { + update_from_content( + conflict, + store, + path, + content, + ConflictMarkerStyle::Diff, + materialized_marker_len, + ) + .block_on() + .unwrap() + }; + assert_eq!(parse(&conflict, materialized.as_bytes()), conflict); + + // Make a change to the second conflict that causes it to become invalid + let new_conflict_contents = indoc! {" + line 1 + <<<<<<< Conflict 1 of 2 + %%%%%%% Changes from base to side #1 + -line 2 + +line 2 left + +++++++ Contents of side #2 + line 2 right + >>>>>>> Conflict 1 of 2 ends + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "}; + // On the first snapshot, it will parse as a conflict containing conflict + // markers as text + let new_conflict = parse(&conflict, new_conflict_contents.as_bytes()); + assert_eq!(new_conflict.num_sides(), 2); + let new_conflict_terms = new_conflict + .iter() + .map(|id| { + let mut file = store.read_file(path, id.as_ref().unwrap()).unwrap(); + let mut buf = String::new(); + file.read_to_string(&mut buf).unwrap(); + buf + }) + .collect_vec(); + let [new_left_side, new_base, new_right_side] = new_conflict_terms.as_slice() else { + unreachable!() + }; + insta::assert_snapshot!(new_left_side, @r##" + line 1 + line 2 left + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "##); + insta::assert_snapshot!(new_base, @r##" + line 1 + line 2 + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "##); + insta::assert_snapshot!(new_right_side, @r##" + line 1 + line 2 right + line 3 + <<<<<<< Conflict 2 of 2 + %%%%%%% Changes from base to side #1 + -line 4 + +line 4 left + line 4 right + >>>>>>> Conflict 2 of 2 ends + line 5 + "##); + + // Even though the file now contains markers of length 7, the materialized + // markers of length 7 are still parsed + let second_snapshot = parse(&new_conflict, new_conflict_contents.as_bytes()); + assert_eq!(second_snapshot, new_conflict); +} + fn materialize_conflict_string( store: &Store, path: &RepoPath,