From b74816a53e36cd2db2852f89eddf558d7d359414 Mon Sep 17 00:00:00 2001 From: Ilya Grigoriev Date: Thu, 11 Jul 2024 16:53:19 -0700 Subject: [PATCH] conflicts: encode unmaterializeable lines Fixes #3968 Fixes #3975 --- CHANGELOG.md | 9 +++ lib/src/conflicts.rs | 131 +++++++++++++++++++++++++++++++++++- lib/tests/test_conflicts.rs | 113 +++++++++++++++++++++++++++++-- 3 files changed, 247 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b26e7da4f56..00402ab0210 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,15 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * `jj diff --git` no longer shows the contents of binary files. +* Conflicts involving non-empty files that do not end in a newline no longer + look broken when materialized. + [#3968](https://github.com/martinvonz/jj/issues/3968) + +* Conflicts involving sides that themselves contain conflict markers can now be + materialized in a way that `jj` can parse correctly. + [#3975](https://github.com/martinvonz/jj/issues/3968) + + ## [0.19.0] - 2024-07-03 ### Breaking changes diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 1ffd73ea1da..d35fbcce44b 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -14,9 +14,11 @@ #![allow(missing_docs)] +use std::borrow::Cow; use std::io::{Read, Write}; use std::iter::zip; +use bstr::ByteVec; use futures::{StreamExt, TryStreamExt}; use itertools::Itertools; use regex::bytes::{Regex, RegexBuilder}; @@ -51,6 +53,12 @@ static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy = once_cell::sync::La .build() .unwrap() }); +static DIRECTIVE_LINE_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + RegexBuilder::new(r"^\\JJ") + .multi_line(true) + .build() + .unwrap() +}); fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> std::io::Result<()> { for hunk in hunks { @@ -217,6 +225,12 @@ pub fn materialize_merge_result( single_hunk: Merge, output: &mut dyn Write, ) -> std::io::Result<()> { + let single_hunk: Merge = + single_hunk.into_map(|ContentHunk(side)| ContentHunk(encode_unmaterializeable_lines(side))); + + // From now on, we assume some invariants guaranteed by the encoding function + // above. See its docstring for details. + let slices = single_hunk.map(|content| content.0.as_slice()); let merge_result = files::merge(&slices); match merge_result { @@ -347,7 +361,11 @@ pub fn parse_merge_result(input: &[u8], num_sides: usize) -> Option> = + once_cell::sync::Lazy::new(|| format!("{JJ_VERBATIM_LINE}$0").into_bytes()); +static VERBATIM_LINE_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + RegexBuilder::new(&format!("^{}", regex::escape(JJ_VERBATIM_LINE))) + .multi_line(true) + .build() + .unwrap() +}); + +/// Encode a side of a conflict to satisfy some invariants +/// +/// - The result will not contain any conflict markers +/// - The result will contain 0 or more newline-terminated lines. In other +/// words, it is either empty or ends in a newline. +/// +/// This transformation is reversible and it is hoped that the result is +/// human-readable. See the tests below for examples. +fn encode_unmaterializeable_lines(mut side: Vec) -> Vec { + replace_all_avoid_cloning( + &DIRECTIVE_LINE_REGEX, + &mut side, + JJ_VERBATIM_LINE_REPLACEMENT.as_slice(), + ); + replace_all_avoid_cloning( + &CONFLICT_MARKER_REGEX, + &mut side, + JJ_VERBATIM_LINE_REPLACEMENT.as_slice(), + ); + if !side.is_empty() && !side.ends_with(b"\n") { + side.push_str(JJ_NO_NEWLINE_AT_EOF); + } + side +} + +/// Undo the transformation done by `encode_unmaterializeable_lines` +fn decode_materialized_side(mut side: Vec) -> Vec { + if side.ends_with(JJ_NO_NEWLINE_AT_EOF) { + side.truncate(side.len() - JJ_NO_NEWLINE_AT_EOF.len()); + } + replace_all_avoid_cloning(&VERBATIM_LINE_REGEX, &mut side, b""); + side +} + +fn replace_all_avoid_cloning(regex: &Regex, side: &mut Vec, replacement: &[u8]) { + // TODO(ilyagr): The optimization to avoid the clone in the common case + // might be premature. See also + // https://github.com/rust-lang/regex/issues/676 for other options. I like + // my proposal there in theory, but it would take work that I haven't done + // to make sure it works, and ideally it would be a crate. + if regex.is_match(side) { + *side = regex.replace_all(side, replacement).to_vec(); + } +} + +#[cfg(test)] +mod test { + use bstr::BString; + use indoc::indoc; + + use super::{decode_materialized_side, encode_unmaterializeable_lines}; + + #[test] + fn test_conflict_side_encoding_and_decoding() { + let initial_text: BString = indoc! {br" + <<<<<<< + blahblah"} + .into(); + + let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into(); + insta::assert_snapshot!(encoded_text, @r###" + \JJ Verbatim Line:<<<<<<< + blahblah + \JJ: No newline at the end of file + "###); + assert_eq!( + BString::from(decode_materialized_side(encoded_text.clone().into())), + initial_text + ); + + let doubly_encoded_text: BString = + encode_unmaterializeable_lines(encoded_text.clone().into()).into(); + insta::assert_snapshot!(doubly_encoded_text, @r###" + \JJ Verbatim Line:\JJ Verbatim Line:<<<<<<< + blahblah + \JJ Verbatim Line:\JJ: No newline at the end of file + "###); + // The above should (and does) decode to a file with a newline at the end + assert_eq!( + BString::from(decode_materialized_side(doubly_encoded_text.into())), + encoded_text + ); + } + + #[test] + fn test_conflict_side_encoding_and_decoding2() { + let initial_text = br"\JJ: No newline at the end of file"; + + let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into(); + insta::assert_snapshot!(encoded_text, @r###" + \JJ Verbatim Line:\JJ: No newline at the end of file + \JJ: No newline at the end of file + "###); + assert_eq!( + BString::from(decode_materialized_side(encoded_text.clone().into())), + BString::from(initial_text) + ); + } +} diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index 79325b1e106..bd8ff217896 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -326,6 +326,96 @@ fn test_materialize_parse_roundtrip() { "###); } +#[test] +fn test_materialize_parse_roundtrip_tricky() { + let test_repo = TestRepo::init(); + let store = test_repo.repo.store(); + + let path = RepoPath::from_internal_string("file"); + let base_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 + line 2 <<<<<<< + <<<<<<< line 3 + line 4 + line 5"}, + ); + let left_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 left + line 2 left + <<<<<<<< line 3 + line 4 + line 5 left"}, + ); + let right_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 right + line 2 + line 3 + line 4 right + line 5 right + "}, + ); + + let conflict = Merge::from_removes_adds( + vec![Some(base_id.clone())], + vec![Some(left_id.clone()), Some(right_id.clone())], + ); + let materialized = materialize_conflict_string(store, path, &conflict); + insta::assert_snapshot!( + materialized, + @r###" + \JJ Verbatim Line:\JJ Verbatim Line: fake verbatim line + <<<<<<< Conflict 1 of 1 + %%%%%%% Changes from base to side #1 + -line 1 + -line 2 <<<<<<< + -\JJ Verbatim Line:<<<<<<< line 3 + +line 1 left + +line 2 left + +<<<<<<<< line 3 + line 4 + -line 5 + +line 5 left + \JJ: No newline at the end of file + +++++++ Contents of side #2 + line 1 right + line 2 + line 3 + line 4 right + line 5 right + >>>>>>> Conflict 1 of 1 ends + "### + ); + + // The first add should always be from the left side + insta::assert_debug_snapshot!( + parse_merge_result(materialized.as_bytes(), conflict.num_sides()), + @r###" + Some( + Conflicted( + [ + "\\JJ Verbatim Line: fake verbatim line\nline 1 left\nline 2 left\n<<<<<<<< line 3\nline 4\nline 5 left", + "\\JJ Verbatim Line: fake verbatim line\nline 1\nline 2 <<<<<<<\n<<<<<<< line 3\nline 4\nline 5", + "\\JJ Verbatim Line: fake verbatim line\nline 1 right\nline 2\nline 3\nline 4 right\nline 5 right\n", + ], + ), + ) + "###); + + // TODO: update_from_conflict? +} + #[test] fn test_materialize_conflict_no_newlines_at_eof() { let test_repo = TestRepo::init(); @@ -344,16 +434,29 @@ fn test_materialize_conflict_no_newlines_at_eof() { insta::assert_snapshot!(materialized, @r###" <<<<<<< Conflict 1 of 1 - %%%%%%% Changes from base to side #1 - -base+++++++ Contents of side #2 - right>>>>>>> Conflict 1 of 1 ends + +++++++ Contents of side #1 + %%%%%%% Changes from base to side #2 + -base + +right + \JJ: No newline at the end of file + >>>>>>> Conflict 1 of 1 ends "### ); - // BUG(#3968): These conflict markers cannot be parsed + // These conflict markers can be parsed (issue #3968) insta::assert_debug_snapshot!(parse_merge_result( materialized.as_bytes(), conflict.num_sides() - ),@"None"); + ),@r###" + Some( + Conflicted( + [ + "", + "base", + "right", + ], + ), + ) + "###); } #[test]