From f91a13c9d89baed4936ea71ae84f18f9f35bfb43 Mon Sep 17 00:00:00 2001 From: Ilya Grigoriev Date: Thu, 11 Jul 2024 16:53:19 -0700 Subject: [PATCH] conflicts: encode unmaterializeable lines Fixes #3968 Fixes #3975 --- lib/src/conflicts.rs | 125 +++++++++++++++++++++++++++++++++++- lib/tests/test_conflicts.rs | 113 ++++++++++++++++++++++++++++++-- 2 files changed, 232 insertions(+), 6 deletions(-) diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 1ffd73ea1da..5230d1851ec 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -14,9 +14,11 @@ #![allow(missing_docs)] +use std::borrow::Cow; use std::io::{Read, Write}; use std::iter::zip; +use bstr::ByteVec; use futures::{StreamExt, TryStreamExt}; use itertools::Itertools; use regex::bytes::{Regex, RegexBuilder}; @@ -51,6 +53,12 @@ static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy = once_cell::sync::La .build() .unwrap() }); +static DIRECTIVE_LINE_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + RegexBuilder::new(r"^\\JJ") + .multi_line(true) + .build() + .unwrap() +}); fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> std::io::Result<()> { for hunk in hunks { @@ -217,6 +225,12 @@ pub fn materialize_merge_result( single_hunk: Merge, output: &mut dyn Write, ) -> std::io::Result<()> { + let single_hunk: Merge = + single_hunk.into_map(|ContentHunk(side)| ContentHunk(encode_unmaterializeable_lines(side))); + + // From now on, we assume some invariants guaranteed by the encoding function + // above. See its docstring for details. + let slices = single_hunk.map(|content| content.0.as_slice()); let merge_result = files::merge(&slices); match merge_result { @@ -347,7 +361,11 @@ pub fn parse_merge_result(input: &[u8], num_sides: usize) -> Option> = + once_cell::sync::Lazy::new(|| format!("{JJ_VERBATIM_LINE}$0").into_bytes()); +static VERBATIM_LINE_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + RegexBuilder::new(&format!("^{}", regex::escape(JJ_VERBATIM_LINE))) + .multi_line(true) + .build() + .unwrap() +}); + +/// Encode a side of a conflict to satisfy some invariants +/// +/// - The result will not contain any conflict markers +/// - The result will contain 0 or more newline-terminated lines. In other +/// words, it is either empty or ends in a newline. +/// +/// This transformation is reversible and it is hoped that the result is +/// human-readable. See the tests below for examples. +fn encode_unmaterializeable_lines(mut side: Vec) -> Vec { + // TODO(ilyagr): It's likely the compiler won't be able to avoid a clone in + // the no-replacements case. If we care about that, we could try: + // - adding `if(regex.matches(side))` before `replace_all` + // - one of the approaches discussed in https://github.com/rust-lang/regex/issues/676. + // I like my proposal there in theory, but it would take work that I haven't + // done to make sure it works, and ideally it would be a crate. + // This also applies to the decoding function. + side = DIRECTIVE_LINE_REGEX + .replace_all(&Cow::from(side), JJ_VERBATIM_LINE_REPLACEMENT.as_slice()) + .to_vec(); + side = CONFLICT_MARKER_REGEX + .replace_all(&Cow::from(side), JJ_VERBATIM_LINE_REPLACEMENT.as_slice()) + .to_vec(); + if !side.is_empty() && !side.ends_with(b"\n") { + side.push_str(JJ_NO_NEWLINE_AT_EOF); + } + side +} + +/// Undo the transformation done by `encode_unmaterializeable_lines` +fn decode_materialized_side(mut side: Vec) -> Vec { + if side.ends_with(JJ_NO_NEWLINE_AT_EOF) { + side.truncate(side.len() - JJ_NO_NEWLINE_AT_EOF.len()); + } + side = VERBATIM_LINE_REGEX + .replace_all(&Cow::from(side), b"") + .to_vec(); + side +} + +#[cfg(test)] +mod test { + use bstr::BString; + use indoc::indoc; + + use super::{decode_materialized_side, encode_unmaterializeable_lines}; + + #[test] + fn test_conflict_side_encoding_and_decoding() { + let initial_text: BString = indoc! {br" + <<<<<<< + blahblah"} + .into(); + + let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into(); + insta::assert_snapshot!(encoded_text, @r###" + \JJ Verbatim Line:<<<<<<< + blahblah + \JJ: No newline at the end of file + "###); + assert_eq!( + BString::from(decode_materialized_side(encoded_text.clone().into())), + initial_text + ); + + let doubly_encoded_text: BString = + encode_unmaterializeable_lines(encoded_text.clone().into()).into(); + insta::assert_snapshot!(doubly_encoded_text, @r###" + \JJ Verbatim Line:\JJ Verbatim Line:<<<<<<< + blahblah + \JJ Verbatim Line:\JJ: No newline at the end of file + "###); + // The above should (and does) decode to a file with a newline at the end + assert_eq!( + BString::from(decode_materialized_side(doubly_encoded_text.into())), + encoded_text + ); + } + + #[test] + fn test_conflict_side_encoding_and_decoding2() { + let initial_text = br"\JJ: No newline at the end of file"; + + let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into(); + insta::assert_snapshot!(encoded_text, @r###" + \JJ Verbatim Line:\JJ: No newline at the end of file + \JJ: No newline at the end of file + "###); + assert_eq!( + BString::from(decode_materialized_side(encoded_text.clone().into())), + BString::from(initial_text) + ); + } +} diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index 79325b1e106..bd8ff217896 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -326,6 +326,96 @@ fn test_materialize_parse_roundtrip() { "###); } +#[test] +fn test_materialize_parse_roundtrip_tricky() { + let test_repo = TestRepo::init(); + let store = test_repo.repo.store(); + + let path = RepoPath::from_internal_string("file"); + let base_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 + line 2 <<<<<<< + <<<<<<< line 3 + line 4 + line 5"}, + ); + let left_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 left + line 2 left + <<<<<<<< line 3 + line 4 + line 5 left"}, + ); + let right_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 right + line 2 + line 3 + line 4 right + line 5 right + "}, + ); + + let conflict = Merge::from_removes_adds( + vec![Some(base_id.clone())], + vec![Some(left_id.clone()), Some(right_id.clone())], + ); + let materialized = materialize_conflict_string(store, path, &conflict); + insta::assert_snapshot!( + materialized, + @r###" + \JJ Verbatim Line:\JJ Verbatim Line: fake verbatim line + <<<<<<< Conflict 1 of 1 + %%%%%%% Changes from base to side #1 + -line 1 + -line 2 <<<<<<< + -\JJ Verbatim Line:<<<<<<< line 3 + +line 1 left + +line 2 left + +<<<<<<<< line 3 + line 4 + -line 5 + +line 5 left + \JJ: No newline at the end of file + +++++++ Contents of side #2 + line 1 right + line 2 + line 3 + line 4 right + line 5 right + >>>>>>> Conflict 1 of 1 ends + "### + ); + + // The first add should always be from the left side + insta::assert_debug_snapshot!( + parse_merge_result(materialized.as_bytes(), conflict.num_sides()), + @r###" + Some( + Conflicted( + [ + "\\JJ Verbatim Line: fake verbatim line\nline 1 left\nline 2 left\n<<<<<<<< line 3\nline 4\nline 5 left", + "\\JJ Verbatim Line: fake verbatim line\nline 1\nline 2 <<<<<<<\n<<<<<<< line 3\nline 4\nline 5", + "\\JJ Verbatim Line: fake verbatim line\nline 1 right\nline 2\nline 3\nline 4 right\nline 5 right\n", + ], + ), + ) + "###); + + // TODO: update_from_conflict? +} + #[test] fn test_materialize_conflict_no_newlines_at_eof() { let test_repo = TestRepo::init(); @@ -344,16 +434,29 @@ fn test_materialize_conflict_no_newlines_at_eof() { insta::assert_snapshot!(materialized, @r###" <<<<<<< Conflict 1 of 1 - %%%%%%% Changes from base to side #1 - -base+++++++ Contents of side #2 - right>>>>>>> Conflict 1 of 1 ends + +++++++ Contents of side #1 + %%%%%%% Changes from base to side #2 + -base + +right + \JJ: No newline at the end of file + >>>>>>> Conflict 1 of 1 ends "### ); - // BUG(#3968): These conflict markers cannot be parsed + // These conflict markers can be parsed (issue #3968) insta::assert_debug_snapshot!(parse_merge_result( materialized.as_bytes(), conflict.num_sides() - ),@"None"); + ),@r###" + Some( + Conflicted( + [ + "", + "base", + "right", + ], + ), + ) + "###); } #[test]