From b0dee781da2198f44258bc23ec1cdb8236d7015a Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Thu, 21 Nov 2024 22:07:53 -0600 Subject: [PATCH 1/4] conflicts: demo failed parse of markers with CRLF --- lib/tests/test_conflicts.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index 11bbf5ab7d..8534f817e0 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -725,6 +725,29 @@ fn test_parse_conflict_multi_way() { ); } +#[test] +fn test_parse_conflict_crlf_markers() { + // Conflict markers aren't recognized due to CRLF + assert_eq!( + parse_conflict( + indoc! {b" + line 1\r + <<<<<<<\r + +++++++\r + left\r + -------\r + base\r + +++++++\r + right\r + >>>>>>>\r + line 5\r + "}, + 2 + ), + None + ); +} + #[test] fn test_parse_conflict_wrong_arity() { // Valid conflict marker but it has fewer sides than the caller expected From f16639949c76c9b3cd0daf7aefc2e3b40a0388ba Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Wed, 20 Nov 2024 18:04:25 -0600 Subject: [PATCH 2/4] conflicts: allow CRLF line endings on conflict markers Currently, conflict markers ending in CRLF line endings aren't allowed. I don't see any reason why we should reject them, since some editors/tools might produce CRLF automatically on Windows when saving files, which would break the conflicts otherwise. --- lib/src/conflicts.rs | 12 ++++++++++-- lib/tests/test_conflicts.rs | 24 +++++++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 4d139e2355..44557c7685 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -20,6 +20,7 @@ use std::io::Write; use std::iter::zip; use bstr::BString; +use bstr::ByteSlice; use futures::stream::BoxStream; use futures::try_join; use futures::Stream; @@ -394,7 +395,7 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option Merge { let mut removes = vec![]; let mut adds = vec![]; for line in input.split_inclusive(|b| *b == b'\n') { - if CONFLICT_MARKER_REGEX.is_match_at(line, 0) { + if is_conflict_marker_line(line) { match line[0] { CONFLICT_DIFF_LINE_CHAR => { state = State::Diff; @@ -492,6 +493,13 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge { } } +/// Check whether a line is a conflict marker. Removes trailing whitespace +/// before checking against regex to ensure it parses CRLF endings correctly. +fn is_conflict_marker_line(line: &[u8]) -> bool { + let line = line.trim_end_with(|ch| ch.is_ascii_whitespace()); + CONFLICT_MARKER_REGEX.is_match_at(line, 0) +} + /// Parses conflict markers in `content` and returns an updated version of /// `file_ids` with the new contents. If no (valid) conflict markers remain, a /// single resolves `FileId` will be returned. diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index 8534f817e0..126d879d6a 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -727,8 +727,8 @@ fn test_parse_conflict_multi_way() { #[test] fn test_parse_conflict_crlf_markers() { - // Conflict markers aren't recognized due to CRLF - assert_eq!( + // Conflict markers should be recognized even with CRLF + insta::assert_debug_snapshot!( parse_conflict( indoc! {b" line 1\r @@ -744,7 +744,25 @@ fn test_parse_conflict_crlf_markers() { "}, 2 ), - None + @r#" + Some( + [ + Resolved( + "line 1\r\n", + ), + Conflicted( + [ + "left\r\n", + "base\r\n", + "right\r\n", + ], + ), + Resolved( + "line 5\r\n", + ), + ], + ) + "# ); } From 8f71310dea3d7c9e72f7d52aca5aa534c9ddf149 Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Thu, 21 Nov 2024 22:20:51 -0600 Subject: [PATCH 3/4] conflicts: demo failed parse of diff with empty line --- lib/tests/test_conflicts.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index 126d879d6a..d3ff2fb438 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -766,6 +766,32 @@ fn test_parse_conflict_crlf_markers() { ); } +#[test] +fn test_parse_conflict_diff_stripped_whitespace() { + // Conflict parsing fails since diff contains empty line without leading space + assert_eq!( + parse_conflict( + indoc! {b" + line 1 + <<<<<<< + %%%%%%% + line 2 + + -line 3 + +left + \r + line 4 + +++++++ + right + >>>>>>> + line 5 + "}, + 2 + ), + None + ); +} + #[test] fn test_parse_conflict_wrong_arity() { // Valid conflict marker but it has fewer sides than the caller expected From 196fbefcb20dfb82540e8587d5515dd377d63689 Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Wed, 20 Nov 2024 18:04:32 -0600 Subject: [PATCH 4/4] conflicts: allow stripped trailing whitespace in diffs Some editors strip trailing whitespace on save, which breaks any diffs which have context lines, since the parsing function expects them to start with a space. There's no visual difference between " \n" and "\n", so it seems reasonable to accept both. --- lib/src/conflicts.rs | 6 ++++++ lib/tests/test_conflicts.rs | 25 ++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 44557c7685..0417422e28 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -467,6 +467,12 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge { } else if let Some(rest) = line.strip_prefix(b" ") { removes.last_mut().unwrap().extend_from_slice(rest); adds.last_mut().unwrap().extend_from_slice(rest); + } else if line == b"\n" || line == b"\r\n" { + // Some editors strip trailing whitespace, so " \n" might become "\n". It would + // be unfortunate if this prevented the conflict from being parsed, so we add + // the empty line to the "remove" and "add" as if there was a space in front + removes.last_mut().unwrap().extend_from_slice(line); + adds.last_mut().unwrap().extend_from_slice(line); } else { // Doesn't look like a valid conflict return Merge::resolved(BString::new(vec![])); diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index d3ff2fb438..b6f7a9371e 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -768,8 +768,9 @@ fn test_parse_conflict_crlf_markers() { #[test] fn test_parse_conflict_diff_stripped_whitespace() { - // Conflict parsing fails since diff contains empty line without leading space - assert_eq!( + // Should be able to parse conflict even if diff contains empty line (without + // even a leading space, which is sometimes stripped by text editors) + insta::assert_debug_snapshot!( parse_conflict( indoc! {b" line 1 @@ -788,7 +789,25 @@ fn test_parse_conflict_diff_stripped_whitespace() { "}, 2 ), - None + @r#" + Some( + [ + Resolved( + "line 1\n", + ), + Conflicted( + [ + "line 2\n\nleft\n\r\nline 4\n", + "line 2\n\nline 3\n\r\nline 4\n", + "right\n", + ], + ), + Resolved( + "line 5\n", + ), + ], + ) + "# ); }