diff --git a/cli/tests/test_obslog_command.rs b/cli/tests/test_obslog_command.rs index c29d3cc008..8112891344 100644 --- a/cli/tests/test_obslog_command.rs +++ b/cli/tests/test_obslog_command.rs @@ -59,13 +59,13 @@ fn test_obslog_with_or_without_diff() { @ rlvkpnrz test.user@example.com 2001-02-03 08:05:10 66b42ad3 │ my description │ Resolved conflict in file1: - │ 1 1: <<<<<<< Conflict 1 of 1resolved + │ 1 : <<<<<<< Conflict 1 of 1 │ 2 : %%%%%%% Changes from base to side #1 │ 3 : -foo │ 4 : +++++++ Contents of side #2 │ 5 : foo │ 6 : bar - │ 7 : >>>>>>> Conflict 1 of 1 ends + │ 7 1: >>>>>>> Conflict 1 of 1 endsresolved ◉ rlvkpnrz hidden test.user@example.com 2001-02-03 08:05:09 cf73917d conflict │ my description ◉ rlvkpnrz hidden test.user@example.com 2001-02-03 08:05:09 068224a7 diff --git a/lib/src/diff.rs b/lib/src/diff.rs index ccf42577c5..dc31e1d851 100644 --- a/lib/src/diff.rs +++ b/lib/src/diff.rs @@ -230,12 +230,18 @@ fn unchanged_ranges_lcs( // the LCS. let Some(uncommon_shared_words) = left_histogram .count_to_words - .values() - .map(|left_words| -> Vec<&[u8]> { + .iter() + .map(|(left_count, left_words)| -> Vec<&[u8]> { left_words .iter() .copied() - .filter(|left_word| right_histogram.word_to_positions.contains_key(left_word)) + .filter(|left_word| { + let right_count = right_histogram + .word_to_positions + .get(left_word) + .map_or(0, |right_positions| right_positions.len()); + *left_count == right_count + }) .collect() }) .find(|words| !words.is_empty()) @@ -243,23 +249,14 @@ fn unchanged_ranges_lcs( return vec![]; }; - // Let's say our inputs are "a b a b" and "a b c c b a b". We will have found - // the least common words to be "a" and "b". We now assume that each - // occurrence of each word lines up in the left and right input. We do that - // by numbering the shared occurrences, effectively instead comparing "a1 b1 - // a2 b2" and "a1 b1 c c b2 a2 b". We then walk the common words in the - // right input in order (["a1", "b1", "b2", "a2"]), and record the index of - // that word in the left input ([0,1,3,2]). We then find the LCS and split - // points based on that ([0,1,3] or [0,1,2] are both valid). - // [(index into left_ranges, word, occurrence #)] let mut left_positions = vec![]; let mut right_positions = vec![]; for uncommon_shared_word in uncommon_shared_words { let left_occurrences = &left_histogram.word_to_positions[uncommon_shared_word]; let right_occurrences = &right_histogram.word_to_positions[uncommon_shared_word]; - let shared_count = min(left_occurrences.len(), right_occurrences.len()); - for occurrence in 0..shared_count { + assert_eq!(left_occurrences.len(), right_occurrences.len()); + for occurrence in 0..left_occurrences.len() { left_positions.push(( left_occurrences[occurrence], uncommon_shared_word, @@ -800,6 +797,8 @@ mod tests { #[test] fn test_unchanged_ranges_non_unique_removed() { + // We used to consider the first two "a" in the first input to match the two + // "a"s in the second input. We no longer do. assert_eq!( unchanged_ranges( b"a a a a", @@ -807,7 +806,7 @@ mod tests { &[0..1, 2..3, 4..5, 6..7], &[0..1, 2..3, 4..5, 6..7], ), - vec![(0..1, 0..1), (2..3, 4..5)] + vec![(0..1, 0..1)] ); assert_eq!( unchanged_ranges( @@ -816,7 +815,7 @@ mod tests { &[0..1, 2..3, 4..5, 6..7], &[0..1, 2..3, 4..5, 6..7], ), - vec![(0..1, 2..3), (2..3, 6..7)] + vec![(6..7, 6..7)] ); assert_eq!( unchanged_ranges( @@ -825,7 +824,7 @@ mod tests { &[0..1, 2..3, 4..5, 6..7], &[0..1, 2..3, 4..5, 6..7], ), - vec![(0..1, 2..3), (2..3, 4..5)] + vec![] ); assert_eq!( unchanged_ranges( @@ -834,12 +833,14 @@ mod tests { &[0..1, 2..3, 4..5, 6..7], &[0..1, 2..3, 4..5, 6..7], ), - vec![(0..1, 0..1), (2..3, 6..7)] + vec![(0..1, 0..1), (6..7, 6..7)] ); } #[test] fn test_unchanged_ranges_non_unique_added() { + // We used to consider the first two "a" in the first input to match the two + // "a"s in the second input. We no longer do. assert_eq!( unchanged_ranges( b"a b a c", @@ -847,7 +848,7 @@ mod tests { &[0..1, 2..3, 4..5, 6..7], &[0..1, 2..3, 4..5, 6..7], ), - vec![(0..1, 0..1), (4..5, 2..3)] + vec![(0..1, 0..1)] ); assert_eq!( unchanged_ranges( @@ -856,7 +857,7 @@ mod tests { &[0..1, 2..3, 4..5, 6..7], &[0..1, 2..3, 4..5, 6..7], ), - vec![(2..3, 0..1), (6..7, 2..3)] + vec![(6..7, 6..7)] ); assert_eq!( unchanged_ranges( @@ -865,7 +866,7 @@ mod tests { &[0..1, 2..3, 4..5, 6..7], &[0..1, 2..3, 4..5, 6..7], ), - vec![(2..3, 0..1), (4..5, 2..3)] + vec![] ); assert_eq!( unchanged_ranges( @@ -874,7 +875,7 @@ mod tests { &[0..1, 2..3, 4..5, 6..7], &[0..1, 2..3, 4..5, 6..7], ), - vec![(0..1, 0..1), (6..7, 2..3)] + vec![(0..1, 0..1), (6..7, 6..7)] ); } @@ -1258,16 +1259,12 @@ int main(int argc, char **argv) DiffHunk::Matching(b"\t\tunsigned int mode;\n"), DiffHunk::Different(vec![b"", b"\t\tint fd;\n\n"]), DiffHunk::Matching(b"\t\tif (size < len + 20 || sscanf(buffer, \"%o\", &mode) != 1)\n\t\t\tusage(\"corrupt \'tree\' file\");\n\t\tbuffer = sha1 + 20;\n\t\tsize -= len + 20;\n\t\t"), - DiffHunk::Different(vec![b"printf", b"data = read_sha1_file"]), - DiffHunk::Matching(b"("), - DiffHunk::Different(vec![b"\"%o %s (%s)\\n\", mode, path, sha1_to_hex(", b""]), - DiffHunk::Matching(b"sha1"), - DiffHunk::Different(vec![b"", b", type, &filesize"]), - DiffHunk::Matching(b")"), - DiffHunk::Different(vec![b")", b""]), - DiffHunk::Matching(b";\n"), - DiffHunk::Different(vec![b"", b"\t\tif (!data || strcmp(type, \"blob\"))\n\t\t\tusage(\"tree file refers to bad file data\");\n\t\tfd = create_file(path);\n\t\tif (fd < 0)\n\t\t\tusage(\"unable to create file\");\n\t\tif (write(fd, data, filesize) != filesize)\n\t\t\tusage(\"unable to write file\");\n\t\tfchmod(fd, mode);\n\t\tclose(fd);\n\t\tfree(data);\n"]), - DiffHunk::Matching(b"\t}\n\treturn 0;\n}\n\nint main(int argc, char **argv)\n{\n\tint fd;\n\tunsigned char sha1[20];\n\n\tif (argc != 2)\n\t\tusage(\"read-tree \");\n\tif (get_sha1_hex(argv[1], sha1) < 0)\n\t\tusage(\"read-tree \");\n\tsha1_file_directory = getenv(DB_ENVIRONMENT);\n\tif (!sha1_file_directory)\n\t\tsha1_file_directory = DEFAULT_DB_ENVIRONMENT;\n\tif (unpack(sha1) < 0)\n\t\tusage(\"unpack failed\");\n\treturn 0;\n}\n") + DiffHunk::Different(vec![b"printf(\"%o %s (%s)\\n\", mode, path,", b"data ="]), + DiffHunk::Matching(b" "), + DiffHunk::Different(vec![b"sha1_to_hex", b"read_sha1_file"]), + DiffHunk::Matching(b"(sha1"), + DiffHunk::Different(vec![b")", b", type, &filesize);\n\t\tif (!data || strcmp(type, \"blob\"))\n\t\t\tusage(\"tree file refers to bad file data\");\n\t\tfd = create_file(path);\n\t\tif (fd < 0)\n\t\t\tusage(\"unable to create file\");\n\t\tif (write(fd, data, filesize) != filesize)\n\t\t\tusage(\"unable to write file\");\n\t\tfchmod(fd, mode);\n\t\tclose(fd);\n\t\tfree(data"]), + DiffHunk::Matching(b");\n\t}\n\treturn 0;\n}\n\nint main(int argc, char **argv)\n{\n\tint fd;\n\tunsigned char sha1[20];\n\n\tif (argc != 2)\n\t\tusage(\"read-tree \");\n\tif (get_sha1_hex(argv[1], sha1) < 0)\n\t\tusage(\"read-tree \");\n\tsha1_file_directory = getenv(DB_ENVIRONMENT);\n\tif (!sha1_file_directory)\n\t\tsha1_file_directory = DEFAULT_DB_ENVIRONMENT;\n\tif (unpack(sha1) < 0)\n\t\tusage(\"unpack failed\");\n\treturn 0;\n}\n"), ] ); } diff --git a/lib/src/files.rs b/lib/src/files.rs index d7ecd73e9c..deb1c23fd5 100644 --- a/lib/src/files.rs +++ b/lib/src/files.rs @@ -373,10 +373,20 @@ mod tests { ]) ); // One side changes a line and adds a block after. The other side just adds the - // same block. This currently behaves as one would reasonably hope, but - // it's likely that it will change if when we fix - // https://github.com/martinvonz/jj/issues/761. Git and Mercurial both duplicate - // the block in the result. + // same block. You might expect the last block would be deduplicated. However, + // the changes in the first side can be parsed as follows: + // ``` + // a { + // - p + // + q + // +} + // + + // +b { + // + x + // } + // ``` + // Therefore, the first side modifies the block `a { .. }`, and the second side + // adds `b { .. }`. Git and Mercurial both duplicate the block in the result. assert_eq!( merge( &[b"\ @@ -411,6 +421,10 @@ a { q } +b { + x +} + b { x }