Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

diff: consider uncommon words to match only if they have the same count #763

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 37 additions & 54 deletions lib/src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,30 +193,19 @@ pub(crate) fn unchanged_ranges(
// the LCS.
let mut uncommon_shared_words = vec![];
while !left_histogram.count_to_words.is_empty() && uncommon_shared_words.is_empty() {
let left_words = left_histogram
.count_to_words
.first_entry()
.map(|x| x.remove())
.unwrap();
let (left_count, left_words) = left_histogram.count_to_words.pop_first().unwrap();
for left_word in left_words {
if right_histogram.word_to_positions.contains_key(left_word) {
uncommon_shared_words.push(left_word);
if let Some(right_positions) = right_histogram.word_to_positions.get(left_word) {
if right_positions.len() == left_count {
uncommon_shared_words.push(left_word);
}
}
}
}
if uncommon_shared_words.is_empty() {
return vec![];
}

// Let's say our inputs are "a b a b" and "a b c c b a b". We will have found
// the least common words to be "a" and "b". We now assume that each
// occurrence of each word lines up in the left and right input. We do that
// by numbering the shared occurrences, effectively instead comparing "a1 b1
// a2 b2" and "a1 b1 c c b2 a2 b". We then walk the common words in the
// right input in order (["a1", "b1", "b2", "a2"]), and record the index of
// that word in the left input ([0,1,3,2]). We then find the LCS and split
// points based on that ([0,1,3] or [0,1,2] are both valid).

// [(index into left_ranges, word, occurrence #)]
let mut left_positions = vec![];
let mut right_positions = vec![];
Expand All @@ -229,8 +218,8 @@ pub(crate) fn unchanged_ranges(
.word_to_positions
.get_mut(uncommon_shared_word)
.unwrap();
let shared_count = min(left_occurrences.len(), right_occurrences.len());
for occurrence in 0..shared_count {
assert_eq!(left_occurrences.len(), right_occurrences.len());
for occurrence in 0..left_occurrences.len() {
left_positions.push((
left_occurrences[occurrence],
uncommon_shared_word,
Expand Down Expand Up @@ -771,27 +760,31 @@ mod tests {

#[test]
fn test_unchanged_ranges_non_unique_removed() {
// We used to consider the first two "a"s in the first input to match the two
// "a"s in the second input. We no longer do.
assert_eq!(
unchanged_ranges(
b"a a a a",
b"a b a c",
&[0..1, 2..3, 4..5, 6..7],
&[0..1, 2..3, 4..5, 6..7],
),
vec![(0..1, 0..1), (2..3, 4..5)]
vec![]
);
}

#[test]
fn test_unchanged_ranges_non_unique_added() {
// We used to consider the two "a"s in the first input to match the first two
// "a"s in the second input. We no longer do.
assert_eq!(
unchanged_ranges(
b"a b a c",
b"a a a a",
&[0..1, 2..3, 4..5, 6..7],
&[0..1, 2..3, 4..5, 6..7],
),
vec![(0..1, 0..1), (4..5, 2..3)]
vec![]
);
}

Expand Down Expand Up @@ -918,8 +911,8 @@ mod tests {
assert_eq!(
diff.hunks().collect_vec(),
vec![
DiffHunk::Matching(b"a "),
DiffHunk::Different(vec![b"b ", b"X ", b""]),
DiffHunk::Matching(b"a"),
DiffHunk::Different(vec![b" b ", b" X ", b" "]),
DiffHunk::Matching(b"c"),
DiffHunk::Different(vec![b"", b"", b" X"]),
]
Expand Down Expand Up @@ -956,8 +949,8 @@ mod tests {
assert_eq!(
diff(b"a z", b"a S z"),
vec![
DiffHunk::Matching(b"a "),
DiffHunk::Different(vec![b"", b"S "]),
DiffHunk::Matching(b"a"),
DiffHunk::Different(vec![b" ", b" S "]),
DiffHunk::Matching(b"z"),
]
);
Expand All @@ -968,10 +961,10 @@ mod tests {
assert_eq!(
diff(b"a R R S S z", b"a S S R R z"),
vec![
DiffHunk::Matching(b"a "),
DiffHunk::Different(vec![b"R R ", b""]),
DiffHunk::Matching(b"S S "),
DiffHunk::Different(vec![b"", b"R R "]),
DiffHunk::Matching(b"a"),
DiffHunk::Different(vec![b" R R ", b" "]),
DiffHunk::Matching(b"S S"),
DiffHunk::Different(vec![b" ", b" R R "]),
DiffHunk::Matching(b"z")
],
);
Expand All @@ -985,18 +978,14 @@ mod tests {
b"a r r x q y z q b y q x r r c",
),
vec![
DiffHunk::Matching(b"a "),
DiffHunk::Different(vec![b"q", b"r"]),
DiffHunk::Matching(b" "),
DiffHunk::Different(vec![b"", b"r "]),
DiffHunk::Matching(b"x q y "),
DiffHunk::Different(vec![b"q ", b""]),
DiffHunk::Matching(b"z q b "),
DiffHunk::Different(vec![b"q ", b""]),
DiffHunk::Matching(b"y q x "),
DiffHunk::Different(vec![b"q", b"r"]),
DiffHunk::Matching(b" "),
DiffHunk::Different(vec![b"", b"r "]),
DiffHunk::Matching(b"a"),
DiffHunk::Different(vec![b" q ", b" r r "]),
DiffHunk::Matching(b"x q y"),
DiffHunk::Different(vec![b" q ", b" "]),
DiffHunk::Matching(b"z q b"),
DiffHunk::Different(vec![b" q ", b" "]),
DiffHunk::Matching(b"y q x"),
DiffHunk::Different(vec![b" q ", b" r r "]),
DiffHunk::Matching(b"c"),
]
);
Expand All @@ -1014,10 +1003,10 @@ mod tests {
b" pub fn write_fmt(&mut self, fmt: fmt::Arguments<\'_>) -> io::Result<()> {\n self.styler().write_fmt(fmt)\n"
),
vec![
DiffHunk::Matching(b" pub fn write_fmt(&mut self, fmt: fmt::Arguments<\'_>) "),
DiffHunk::Different(vec![b"", b"-> io::Result<()> "]),
DiffHunk::Matching(b"{\n self.styler().write_fmt(fmt)"),
DiffHunk::Different(vec![b".unwrap()", b""]),
DiffHunk::Matching(b" pub fn write_fmt(&mut self, fmt: fmt::Arguments<\'_"),
DiffHunk::Different(vec![b">) ", b">) -> io::Result<()> "]),
DiffHunk::Matching(b"{\n self.styler().write_fmt(fmt"),
DiffHunk::Different(vec![b").unwrap()", b")"]),
DiffHunk::Matching(b"\n")
]
);
Expand Down Expand Up @@ -1168,22 +1157,16 @@ int main(int argc, char **argv)
"##,
),
vec![
DiffHunk::Matching(b"/*\n * GIT - The information manager from hell\n *\n * Copyright (C) Linus Torvalds, 2005\n */\n#include \"#cache.h\"\n\n"),
DiffHunk::Different(vec![b"", b"static void create_directories(const char *path)\n{\n\tint len = strlen(path);\n\tchar *buf = malloc(len + 1);\n\tconst char *slash = path;\n\n\twhile ((slash = strchr(slash+1, \'/\')) != NULL) {\n\t\tlen = slash - path;\n\t\tmemcpy(buf, path, len);\n\t\tbuf[len] = 0;\n\t\tmkdir(buf, 0700);\n\t}\n}\n\nstatic int create_file(const char *path)\n{\n\tint fd = open(path, O_WRONLY | O_TRUNC | O_CREAT, 0600);\n\tif (fd < 0) {\n\t\tif (errno == ENOENT) {\n\t\t\tcreate_directories(path);\n\t\t\tfd = open(path, O_WRONLY | O_TRUNC | O_CREAT, 0600);\n\t\t}\n\t}\n\treturn fd;\n}\n\n"]),
DiffHunk::Matching(b"/*\n * GIT - The information manager from hell\n *\n * Copyright (C) Linus Torvalds, 2005\n */\n#include \"#cache.h\"\n"),
DiffHunk::Different(vec![b"\n", b"\nstatic void create_directories(const char *path)\n{\n\tint len = strlen(path);\n\tchar *buf = malloc(len + 1);\n\tconst char *slash = path;\n\n\twhile ((slash = strchr(slash+1, \'/\')) != NULL) {\n\t\tlen = slash - path;\n\t\tmemcpy(buf, path, len);\n\t\tbuf[len] = 0;\n\t\tmkdir(buf, 0700);\n\t}\n}\n\nstatic int create_file(const char *path)\n{\n\tint fd = open(path, O_WRONLY | O_TRUNC | O_CREAT, 0600);\n\tif (fd < 0) {\n\t\tif (errno == ENOENT) {\n\t\t\tcreate_directories(path);\n\t\t\tfd = open(path, O_WRONLY | O_TRUNC | O_CREAT, 0600);\n\t\t}\n\t}\n\treturn fd;\n}\n\n"]),
DiffHunk::Matching(b"static int unpack(unsigned char *sha1)\n{\n\tvoid *buffer;\n\tunsigned long size;\n\tchar type[20];\n\n\tbuffer = read_sha1_file(sha1, type, &size);\n\tif (!buffer)\n\t\tusage(\"unable to read sha1 file\");\n\tif (strcmp(type, \"tree\"))\n\t\tusage(\"expected a \'tree\' node\");\n\twhile (size) {\n\t\tint len = strlen(buffer)+1;\n\t\tunsigned char *sha1 = buffer + len;\n\t\tchar *path = strchr(buffer, \' \')+1;\n"),
DiffHunk::Different(vec![b"", b"\t\tchar *data;\n\t\tunsigned long filesize;\n"]),
DiffHunk::Matching(b"\t\tunsigned int mode;\n"),
DiffHunk::Different(vec![b"", b"\t\tint fd;\n\n"]),
DiffHunk::Matching(b"\t\tif (size < len + 20 || sscanf(buffer, \"%o\", &mode) != 1)\n\t\t\tusage(\"corrupt \'tree\' file\");\n\t\tbuffer = sha1 + 20;\n\t\tsize -= len + 20;\n\t\t"),
DiffHunk::Different(vec![b"printf", b"data = read_sha1_file"]),
DiffHunk::Matching(b"("),
DiffHunk::Different(vec![b"\"%o %s (%s)\\n\", mode, path, sha1_to_hex(", b""]),
DiffHunk::Different(vec![b"printf(\"%o %s (%s)\\n\", mode, path, sha1_to_hex(", b"data = read_sha1_file("]),
DiffHunk::Matching(b"sha1"),
DiffHunk::Different(vec![b"", b", type, &filesize"]),
DiffHunk::Matching(b")"),
DiffHunk::Different(vec![b")", b""]),
DiffHunk::Matching(b";\n"),
DiffHunk::Different(vec![b"", b"\t\tif (!data || strcmp(type, \"blob\"))\n\t\t\tusage(\"tree file refers to bad file data\");\n\t\tfd = create_file(path);\n\t\tif (fd < 0)\n\t\t\tusage(\"unable to create file\");\n\t\tif (write(fd, data, filesize) != filesize)\n\t\t\tusage(\"unable to write file\");\n\t\tfchmod(fd, mode);\n\t\tclose(fd);\n\t\tfree(data);\n"]),
DiffHunk::Different(vec![b"));\n", b", type, &filesize);\n\t\tif (!data || strcmp(type, \"blob\"))\n\t\t\tusage(\"tree file refers to bad file data\");\n\t\tfd = create_file(path);\n\t\tif (fd < 0)\n\t\t\tusage(\"unable to create file\");\n\t\tif (write(fd, data, filesize) != filesize)\n\t\t\tusage(\"unable to write file\");\n\t\tfchmod(fd, mode);\n\t\tclose(fd);\n\t\tfree(data);\n"]),
DiffHunk::Matching(b"\t}\n\treturn 0;\n}\n\nint main(int argc, char **argv)\n{\n\tint fd;\n\tunsigned char sha1[20];\n\n\tif (argc != 2)\n\t\tusage(\"read-tree <key>\");\n\tif (get_sha1_hex(argv[1], sha1) < 0)\n\t\tusage(\"read-tree <key>\");\n\tsha1_file_directory = getenv(DB_ENVIRONMENT);\n\tif (!sha1_file_directory)\n\t\tsha1_file_directory = DEFAULT_DB_ENVIRONMENT;\n\tif (unpack(sha1) < 0)\n\t\tusage(\"unpack failed\");\n\treturn 0;\n}\n")
]
);
Expand Down
26 changes: 12 additions & 14 deletions lib/src/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,8 @@ mod tests {
])
);
// One side changes a line and adds a block after. The other side just adds the
// same block. This currently behaves as one would reasonably hope, but
// it's likely that it will change when we fix
// https://github.com/martinvonz/jj/issues/761. Git and Mercurial both duplicate
// same block. This currently results in a conflict, but it's likely that it
// will change when we fix https://github.com/martinvonz/jj/issues/761. Git and Mercurial both duplicate
// the block in the result.
assert_eq!(
merge(
Expand Down Expand Up @@ -415,17 +414,16 @@ b {
"
],
),
MergeResult::Resolved(hunk(
b"\
a {
q
}

b {
x
}
"
))
MergeResult::Conflict(vec![
Conflict::resolved(hunk(b"a {\n")),
Conflict::new(
vec![hunk(b" p\n}\n")],
vec![
hunk(b" q\n}\n\nb {\n x\n}\n"),
hunk(b" p\n}\n\nb {\n x\n}\n"),
]
)
])
);
}
}
3 changes: 2 additions & 1 deletion tests/test_obslog_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ fn test_obslog_with_or_without_diff() {
@ rlvkpnrzqnoo [email protected] 2001-02-03 04:05:10.000 +07:00 66b42ad36073
│ my description
│ Resolved conflict in file1:
│ 1 1: <<<<<<<resolved
│ 1 : <<<<<<<
│ 2 : %%%%%%%
│ 3 : foo
│ 4 : +bar
│ 5 : +++++++
│ 6 : >>>>>>>
│ 1: resolved
◉ rlvkpnrzqnoo hidden [email protected] 2001-02-03 04:05:09.000 +07:00 af536e5af67e conflict
│ my description
◉ rlvkpnrzqnoo hidden [email protected] 2001-02-03 04:05:09.000 +07:00 6fbba7bcb590
Expand Down
18 changes: 12 additions & 6 deletions tests/test_resolve_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,14 @@ fn test_resolution() {
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["diff"]),
@r###"
Resolved conflict in file:
1 1: <<<<<<<resolution
1 : <<<<<<<
2 : %%%%%%%
3 : -base
4 : +a
5 : +++++++
6 : b
7 : >>>>>>>
1: resolution
"###);
insta::assert_snapshot!(test_env.jj_cmd_cli_error(&repo_path, &["resolve", "--list"]),
@r###"
Expand Down Expand Up @@ -142,13 +143,14 @@ fn test_resolution() {
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["diff"]),
@r###"
Resolved conflict in file:
1 1: <<<<<<<resolution
1 : <<<<<<<
2 : %%%%%%%
3 : -base
4 : +a
5 : +++++++
6 : b
7 : >>>>>>>
1: resolution
"###);

// Check that if merge tool leaves conflict markers in output file and
Expand Down Expand Up @@ -644,11 +646,12 @@ fn test_multiple_conflicts() {
Resolved conflict in another_file:
1 : <<<<<<<
2 : %%%%%%%
3 1: -secondresolution baseanother_file
3 : -second base
4 : +second a
5 : +++++++
6 : second b
7 : >>>>>>>
1: resolution another_file
"###);
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["resolve", "--list"]),
@r###"
Expand Down Expand Up @@ -682,11 +685,12 @@ fn test_multiple_conflicts() {
Resolved conflict in another_file:
1 : <<<<<<<
2 : %%%%%%%
3 1: first resolution for auto-secondchosen basefile
3 1: first resolution for auto-second base
4 : +second a
5 : +++++++
6 : second b
7 : >>>>>>>
1: chosen file
"###);
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["resolve", "--list"]),
@r###"
Expand All @@ -704,19 +708,21 @@ fn test_multiple_conflicts() {
Resolved conflict in another_file:
1 : <<<<<<<
2 : %%%%%%%
3 1: first resolution for auto-secondchosen basefile
3 1: first resolution for auto-second base
4 : +second a
5 : +++++++
6 : second b
7 : >>>>>>>
1: chosen file
Resolved conflict in this_file_has_a_very_long_name_to_test_padding:
1 : <<<<<<<
2 : %%%%%%%
3 1: second resolution for auto-firstchosen basefile
3 1: second resolution for auto-first base
4 : +first a
5 : +++++++
6 : first b
7 : >>>>>>>
1: chosen file
"###);

insta::assert_snapshot!(test_env.jj_cmd_cli_error(&repo_path, &["resolve", "--list"]),
Expand Down