Skip to content

Commit

Permalink
diff: highlight word-level changes in git diffs
Browse files Browse the repository at this point in the history
The output looks somewhat similar to color-words diffs. Unified diffs are
verbose, but they are easier to follow when, for example, adjacent lines are
added/removed as well as modified.

Word-level diffing is always enabled. We could also add a config knob (or a
`!color` condition) to turn it off and save CPU time.

I originally considered disabling highlights in block insertion/deletion, but
that wasn't always great. This can be addressed separately as it also applies
to color-words diffs. #3958
  • Loading branch information
yuja committed Jul 4, 2024
1 parent fa442b6 commit 61ce79b
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 19 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
individually instead of being passed a directory by setting
`merge-tools.$TOOL.diff-invocation-mode="file-by-file"` in config.toml.

* In git diffs, word-level hunks are now highlighted with underline. See [diff
colors and styles](docs/config.md#diff-colors-and-styles) for customization.

### Fixed bugs

## [0.19.0] - 2024-07-03
Expand Down
96 changes: 84 additions & 12 deletions cli/src/diff_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

use std::cmp::max;
use std::collections::VecDeque;
use std::io;
use std::ops::Range;
use std::path::{Path, PathBuf};
use std::{io, mem};

use futures::{try_join, Stream, StreamExt};
use itertools::Itertools;
Expand Down Expand Up @@ -794,36 +794,46 @@ fn git_diff_part(
})
}

/// Kind of a line inside a unified diff hunk.
///
/// The kind selects the formatter label and the leading sigil used when the
/// line is written out.
#[derive(PartialEq)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum DiffLineType {
/// Line rendered with the "context" label and a " " sigil.
Context,
/// Line rendered with the "removed" label and a "-" sigil.
Removed,
/// Line rendered with the "added" label and a "+" sigil.
Added,
}

/// Classification of a token within a single diff line.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum DiffTokenType {
/// Token that is the same on both sides (a whole context line, or a piece
/// of a matching word-level hunk). Written without an additional label.
Matching,
/// Token that comes from a word-level hunk that differs between the two
/// sides. Written under the nested "token" label.
Different,
}

/// Tokens that make up one line of a diff, each paired with its
/// `DiffTokenType`. The byte slices borrow from the diffed content.
type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>;

/// One hunk of a unified diff.
///
/// `left_line_range` and `right_line_range` are the line ranges the hunk
/// covers on each side; their ends are advanced as lines are appended through
/// the `extend_*` methods. `lines` holds the hunk's lines in the order they
/// are written out, each tagged with its `DiffLineType`.
struct UnifiedDiffHunk<'content> {
left_line_range: Range<usize>,
right_line_range: Range<usize>,
lines: Vec<(DiffLineType, &'content [u8])>,
lines: Vec<(DiffLineType, DiffTokenVec<'content>)>,
}

impl<'content> UnifiedDiffHunk<'content> {
/// Appends unchanged lines to the hunk as `DiffLineType::Context` entries.
///
/// Both `left_line_range` and `right_line_range` grow by the number of lines
/// appended, since context lines exist on both sides.
fn extend_context_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) {
// Remember the length so the number of appended lines can be derived
// without consuming the iterator twice.
let old_len = self.lines.len();
self.lines
.extend(lines.into_iter().map(|line| (DiffLineType::Context, line)));
self.lines.extend(lines.into_iter().map(|line| {
// A context line is a single token that matches on both sides.
let tokens = vec![(DiffTokenType::Matching, line)];
(DiffLineType::Context, tokens)
}));
self.left_line_range.end += self.lines.len() - old_len;
self.right_line_range.end += self.lines.len() - old_len;
}

/// Appends lines to the hunk as `DiffLineType::Removed` entries.
///
/// Only `left_line_range` grows, by the number of lines appended.
fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) {
fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
// Remember the length so the number of appended lines can be derived
// after the iterator has been consumed.
let old_len = self.lines.len();
self.lines
.extend(lines.into_iter().map(|line| (DiffLineType::Removed, line)));
self.left_line_range.end += self.lines.len() - old_len;
}

fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) {
fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
let old_len = self.lines.len();
self.lines
.extend(lines.into_iter().map(|line| (DiffLineType::Added, line)));
Expand Down Expand Up @@ -873,9 +883,9 @@ fn unified_diff_hunks<'content>(
// The next hunk should be of DiffHunk::Different type if any.
current_hunk.extend_context_lines(before_lines.into_iter().rev());
}
DiffHunk::Different(content) => {
let left_lines = content[0].split_inclusive(|b| *b == b'\n');
let right_lines = content[1].split_inclusive(|b| *b == b'\n');
DiffHunk::Different(contents) => {
let [left, right] = contents.try_into().unwrap();
let (left_lines, right_lines) = inline_diff_hunks(left, right);
current_hunk.extend_removed_lines(left_lines);
current_hunk.extend_added_lines(right_lines);
}
Expand All @@ -887,6 +897,60 @@ fn unified_diff_hunks<'content>(
hunks
}

/// Splits line-level hunks into word-level tokens. Returns lists of tokens per
/// line.
fn inline_diff_hunks<'content>(
left_content: &'content [u8],
right_content: &'content [u8],
) -> (Vec<DiffTokenVec<'content>>, Vec<DiffTokenVec<'content>>) {
let mut left_lines: Vec<DiffTokenVec<'content>> = vec![];
let mut right_lines: Vec<DiffTokenVec<'content>> = vec![];
let mut left_tokens: DiffTokenVec<'content> = vec![];
let mut right_tokens: DiffTokenVec<'content> = vec![];

// Like Diff::default_refinement(), but doesn't try to match up contents by
// lines. left/right_contents are unmatched hunks.
let mut diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_word_ranges);
diff.refine_changed_regions(diff::find_nonword_ranges);
for hunk in diff.hunks() {
match hunk {
DiffHunk::Matching(content) => {
for token in content.split_inclusive(|b| *b == b'\n') {
left_tokens.push((DiffTokenType::Matching, token));
right_tokens.push((DiffTokenType::Matching, token));
if token.ends_with(b"\n") {
left_lines.push(mem::take(&mut left_tokens));
right_lines.push(mem::take(&mut right_tokens));
}
}
}
DiffHunk::Different(contents) => {
let [left, right] = contents.try_into().unwrap();
for token in left.split_inclusive(|b| *b == b'\n') {
left_tokens.push((DiffTokenType::Different, token));
if token.ends_with(b"\n") {
left_lines.push(mem::take(&mut left_tokens));
}
}
for token in right.split_inclusive(|b| *b == b'\n') {
right_tokens.push((DiffTokenType::Different, token));
if token.ends_with(b"\n") {
right_lines.push(mem::take(&mut right_tokens));
}
}
}
}
}

if !left_tokens.is_empty() {
left_lines.push(left_tokens);
}
if !right_tokens.is_empty() {
right_lines.push(right_tokens);
}
(left_lines, right_lines)
}

fn show_unified_diff_hunks(
formatter: &mut dyn Formatter,
left_content: &[u8],
Expand All @@ -902,16 +966,24 @@ fn show_unified_diff_hunks(
hunk.right_line_range.start,
hunk.right_line_range.len()
)?;
for (line_type, content) in hunk.lines {
for (line_type, tokens) in &hunk.lines {
let (label, sigil) = match line_type {
DiffLineType::Context => ("context", " "),
DiffLineType::Removed => ("removed", "-"),
DiffLineType::Added => ("added", "+"),
};
formatter.with_label(label, |formatter| {
write!(formatter, "{sigil}")?;
formatter.write_all(content)
for (token_type, content) in tokens {
match token_type {
DiffTokenType::Matching => formatter.write_all(content)?,
DiffTokenType::Different => formatter
.with_label("token", |formatter| formatter.write_all(content))?,
}
}
Ok(())
})?;
let (_, content) = tokens.last().expect("hunk line must not be empty");
if !content.ends_with(b"\n") {
write!(formatter, "\n\\ No newline at end of file\n")?;
}
Expand Down
10 changes: 5 additions & 5 deletions cli/tests/test_diff_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,23 +140,23 @@ fn test_diff_basic() {
<<diff file_header::--- a/>><<diff file_header::file1>><<diff file_header::>>
<<diff file_header::+++ /dev/null>>
<<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::1>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::0>><<diff hunk_header:: @@>>
[38;5;1m<<diff removed::->><<diff removed::foo>>[39m
[38;5;1m<<diff removed::->>[4m<<diff removed token::foo>>[24m[39m
<<diff file_header::diff --git a/>><<diff file_header::file2>><<diff file_header:: b/>><<diff file_header::file2>><<diff file_header::>>
<<diff file_header::index >><<diff file_header::523a4a9de8>><<diff file_header::...>><<diff file_header::485b56a572>><<diff file_header:: >><<diff file_header::100644>><<diff file_header::>>
<<diff file_header::--- a/>><<diff file_header::file2>><<diff file_header::>>
<<diff file_header::+++ b/>><<diff file_header::file2>><<diff file_header::>>
<<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::2>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::3>><<diff hunk_header:: @@>>
<<diff context:: >><<diff context::foo>>
[38;5;1m<<diff removed::->><<diff removed::baz qux>>[39m
[38;5;2m<<diff added::+>><<diff added::bar>>[39m
[38;5;2m<<diff added::+>><<diff added::baz quux>>[39m
[38;5;1m<<diff removed::->><<diff removed::baz >>[4m<<diff removed token::qux>>[24m<<diff removed::>>[39m
[38;5;2m<<diff added::+>>[4m<<diff added token::bar>>[24m[39m
[38;5;2m<<diff added::+>><<diff added::baz >>[4m<<diff added token::quux>>[24m<<diff added::>>[39m
<<diff file_header::diff --git a/>><<diff file_header::file3>><<diff file_header:: b/>><<diff file_header::file3>><<diff file_header::>>
<<diff file_header::new file mode >><<diff file_header::100644>><<diff file_header::>>
<<diff file_header::index 0000000000..>><<diff file_header::257cc5642c>><<diff file_header::>>
<<diff file_header::--- /dev/null>>
<<diff file_header::+++ b/>><<diff file_header::file3>><<diff file_header::>>
<<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::0>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::1>><<diff hunk_header:: @@>>
[38;5;2m<<diff added::+>><<diff added::foo>>[39m
[38;5;2m<<diff added::+>>[4m<<diff added token::foo>>[24m[39m
"###);

let stdout = test_env.jj_cmd_success(&repo_path, &["diff", "-s", "--git"]);
Expand Down
4 changes: 2 additions & 2 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ ui.default-description = "\n\nTESTED=TODO"

### Diff colors and styles

In color-words diffs, hunks are rendered with underline. You can override the
default style with the following keys:
In color-words and git diffs, word-level hunks are rendered with underline. You
can override the default style with the following keys:

```toml
[colors]
Expand Down

0 comments on commit 61ce79b

Please sign in to comment.