Skip to content

Commit

Permalink
diff: add shorthand for Diff::for_tokenizer(_, find_line_ranges)
Browse files Browse the repository at this point in the history
Line-by-line diff is common. Let's add a helper method for convenience.
  • Loading branch information
yuja committed Jul 10, 2024
1 parent 44818e9 commit 6a1d926
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 19 deletions.
7 changes: 2 additions & 5 deletions cli/src/diff_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ fn unified_diff_hunks<'content>(
right_line_range: 1..1,
lines: vec![],
};
let diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_line_ranges);
let diff = Diff::by_line(&[left_content, right_content]);
let mut diff_hunks = diff.hunks().peekable();
while let Some(hunk) = diff_hunks.next() {
match hunk {
Expand Down Expand Up @@ -1128,10 +1128,7 @@ fn get_diff_stat(
// TODO: this matches git's behavior, which is to count the number of newlines
// in the file. but that behavior seems unhelpful; no one really cares how
// many `0xa0` characters are in an image.
let diff = Diff::for_tokenizer(
&[&left_content.contents, &right_content.contents],
diff::find_line_ranges,
);
let diff = Diff::by_line(&[&left_content.contents, &right_content.contents]);
let mut added = 0;
let mut removed = 0;
for hunk in diff.hunks() {
Expand Down
7 changes: 2 additions & 5 deletions cli/src/merge_tools/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use futures::{StreamExt, TryFutureExt, TryStreamExt};
use itertools::Itertools;
use jj_lib::backend::{BackendError, BackendResult, FileId, MergedTreeId, TreeValue};
use jj_lib::conflicts::{materialize_tree_value, MaterializedTreeValue};
use jj_lib::diff::{find_line_ranges, Diff, DiffHunk};
use jj_lib::diff::{Diff, DiffHunk};
use jj_lib::files::{self, ContentHunk, MergeResult};
use jj_lib::matchers::Matcher;
use jj_lib::merge::Merge;
Expand Down Expand Up @@ -225,10 +225,7 @@ fn make_diff_sections(
left_contents: &str,
right_contents: &str,
) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> {
let diff = Diff::for_tokenizer(
&[left_contents.as_bytes(), right_contents.as_bytes()],
find_line_ranges,
);
let diff = Diff::by_line(&[left_contents.as_bytes(), right_contents.as_bytes()]);
let mut sections = Vec::new();
for hunk in diff.hunks() {
match hunk {
Expand Down
11 changes: 3 additions & 8 deletions lib/src/conflicts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use itertools::Itertools;
use regex::bytes::Regex;

use crate::backend::{BackendError, BackendResult, CommitId, FileId, SymlinkId, TreeId, TreeValue};
use crate::diff::{find_line_ranges, Diff, DiffHunk};
use crate::diff::{Diff, DiffHunk};
use crate::files;
use crate::files::{ContentHunk, MergeResult};
use crate::merge::{Merge, MergeBuilder, MergedTreeValue};
Expand Down Expand Up @@ -259,17 +259,12 @@ pub fn materialize_merge_result(
output.write_all(&left.0)?;
continue;
};
let diff1 = Diff::for_tokenizer(&[&left.0, &right1.0], find_line_ranges)
.hunks()
.collect_vec();
let diff1 = Diff::by_line(&[&left.0, &right1.0]).hunks().collect_vec();
// Check if the diff against the next positive term is better. Since
// we want to preserve the order of the terms, we don't match against
// any later positive terms.
if let Some(right2) = hunk.get_add(add_index + 1) {
let diff2 =
Diff::for_tokenizer(&[&left.0, &right2.0], find_line_ranges)
.hunks()
.collect_vec();
let diff2 = Diff::by_line(&[&left.0, &right2.0]).hunks().collect_vec();
if diff_size(&diff2) < diff_size(&diff1) {
// If the next positive term is a better match, emit
// the current positive term as a snapshot and the next
Expand Down
5 changes: 5 additions & 0 deletions lib/src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,11 @@ impl<'input> Diff<'input> {
Diff::for_tokenizer(inputs, |_| vec![])
}

/// Compares `inputs` line by line.
///
/// Convenience shorthand for `Diff::for_tokenizer(inputs, find_line_ranges)`,
/// which is the most common way to tokenize a diff (per the commit message,
/// "Line-by-line diff is common").
pub fn by_line(inputs: &[&'input [u8]]) -> Self {
    Diff::for_tokenizer(inputs, find_line_ranges)
}

// TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2
// out of 3 inputs match in the differing regions. Perhaps the refine()
// method should be on the hunk instead (probably returning a new Diff)?
Expand Down
2 changes: 1 addition & 1 deletion lib/src/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ pub fn merge(slices: &Merge<&[u8]>) -> MergeResult {
let num_diffs = slices.removes().len();
let diff_inputs = slices.removes().chain(slices.adds()).copied().collect_vec();

let diff = Diff::for_tokenizer(&diff_inputs, diff::find_line_ranges);
let diff = Diff::by_line(&diff_inputs);
let mut resolved_hunk = ContentHunk(vec![]);
let mut merge_hunks: Vec<Merge<ContentHunk>> = vec![];
for diff_hunk in diff.hunks() {
Expand Down

0 comments on commit 6a1d926

Please sign in to comment.