From 8148ce070573b3df21f217b2354b9e66f33e00c2 Mon Sep 17 00:00:00 2001
From: Yuya Nishihara
Date: Mon, 8 Jul 2024 19:59:42 +0900
Subject: [PATCH] diff: add shorthand for Diff::for_tokenizer(_, find_line_ranges)

Line-by-line diff is common. Let's add a helper method for convenience.
---
 cli/src/diff_util.rs           |  7 ++-----
 cli/src/merge_tools/builtin.rs |  7 ++-----
 lib/src/conflicts.rs           | 11 +++--------
 lib/src/diff.rs                |  5 +++++
 lib/src/files.rs               |  2 +-
 5 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/cli/src/diff_util.rs b/cli/src/diff_util.rs
index af8a9a3607..f730b78c13 100644
--- a/cli/src/diff_util.rs
+++ b/cli/src/diff_util.rs
@@ -852,7 +852,7 @@ fn unified_diff_hunks<'content>(
         right_line_range: 1..1,
         lines: vec![],
     };
-    let diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_line_ranges);
+    let diff = Diff::by_line(&[left_content, right_content]);
     let mut diff_hunks = diff.hunks().peekable();
     while let Some(hunk) = diff_hunks.next() {
         match hunk {
@@ -1128,10 +1128,7 @@ fn get_diff_stat(
     // TODO: this matches git's behavior, which is to count the number of newlines
     // in the file. but that behavior seems unhelpful; no one really cares how
     // many `0xa0` characters are in an image.
-    let diff = Diff::for_tokenizer(
-        &[&left_content.contents, &right_content.contents],
-        diff::find_line_ranges,
-    );
+    let diff = Diff::by_line(&[&left_content.contents, &right_content.contents]);
     let mut added = 0;
     let mut removed = 0;
     for hunk in diff.hunks() {
diff --git a/cli/src/merge_tools/builtin.rs b/cli/src/merge_tools/builtin.rs
index e4c9b424e8..01ac8f6dd8 100644
--- a/cli/src/merge_tools/builtin.rs
+++ b/cli/src/merge_tools/builtin.rs
@@ -6,7 +6,7 @@ use futures::{StreamExt, TryFutureExt, TryStreamExt};
 use itertools::Itertools;
 use jj_lib::backend::{BackendError, BackendResult, FileId, MergedTreeId, TreeValue};
 use jj_lib::conflicts::{materialize_tree_value, MaterializedTreeValue};
-use jj_lib::diff::{find_line_ranges, Diff, DiffHunk};
+use jj_lib::diff::{Diff, DiffHunk};
 use jj_lib::files::{self, ContentHunk, MergeResult};
 use jj_lib::matchers::Matcher;
 use jj_lib::merge::Merge;
@@ -225,10 +225,7 @@ fn make_diff_sections(
     left_contents: &str,
     right_contents: &str,
 ) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> {
-    let diff = Diff::for_tokenizer(
-        &[left_contents.as_bytes(), right_contents.as_bytes()],
-        find_line_ranges,
-    );
+    let diff = Diff::by_line(&[left_contents.as_bytes(), right_contents.as_bytes()]);
     let mut sections = Vec::new();
     for hunk in diff.hunks() {
         match hunk {
diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs
index 4da59c809c..c90aa27714 100644
--- a/lib/src/conflicts.rs
+++ b/lib/src/conflicts.rs
@@ -22,7 +22,7 @@ use itertools::Itertools;
 use regex::bytes::Regex;
 
 use crate::backend::{BackendError, BackendResult, CommitId, FileId, SymlinkId, TreeId, TreeValue};
-use crate::diff::{find_line_ranges, Diff, DiffHunk};
+use crate::diff::{Diff, DiffHunk};
 use crate::files;
 use crate::files::{ContentHunk, MergeResult};
 use crate::merge::{Merge, MergeBuilder, MergedTreeValue};
@@ -259,17 +259,12 @@ pub fn materialize_merge_result(
                             output.write_all(&left.0)?;
                             continue;
                         };
-                        let diff1 = Diff::for_tokenizer(&[&left.0, &right1.0], find_line_ranges)
-                            .hunks()
-                            .collect_vec();
+                        let diff1 = Diff::by_line(&[&left.0, &right1.0]).hunks().collect_vec();
                         // Check if the diff against the next positive term is better. Since
                         // we want to preserve the order of the terms, we don't match against
                         // any later positive terms.
                         if let Some(right2) = hunk.get_add(add_index + 1) {
-                            let diff2 =
-                                Diff::for_tokenizer(&[&left.0, &right2.0], find_line_ranges)
-                                    .hunks()
-                                    .collect_vec();
+                            let diff2 = Diff::by_line(&[&left.0, &right2.0]).hunks().collect_vec();
                             if diff_size(&diff2) < diff_size(&diff1) {
                                 // If the next positive term is a better match, emit
                                 // the current positive term as a snapshot and the next
diff --git a/lib/src/diff.rs b/lib/src/diff.rs
index 048c0eda50..042b9c51af 100644
--- a/lib/src/diff.rs
+++ b/lib/src/diff.rs
@@ -462,6 +462,11 @@ impl<'input> Diff<'input> {
         Diff::for_tokenizer(inputs, |_| vec![])
     }
 
+    /// Compares `inputs` line by line.
+    pub fn by_line(inputs: &[&'input [u8]]) -> Self {
+        Diff::for_tokenizer(inputs, find_line_ranges)
+    }
+
     // TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2
     // out of 3 inputs match in the differing regions. Perhaps the refine()
     // method should be on the hunk instead (probably returning a new Diff)?
diff --git a/lib/src/files.rs b/lib/src/files.rs
index deb1c23fd5..8203ae3bc5 100644
--- a/lib/src/files.rs
+++ b/lib/src/files.rs
@@ -165,7 +165,7 @@ pub fn merge(slices: &Merge<&[u8]>) -> MergeResult {
     let num_diffs = slices.removes().len();
     let diff_inputs = slices.removes().chain(slices.adds()).copied().collect_vec();
 
-    let diff = Diff::for_tokenizer(&diff_inputs, diff::find_line_ranges);
+    let diff = Diff::by_line(&diff_inputs);
     let mut resolved_hunk = ContentHunk(vec![]);
     let mut merge_hunks: Vec<Merge<ContentHunk>> = vec![];
     for diff_hunk in diff.hunks() {
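For illustration, here is a minimal caller-side sketch of the new shorthand. The
inputs and the enclosing main function are hypothetical and not part of this
patch; only Diff::for_tokenizer, Diff::by_line, and find_line_ranges are the
jj_lib::diff items touched by the hunks above.

    use jj_lib::diff::{find_line_ranges, Diff};

    fn main() {
        // Hypothetical inputs, for illustration only.
        let left: &[u8] = b"a\nb\nc\n";
        let right: &[u8] = b"a\nB\nc\n";

        // Before this patch, a line-by-line diff spelled out the tokenizer:
        let old_style = Diff::for_tokenizer(&[left, right], find_line_ranges);
        // The new shorthand covers the same common case in one call:
        let new_style = Diff::by_line(&[left, right]);

        // Both produce the same sequence of hunks.
        assert_eq!(old_style.hunks().count(), new_style.hunks().count());
    }

Since by_line is just a thin wrapper around for_tokenizer, existing
for_tokenizer call sites keep working unchanged.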