From 8148ce070573b3df21f217b2354b9e66f33e00c2 Mon Sep 17 00:00:00 2001
From: Yuya Nishihara
Date: Mon, 8 Jul 2024 19:59:42 +0900
Subject: [PATCH] diff: add shorthand for Diff::for_tokenizer(_, find_line_ranges)

Line-by-line diff is common. Let's add a helper method for convenience.
---
 cli/src/diff_util.rs           |  7 ++-----
 cli/src/merge_tools/builtin.rs |  7 ++-----
 lib/src/conflicts.rs           | 11 +++--------
 lib/src/diff.rs                |  5 +++++
 lib/src/files.rs               |  2 +-
 5 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/cli/src/diff_util.rs b/cli/src/diff_util.rs
index af8a9a3607..f730b78c13 100644
--- a/cli/src/diff_util.rs
+++ b/cli/src/diff_util.rs
@@ -852,7 +852,7 @@ fn unified_diff_hunks<'content>(
         right_line_range: 1..1,
         lines: vec![],
     };
-    let diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_line_ranges);
+    let diff = Diff::by_line(&[left_content, right_content]);
     let mut diff_hunks = diff.hunks().peekable();
     while let Some(hunk) = diff_hunks.next() {
         match hunk {
@@ -1128,10 +1128,7 @@ fn get_diff_stat(
     // TODO: this matches git's behavior, which is to count the number of newlines
     // in the file. but that behavior seems unhelpful; no one really cares how
     // many `0xa0` characters are in an image.
-    let diff = Diff::for_tokenizer(
-        &[&left_content.contents, &right_content.contents],
-        diff::find_line_ranges,
-    );
+    let diff = Diff::by_line(&[&left_content.contents, &right_content.contents]);
     let mut added = 0;
     let mut removed = 0;
     for hunk in diff.hunks() {
diff --git a/cli/src/merge_tools/builtin.rs b/cli/src/merge_tools/builtin.rs
index e4c9b424e8..01ac8f6dd8 100644
--- a/cli/src/merge_tools/builtin.rs
+++ b/cli/src/merge_tools/builtin.rs
@@ -6,7 +6,7 @@ use futures::{StreamExt, TryFutureExt, TryStreamExt};
 use itertools::Itertools;
 use jj_lib::backend::{BackendError, BackendResult, FileId, MergedTreeId, TreeValue};
 use jj_lib::conflicts::{materialize_tree_value, MaterializedTreeValue};
-use jj_lib::diff::{find_line_ranges, Diff, DiffHunk};
+use jj_lib::diff::{Diff, DiffHunk};
 use jj_lib::files::{self, ContentHunk, MergeResult};
 use jj_lib::matchers::Matcher;
 use jj_lib::merge::Merge;
@@ -225,10 +225,7 @@ fn make_diff_sections(
     left_contents: &str,
     right_contents: &str,
 ) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> {
-    let diff = Diff::for_tokenizer(
-        &[left_contents.as_bytes(), right_contents.as_bytes()],
-        find_line_ranges,
-    );
+    let diff = Diff::by_line(&[left_contents.as_bytes(), right_contents.as_bytes()]);
     let mut sections = Vec::new();
     for hunk in diff.hunks() {
         match hunk {
diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs
index 4da59c809c..c90aa27714 100644
--- a/lib/src/conflicts.rs
+++ b/lib/src/conflicts.rs
@@ -22,7 +22,7 @@ use itertools::Itertools;
 use regex::bytes::Regex;
 
 use crate::backend::{BackendError, BackendResult, CommitId, FileId, SymlinkId, TreeId, TreeValue};
-use crate::diff::{find_line_ranges, Diff, DiffHunk};
+use crate::diff::{Diff, DiffHunk};
 use crate::files;
 use crate::files::{ContentHunk, MergeResult};
 use crate::merge::{Merge, MergeBuilder, MergedTreeValue};
@@ -259,17 +259,12 @@ pub fn materialize_merge_result(
                             output.write_all(&left.0)?;
                             continue;
                         };
-                        let diff1 = Diff::for_tokenizer(&[&left.0, &right1.0], find_line_ranges)
-                            .hunks()
-                            .collect_vec();
+                        let diff1 = Diff::by_line(&[&left.0, &right1.0]).hunks().collect_vec();
                         // Check if the diff against the next positive term is better. Since
                         // we want to preserve the order of the terms, we don't match against
                         // any later positive terms.
                         if let Some(right2) = hunk.get_add(add_index + 1) {
-                            let diff2 =
-                                Diff::for_tokenizer(&[&left.0, &right2.0], find_line_ranges)
-                                    .hunks()
-                                    .collect_vec();
+                            let diff2 = Diff::by_line(&[&left.0, &right2.0]).hunks().collect_vec();
                             if diff_size(&diff2) < diff_size(&diff1) {
                                 // If the next positive term is a better match, emit
                                 // the current positive term as a snapshot and the next
diff --git a/lib/src/diff.rs b/lib/src/diff.rs
index 048c0eda50..042b9c51af 100644
--- a/lib/src/diff.rs
+++ b/lib/src/diff.rs
@@ -462,6 +462,11 @@ impl<'input> Diff<'input> {
         Diff::for_tokenizer(inputs, |_| vec![])
     }
 
+    /// Compares `inputs` line by line.
+    pub fn by_line(inputs: &[&'input [u8]]) -> Self {
+        Diff::for_tokenizer(inputs, find_line_ranges)
+    }
+
     // TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2
     // out of 3 inputs match in the differing regions. Perhaps the refine()
     // method should be on the hunk instead (probably returning a new Diff)?
diff --git a/lib/src/files.rs b/lib/src/files.rs
index deb1c23fd5..8203ae3bc5 100644
--- a/lib/src/files.rs
+++ b/lib/src/files.rs
@@ -165,7 +165,7 @@ pub fn merge(slices: &Merge<&[u8]>) -> MergeResult {
     let num_diffs = slices.removes().len();
     let diff_inputs = slices.removes().chain(slices.adds()).copied().collect_vec();
 
-    let diff = Diff::for_tokenizer(&diff_inputs, diff::find_line_ranges);
+    let diff = Diff::by_line(&diff_inputs);
     let mut resolved_hunk = ContentHunk(vec![]);
     let mut merge_hunks: Vec<Merge<ContentHunk>> = vec![];
     for diff_hunk in diff.hunks() {
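For illustration, here is a minimal caller-side sketch of the new shorthand. The
inputs and the enclosing main function are hypothetical and not part of this
patch; only Diff::for_tokenizer, Diff::by_line, and find_line_ranges are the
jj_lib::diff items touched by the hunks above.

    use jj_lib::diff::{find_line_ranges, Diff};

    fn main() {
        // Hypothetical inputs, for illustration only.
        let left: &[u8] = b"a\nb\nc\n";
        let right: &[u8] = b"a\nB\nc\n";

        // Before this patch, a line-by-line diff spelled out the tokenizer:
        let old_style = Diff::for_tokenizer(&[left, right], find_line_ranges);
        // The new shorthand covers the same common case in one call:
        let new_style = Diff::by_line(&[left, right]);

        // Both produce the same sequence of hunks.
        assert_eq!(old_style.hunks().count(), new_style.hunks().count());
    }

Since by_line is just a thin wrapper around for_tokenizer, existing
for_tokenizer call sites keep working unchanged.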