Skip to content

Commit

Permalink
diff: implement some ignore-space rules
Browse files Browse the repository at this point in the history
The added comparison functions correspond to --ignore-all-space and
--ignore-space-change. --ignore-space-at-eol can be combined with the other
flags, so it will have to be implemented as a preprocessing function.
--ignore-blank-lines will also require some change in the tokenizer function.
  • Loading branch information
yuja committed Oct 2, 2024
1 parent 7949a3d commit ff5606b
Showing 1 changed file with 109 additions and 0 deletions.
109 changes: 109 additions & 0 deletions lib/src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,38 @@ pub fn find_nonword_ranges(text: &[u8]) -> Vec<Range<usize>> {
.collect()
}

/// Yields the bytes of `text` in order, skipping every byte for which
/// `u8::is_ascii_whitespace()` returns true.
fn bytes_ignore_all_whitespace(text: &[u8]) -> impl Iterator<Item = u8> + '_ {
    text.iter()
        .filter(|byte| !byte.is_ascii_whitespace())
        .copied()
}

/// Yields the bytes of `text` with each run of consecutive ASCII whitespace
/// bytes replaced by a single `b' '`.
///
/// Non-whitespace bytes are passed through unchanged. Leading and trailing
/// whitespace runs are not removed; each is emitted as one space.
fn bytes_ignore_whitespace_amount(text: &[u8]) -> impl Iterator<Item = u8> + '_ {
    // Tracks whether the previously visited byte was whitespace.
    let mut in_whitespace_run = false;
    text.iter().copied().filter_map(move |byte| {
        let continues_run = in_whitespace_run;
        in_whitespace_run = byte.is_ascii_whitespace();
        if !in_whitespace_run {
            // Ordinary byte: emit as is.
            Some(byte)
        } else if continues_run {
            // The run has already been represented by a space.
            None
        } else {
            // First whitespace byte of a run: normalize to a space.
            Some(b' ')
        }
    })
}

/// Feeds every item of `data` into `state`, then writes the number of items
/// hashed via `Hasher::write_usize()`.
fn hash_with_length_suffix<I, H>(data: I, state: &mut H)
where
    I: IntoIterator,
    I::Item: Hash,
    H: Hasher,
{
    // Hash each item while counting how many were seen.
    let count = data.into_iter().fold(0_usize, |seen, item| {
        item.hash(state);
        seen + 1
    });
    state.write_usize(count);
}

/// Compares byte sequences based on a certain equivalence property.
pub trait CompareBytes {
/// Returns true if `left` and `right` are equivalent.
Expand Down Expand Up @@ -111,6 +143,34 @@ impl CompareBytes for CompareBytesExactly {
}
}

/// Byte comparator that disregards every ASCII whitespace byte.
///
/// Two inputs are equivalent when their bytes match after all whitespace has
/// been filtered out by `bytes_ignore_all_whitespace()`.
#[derive(Clone, Debug, Default)]
pub struct CompareBytesIgnoreAllWhitespace;

impl CompareBytes for CompareBytesIgnoreAllWhitespace {
    fn eq(&self, left: &[u8], right: &[u8]) -> bool {
        let lhs = bytes_ignore_all_whitespace(left);
        let rhs = bytes_ignore_all_whitespace(right);
        Iterator::eq(lhs, rhs)
    }

    fn hash<H: Hasher>(&self, text: &[u8], state: &mut H) {
        // Hash the same filtered byte stream that `eq()` compares.
        let significant = bytes_ignore_all_whitespace(text);
        hash_with_length_suffix(significant, state);
    }
}

/// Byte comparator that disregards how much whitespace separates content.
///
/// Two inputs are equivalent when their bytes match after each run of ASCII
/// whitespace has been collapsed by `bytes_ignore_whitespace_amount()`.
#[derive(Clone, Debug, Default)]
pub struct CompareBytesIgnoreWhitespaceAmount;

impl CompareBytes for CompareBytesIgnoreWhitespaceAmount {
    fn eq(&self, left: &[u8], right: &[u8]) -> bool {
        let lhs = bytes_ignore_whitespace_amount(left);
        let rhs = bytes_ignore_whitespace_amount(right);
        Iterator::eq(lhs, rhs)
    }

    fn hash<H: Hasher>(&self, text: &[u8], state: &mut H) {
        // Hash the same normalized byte stream that `eq()` compares.
        let normalized = bytes_ignore_whitespace_amount(text);
        hash_with_length_suffix(normalized, state);
    }
}

/// Compares words (or tokens) under a certain hasher configuration.
#[derive(Clone, Debug, Default)]
struct WordComparator<C, S> {
Expand Down Expand Up @@ -878,6 +938,55 @@ mod tests {
);
}

#[test]
fn test_compare_bytes_ignore_all_whitespace() {
    let comp = WordComparator::new(CompareBytesIgnoreAllWhitespace);
    let hash = |data: &[u8]| comp.hash_one(data);

    // Pairs that must compare equal once all whitespace is dropped.
    let equal_pairs: [(&[u8], &[u8]); 4] = [
        (b"", b""),
        (b"", b" "),
        (b"\t", b"\r"),
        (b"ab", b" a b\t"),
    ];
    for &(left, right) in &equal_pairs {
        assert!(comp.eq(left, right), "{:?} vs {:?}", left, right);
    }

    // Pairs that must produce identical hashes.
    let same_hash_pairs: [(&[u8], &[u8]); 5] = [
        (b"", b""),
        (b"", b" "),
        (b"", b"\t"),
        (b"", b"\r"),
        (b"ab", b" a b\t"),
    ];
    for &(left, right) in &same_hash_pairs {
        assert_eq!(hash(left), hash(right), "{:?} vs {:?}", left, right);
    }

    // Pairs that must remain distinct.
    let unequal_pairs: [(&[u8], &[u8]); 4] = [
        (b"a", b""),
        (b"a", b" "),
        (b"a", b"ab"),
        (b"ab", b"ba"),
    ];
    for &(left, right) in &unequal_pairs {
        assert!(!comp.eq(left, right), "{:?} vs {:?}", left, right);
    }
}

#[test]
fn test_compare_bytes_ignore_whitespace_amount() {
    let comp = WordComparator::new(CompareBytesIgnoreWhitespaceAmount);
    let hash = |data: &[u8]| comp.hash_one(data);

    // Pairs that must compare equal once whitespace runs are collapsed.
    let equal_pairs: [(&[u8], &[u8]); 4] = [
        (b"", b""),
        (b"\n", b" \n"),
        (b"\t", b"\r"),
        (b"a b c\n", b"a b\tc\r\n"),
    ];
    for &(left, right) in &equal_pairs {
        assert!(comp.eq(left, right), "{:?} vs {:?}", left, right);
    }

    // Pairs that must produce identical hashes.
    let same_hash_pairs: [(&[u8], &[u8]); 6] = [
        (b"", b""),
        (b" ", b"\n"),
        (b" ", b" \n"),
        (b" ", b"\t"),
        (b" ", b"\r"),
        (b"a b c\n", b"a b\tc\r\n"),
    ];
    for &(left, right) in &same_hash_pairs {
        assert_eq!(hash(left), hash(right), "{:?} vs {:?}", left, right);
    }

    // Pairs that must remain distinct: the presence of whitespace still matters.
    let unequal_pairs: [(&[u8], &[u8]); 8] = [
        (b"", b" "),
        (b"a", b""),
        (b"a", b" "),
        (b"a", b"a "),
        (b"a", b" a"),
        (b"a", b"ab"),
        (b"ab", b"ba"),
        (b"ab", b"a b"),
    ];
    for &(left, right) in &unequal_pairs {
        assert!(!comp.eq(left, right), "{:?} vs {:?}", left, right);
    }
}

fn unchanged_ranges(
left: &DiffSource,
right: &DiffSource,
Expand Down

0 comments on commit ff5606b

Please sign in to comment.