Skip to content

Commit

Permalink
diff: optimize allocation of histogram entries for unique words
Browse files Browse the repository at this point in the history
```
group                             new                     old
-----                             ---                     ---
bench_diff_git_git_read_tree_c    1.00     34.5±0.26µs    1.32     45.7±0.11µs
bench_diff_lines/modified/10k     1.00     28.2±0.10ms    1.19     33.5±0.69ms
bench_diff_lines/modified/1k      1.00      2.6±0.01ms    1.15      3.0±0.01ms
bench_diff_lines/reversed/10k     1.00     21.5±0.22ms    1.08     23.3±0.18ms
bench_diff_lines/reversed/1k      1.00   364.8±11.96µs    1.22    445.1±8.99µs
bench_diff_lines/unchanged/10k    1.00  1761.3±13.85µs    1.66      2.9±0.07ms
bench_diff_lines/unchanged/1k     1.00    163.6±1.25µs    1.47    240.7±2.72µs
```

```
% hyperfine --sort command --warmup 3 --runs 5 -L bin jj-0,jj-1 \
  'target/release-with-debug/{bin} --ignore-working-copy \
  file annotate lib/src/revset.rs'
Benchmark 1: target/release-with-debug/jj-0 ..
  Time (mean ± σ):      1.144 s ±  0.011 s    [User: 1.088 s, System: 0.053 s]
  Range (min … max):    1.131 s …  1.159 s    5 runs

Benchmark 2: target/release-with-debug/jj-1 ..
  Time (mean ± σ):      1.026 s ±  0.008 s    [User: 0.975 s, System: 0.048 s]
  Range (min … max):    1.015 s …  1.035 s    5 runs
```
  • Loading branch information
yuja committed Nov 21, 2024
1 parent 7906b3f commit 1b4964b
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions lib/src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,9 @@ struct Histogram<'input> {
word_to_positions: HashTable<HistogramEntry<'input>>,
}

type HistogramEntry<'input> = (HashedWord<'input>, Vec<LocalWordPosition>);
// Many of the words are unique. We can inline up to 2 word positions (16 bytes
// on a 64-bit platform) in a SmallVec for free.
type HistogramEntry<'input> = (HashedWord<'input>, SmallVec<[LocalWordPosition; 2]>);

impl<'input> Histogram<'input> {
fn calculate<C: CompareBytes, S: BuildHasher>(
Expand All @@ -317,19 +319,21 @@ impl<'input> Histogram<'input> {
) -> Self {
let mut word_to_positions: HashTable<HistogramEntry> = HashTable::new();
for (i, word) in source.hashed_words().enumerate() {
let (_, positions) = word_to_positions
let pos = LocalWordPosition(i);
word_to_positions
.entry(
word.hash,
|(w, _)| comp.eq(w.text, word.text),
|(w, _)| w.hash,
)
.or_insert_with(|| (word, vec![]))
.into_mut();
// Allow one more than max_occurrences, so we can later skip those with more
// than max_occurrences
if positions.len() <= max_occurrences {
positions.push(LocalWordPosition(i));
}
.and_modify(|(_, positions)| {
// Allow one more than max_occurrences, so we can later skip
// the words that occur more than max_occurrences times.
if positions.len() <= max_occurrences {
positions.push(pos);
}
})
.or_insert_with(|| (word, smallvec![pos]));
}
Histogram { word_to_positions }
}
Expand Down

0 comments on commit 1b4964b

Please sign in to comment.