Skip to content

Commit

Permalink
local_working_copy: optimize path comparison in prefixed file states
Browse files Browse the repository at this point in the history
Since all entries in filtered file states share the same directory prefix, we
don't need to compare full file paths.

The added functions take (path, name) instead of (path, sub_path) because the
comparison can be slightly faster if the name is guaranteed to be a single path
component.

Benchmark:
1. original (omitted)
2. per-directory spawn (omitted)
3. per-directory deleted files (previous patch)
4. shorter path comparison (this patch)

gecko-dev (~357k files, ~25k dirs)
```
% JJ_CONFIG=/dev/null hyperfine --sort command --warmup 3 --runs 30 ..
Benchmark 3: target/release-with-debug/jj-3 -R ~/mirrors/gecko-dev debug snapshot
  Time (mean ± σ):     480.1 ms ±   8.8 ms    [User: 3190.5 ms, System: 2127.2 ms]
  Range (min … max):   471.2 ms … 509.8 ms    30 runs

Benchmark 4: target/release-with-debug/jj-4 -R ~/mirrors/gecko-dev debug snapshot
  Time (mean ± σ):     404.0 ms ±   4.4 ms    [User: 1933.4 ms, System: 2148.8 ms]
  Range (min … max):   396.4 ms … 416.9 ms    30 runs

Relative speed comparison
        1.19 ±  0.03  target/release-with-debug/jj-3 -R ~/mirrors/gecko-dev debug snapshot
        1.00          target/release-with-debug/jj-4 -R ~/mirrors/gecko-dev debug snapshot
```

linux (~87k files, ~6k dirs)
```
% JJ_CONFIG=/dev/null hyperfine --sort command --warmup 3 --runs 30 ..
Benchmark 3: target/release-with-debug/jj-3 -R ~/mirrors/linux debug snapshot
  Time (mean ± σ):     204.2 ms ±   3.0 ms    [User: 667.3 ms, System: 545.6 ms]
  Range (min … max):   197.1 ms … 209.2 ms    30 runs

Benchmark 4: target/release-with-debug/jj-4 -R ~/mirrors/linux debug snapshot
  Time (mean ± σ):     191.3 ms ±   3.3 ms    [User: 467.4 ms, System: 542.2 ms]
  Range (min … max):   186.1 ms … 200.6 ms    30 runs

Relative speed comparison
        1.07 ±  0.02  target/release-with-debug/jj-3 -R ~/mirrors/linux debug snapshot
        1.00          target/release-with-debug/jj-4 -R ~/mirrors/linux debug snapshot
```

nixpkgs (~45k files, ~31k dirs)
```
% JJ_CONFIG=/dev/null hyperfine --sort command --warmup 3 --runs 30 ..
Benchmark 3: target/release-with-debug/jj-3 -R ~/mirrors/nixpkgs debug snapshot
  Time (mean ± σ):     173.3 ms ±   6.7 ms    [User: 899.4 ms, System: 889.0 ms]
  Range (min … max):   166.5 ms … 197.9 ms    30 runs

Benchmark 4: target/release-with-debug/jj-4 -R ~/mirrors/nixpkgs debug snapshot
  Time (mean ± σ):     161.7 ms ±   2.5 ms    [User: 739.1 ms, System: 881.7 ms]
  Range (min … max):   156.5 ms … 166.4 ms    30 runs

Relative speed comparison
        1.07 ±  0.04  target/release-with-debug/jj-3 -R ~/mirrors/nixpkgs debug snapshot
        1.00          target/release-with-debug/jj-4 -R ~/mirrors/nixpkgs debug snapshot
```

git (~4.5k files, 0.2k dirs)
```
% JJ_CONFIG=/dev/null hyperfine --sort command --warmup 30 --runs 50 ..
Benchmark 3: target/release-with-debug/jj-3 -R ~/mirrors/git debug snapshot
  Time (mean ± σ):      28.8 ms ±   1.0 ms    [User: 33.0 ms, System: 37.6 ms]
  Range (min … max):    26.8 ms …  31.3 ms    50 runs

Benchmark 4: target/release-with-debug/jj-4 -R ~/mirrors/git debug snapshot
  Time (mean ± σ):      28.8 ms ±   1.9 ms    [User: 30.3 ms, System: 36.5 ms]
  Range (min … max):    26.0 ms …  39.2 ms    50 runs

Relative speed comparison
        1.00          target/release-with-debug/jj-3 -R ~/mirrors/git debug snapshot
        1.00 ±  0.08  target/release-with-debug/jj-4 -R ~/mirrors/git debug snapshot
```
  • Loading branch information
yuja committed Dec 9, 2024
1 parent f6ce3d1 commit 11ebf1a
Showing 1 changed file with 136 additions and 7 deletions.
143 changes: 136 additions & 7 deletions lib/src/local_working_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#![allow(clippy::let_unit_value)]

use std::any::Any;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::error::Error;
use std::fs;
Expand Down Expand Up @@ -272,6 +273,13 @@ impl<'a> FileStates<'a> {
Self::from_sorted(&self.data[range])
}

/// Suppose all entries share the same prefix `dir`, returns file states
/// under the `<dir>/<base>` path.
fn prefixed_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Self {
let range = self.prefixed_range_at(dir, base);
Self::from_sorted(&self.data[range])
}

/// Returns true if this contains no entries.
pub fn is_empty(&self) -> bool {
self.data.is_empty()
Expand All @@ -289,12 +297,36 @@ impl<'a> FileStates<'a> {
Some(state)
}

/// Suppose all entries share the same prefix `dir`, returns file state for
/// the `<dir>/<name>` path.
fn get_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<FileState> {
let pos = self.exact_position_at(dir, name)?;
let (_, state) = file_state_entry_from_proto(&self.data[pos]);
Some(state)
}

fn exact_position(&self, path: &RepoPath) -> Option<usize> {
self.data
.binary_search_by(|entry| RepoPath::from_internal_string(&entry.path).cmp(path))
.ok()
}

fn exact_position_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<usize> {
debug_assert!(self.paths().all(|path| path.starts_with(dir)));
let slash_len = !dir.is_root() as usize;
let prefix_len = dir.as_internal_file_string().len() + slash_len;
self.data
.binary_search_by(|entry| {
let tail = entry.path.get(prefix_len..).unwrap_or("");
match tail.split_once('/') {
// "<name>/*" > "<name>"
Some((pre, _)) => pre.cmp(name.as_internal_str()).then(Ordering::Greater),
None => tail.cmp(name.as_internal_str()),
}
})
.ok()
}

fn prefixed_range(&self, base: &RepoPath) -> Range<usize> {
let start = self
.data
Expand All @@ -304,6 +336,25 @@ impl<'a> FileStates<'a> {
start..(start + len)
}

fn prefixed_range_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Range<usize> {
debug_assert!(self.paths().all(|path| path.starts_with(dir)));
fn coerce<F: Fn(&str) -> &str>(f: F) -> F {
f
}
let slash_len = !dir.is_root() as usize;
let prefix_len = dir.as_internal_file_string().len() + slash_len;
let to_name = coerce(|path| {
let tail = path.get(prefix_len..).unwrap_or("");
tail.split_once('/').map_or(tail, |(name, _)| name)
});
let start = self
.data
.partition_point(|entry| to_name(&entry.path) < base.as_internal_str());
let len = self.data[start..]
.partition_point(|entry| to_name(&entry.path) == base.as_internal_str());
start..(start + len)
}

/// Iterates file state entries sorted by path.
pub fn iter(&self) -> FileStatesIter<'a> {
self.data.iter().map(file_state_entry_from_proto)
Expand Down Expand Up @@ -1122,23 +1173,24 @@ impl FileSnapshotter<'_> {
) -> Result<Option<(PresentDirEntryKind, String)>, SnapshotError> {
let file_type = entry.file_type().unwrap();
let file_name = entry.file_name();
let name = file_name
let name_string = file_name
.into_string()
.map_err(|path| SnapshotError::InvalidUtf8Path { path })?;

if RESERVED_DIR_NAMES.contains(&name.as_str()) {
if RESERVED_DIR_NAMES.contains(&name_string.as_str()) {
return Ok(None);
}
let path = dir.join(RepoPathComponent::new(&name));
let maybe_current_file_state = file_states.get(&path);
let name = RepoPathComponent::new(&name_string);
let path = dir.join(name);
let maybe_current_file_state = file_states.get_at(dir, name);
if let Some(file_state) = &maybe_current_file_state {
if file_state.file_type == FileType::GitSubmodule {
return Ok(None);
}
}

if file_type.is_dir() {
let file_states = file_states.prefixed(&path);
let file_states = file_states.prefixed_at(dir, name);
if git_ignore.matches(&path.to_internal_dir_string())
|| self.start_tracking_matcher.visit(&path).is_nothing()
{
Expand All @@ -1161,7 +1213,7 @@ impl FileSnapshotter<'_> {
}
// Whether or not the directory path matches, any child file entries
// shouldn't be touched within the current recursion step.
Ok(Some((PresentDirEntryKind::Dir, name)))
Ok(Some((PresentDirEntryKind::Dir, name_string)))
} else if self.matcher.matches(&path) {
if let Some(progress) = self.progress {
progress(&path);
Expand Down Expand Up @@ -1198,7 +1250,7 @@ impl FileSnapshotter<'_> {
maybe_current_file_state.as_ref(),
new_file_state,
)?;
Ok(Some((PresentDirEntryKind::File, name)))
Ok(Some((PresentDirEntryKind::File, name_string)))
} else {
// Special file is not considered present
Ok(None)
Expand Down Expand Up @@ -2349,4 +2401,81 @@ mod tests {
assert_eq!(file_states.get(repo_path("bc")), Some(new_state(5)));
assert_eq!(file_states.get(repo_path("z")), None);
}

#[test]
fn test_file_states_lookup_at() {
let new_state = |size| FileState {
file_type: FileType::Normal {
executable: FileExecutableFlag::default(),
},
mtime: MillisSinceEpoch(0),
size,
};
let new_proto_entry = |path: &str, size| {
file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
};
let data = vec![
new_proto_entry("b/c", 0),
new_proto_entry("b/d/e", 1),
new_proto_entry("b/d#", 2), // '#' < '/'
new_proto_entry("b/e", 3),
new_proto_entry("b#", 4), // '#' < '/'
];
let file_states = FileStates::from_sorted(&data);

// At root
assert_eq!(
file_states.get_at(RepoPath::root(), RepoPathComponent::new("b")),
None
);
assert_eq!(
file_states.get_at(RepoPath::root(), RepoPathComponent::new("b#")),
Some(new_state(4))
);

// At prefixed dir
let prefixed_states =
file_states.prefixed_at(RepoPath::root(), RepoPathComponent::new("b"));
assert_eq!(
prefixed_states.paths().collect_vec(),
["b/c", "b/d/e", "b/d#", "b/e"].map(repo_path)
);
assert_eq!(
prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("c")),
Some(new_state(0))
);
assert_eq!(
prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("d")),
None
);
assert_eq!(
prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("d#")),
Some(new_state(2))
);

// At nested prefixed dir
let prefixed_states =
prefixed_states.prefixed_at(repo_path("b"), RepoPathComponent::new("d"));
assert_eq!(
prefixed_states.paths().collect_vec(),
["b/d/e"].map(repo_path)
);
assert_eq!(
prefixed_states.get_at(repo_path("b/d"), RepoPathComponent::new("e")),
Some(new_state(1))
);
assert_eq!(
prefixed_states.get_at(repo_path("b/d"), RepoPathComponent::new("#")),
None
);

// At prefixed file
let prefixed_states =
file_states.prefixed_at(RepoPath::root(), RepoPathComponent::new("b#"));
assert_eq!(prefixed_states.paths().collect_vec(), ["b#"].map(repo_path));
assert_eq!(
prefixed_states.get_at(repo_path("b#"), RepoPathComponent::new("#")),
None
);
}
}

0 comments on commit 11ebf1a

Please sign in to comment.