Skip to content

Commit

Permalink
local_working_copy: optimize path comparison in prefixed file states …
Browse files Browse the repository at this point in the history
…(WIP)

Since all entries in filtered file states share the same directory prefix, we
don't need to compare full file paths.

TODO: add benchmark result
  • Loading branch information
yuja committed Dec 6, 2024
1 parent 486bd78 commit 6f9dc51
Showing 1 changed file with 134 additions and 7 deletions.
141 changes: 134 additions & 7 deletions lib/src/local_working_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#![allow(clippy::let_unit_value)]

use std::any::Any;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::error::Error;
use std::fs;
Expand Down Expand Up @@ -272,6 +273,13 @@ impl<'a> FileStates<'a> {
Self::from_sorted(&self.data[range])
}

/// Suppose all entries share the same prefix `dir`, returns file states
/// under the `<dir>/<name>` path.
fn prefixed_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Self {
let range = self.prefixed_range_at(dir, name);
Self::from_sorted(&self.data[range])
}

/// Returns true if this contains no entries.
pub fn is_empty(&self) -> bool {
self.data.is_empty()
Expand All @@ -289,12 +297,35 @@ impl<'a> FileStates<'a> {
Some(state)
}

/// Suppose all entries share the same prefix `dir`, returns file state for
/// the `<dir>/<name>` path.
fn get_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<FileState> {
let pos = self.exact_position_at(dir, name)?;
let (_, state) = file_state_entry_from_proto(&self.data[pos]);
Some(state)
}

fn exact_position(&self, path: &RepoPath) -> Option<usize> {
self.data
.binary_search_by(|entry| RepoPath::from_internal_string(&entry.path).cmp(path))
.ok()
}

fn exact_position_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<usize> {
debug_assert!(self.paths().all(|path| path.starts_with(dir)));
let prefix_len = dir.as_internal_file_string().len() + (!dir.is_root() as usize);
self.data
.binary_search_by(|entry| {
let tail = entry.path.get(prefix_len..).unwrap_or("");
match tail.split_once('/') {
// "<name>/*" > "<name>"
Some((pre, _)) => pre.cmp(name.as_internal_str()).then(Ordering::Greater),
None => tail.cmp(name.as_internal_str()),
}
})
.ok()
}

fn prefixed_range(&self, base: &RepoPath) -> Range<usize> {
let start = self
.data
Expand All @@ -304,6 +335,24 @@ impl<'a> FileStates<'a> {
start..(start + len)
}

fn prefixed_range_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Range<usize> {
debug_assert!(self.paths().all(|path| path.starts_with(dir)));
fn coerce<F: Fn(&str) -> &str>(f: F) -> F {
f
}
let prefix_len = dir.as_internal_file_string().len() + (!dir.is_root() as usize);
let to_name = coerce(|path| {
let tail = path.get(prefix_len..).unwrap_or("");
tail.split_once('/').map_or(tail, |(name, _)| name)
});
let start = self
.data
.partition_point(|entry| to_name(&entry.path) < name.as_internal_str());
let len = self.data[start..]
.partition_point(|entry| to_name(&entry.path) == name.as_internal_str());
start..(start + len)
}

/// Iterates file state entries sorted by path.
pub fn iter(&self) -> FileStatesIter<'a> {
self.data.iter().map(file_state_entry_from_proto)
Expand Down Expand Up @@ -1115,23 +1164,24 @@ impl FileSnapshotter<'_> {
) -> Result<Option<(PresentDirEntryKind, String)>, SnapshotError> {
let file_type = entry.file_type().unwrap();
let file_name = entry.file_name();
let name = file_name
let name_string = file_name
.into_string()
.map_err(|path| SnapshotError::InvalidUtf8Path { path })?;

if RESERVED_DIR_NAMES.contains(&name.as_str()) {
if RESERVED_DIR_NAMES.contains(&name_string.as_str()) {
return Ok(None);
}
let path = dir.join(RepoPathComponent::new(&name));
let maybe_current_file_state = file_states.get(&path);
let name = RepoPathComponent::new(&name_string);
let path = dir.join(name);
let maybe_current_file_state = file_states.get_at(dir, name);
if let Some(file_state) = &maybe_current_file_state {
if file_state.file_type == FileType::GitSubmodule {
return Ok(None);
}
}

if file_type.is_dir() {
let file_states = file_states.prefixed(&path);
let file_states = file_states.prefixed_at(dir, name);
if git_ignore.matches(&path.to_internal_dir_string())
|| self.start_tracking_matcher.visit(&path).is_nothing()
{
Expand All @@ -1154,7 +1204,7 @@ impl FileSnapshotter<'_> {
}
// Whether or not the directory path matches, any child file entries
// shouldn't be touched within the current recursion step.
Ok(Some((PresentDirEntryKind::Dir, name)))
Ok(Some((PresentDirEntryKind::Dir, name_string)))
} else if self.matcher.matches(&path) {
if let Some(progress) = self.progress {
progress(&path);
Expand Down Expand Up @@ -1191,7 +1241,7 @@ impl FileSnapshotter<'_> {
maybe_current_file_state.as_ref(),
new_file_state,
)?;
Ok(Some((PresentDirEntryKind::File, name)))
Ok(Some((PresentDirEntryKind::File, name_string)))
} else {
Ok(None)
}
Expand Down Expand Up @@ -2339,4 +2389,81 @@ mod tests {
assert_eq!(file_states.get(repo_path("bc")), Some(new_state(5)));
assert_eq!(file_states.get(repo_path("z")), None);
}

#[test]
fn test_file_states_lookup_at() {
let new_state = |size| FileState {
file_type: FileType::Normal {
executable: FileExecutableFlag::default(),
},
mtime: MillisSinceEpoch(0),
size,
};
let new_proto_entry = |path: &str, size| {
file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
};
let data = vec![
new_proto_entry("b/c", 0),
new_proto_entry("b/d/e", 1),
new_proto_entry("b/d#", 2), // '#' < '/'
new_proto_entry("b/e", 3),
new_proto_entry("b#", 4), // '#' < '/'
];
let file_states = FileStates::from_sorted(&data);

// At root
assert_eq!(
file_states.get_at(RepoPath::root(), RepoPathComponent::new("b")),
None
);
assert_eq!(
file_states.get_at(RepoPath::root(), RepoPathComponent::new("b#")),
Some(new_state(4))
);

// At prefixed dir
let prefixed_states =
file_states.prefixed_at(RepoPath::root(), RepoPathComponent::new("b"));
assert_eq!(
prefixed_states.paths().collect_vec(),
["b/c", "b/d/e", "b/d#", "b/e"].map(repo_path)
);
assert_eq!(
prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("c")),
Some(new_state(0))
);
assert_eq!(
prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("d")),
None
);
assert_eq!(
prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("d#")),
Some(new_state(2))
);

// At nested prefixed dir
let prefixed_states =
prefixed_states.prefixed_at(repo_path("b"), RepoPathComponent::new("d"));
assert_eq!(
prefixed_states.paths().collect_vec(),
["b/d/e"].map(repo_path)
);
assert_eq!(
prefixed_states.get_at(repo_path("b/d"), RepoPathComponent::new("e")),
Some(new_state(1))
);
assert_eq!(
prefixed_states.get_at(repo_path("b/d"), RepoPathComponent::new("#")),
None
);

// At prefixed file
let prefixed_states =
file_states.prefixed_at(RepoPath::root(), RepoPathComponent::new("b#"));
assert_eq!(prefixed_states.paths().collect_vec(), ["b#"].map(repo_path));
assert_eq!(
prefixed_states.get_at(repo_path("b#"), RepoPathComponent::new("#")),
None
);
}
}

0 comments on commit 6f9dc51

Please sign in to comment.