Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

matchers: refactor RepoPathTree (prep for glob matcher) #3508

Merged
merged 7 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions lib/src/fileset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ mod tests {
@r###"
PrefixMatcher {
tree: Dir {
"foo": Dir|File {},
"foo": Prefix {},
},
}
"###);
Expand All @@ -627,7 +627,7 @@ mod tests {
insta::assert_debug_snapshot!(expr.to_matcher(), @r###"
FilesMatcher {
tree: Dir {
"foo": Dir|File {
"foo": File {
"bar": File {},
},
},
Expand All @@ -641,8 +641,8 @@ mod tests {
insta::assert_debug_snapshot!(expr.to_matcher(), @r###"
PrefixMatcher {
tree: Dir {
"bar": Dir|File {
"baz": Dir|File {},
"bar": Prefix {
"baz": Prefix {},
},
},
}
Expand All @@ -664,7 +664,7 @@ mod tests {
},
input2: PrefixMatcher {
tree: Dir {
"bar": Dir|File {},
"bar": Prefix {},
},
},
}
Expand Down Expand Up @@ -714,7 +714,7 @@ mod tests {
},
input2: PrefixMatcher {
tree: Dir {
"bar": Dir|File {},
"bar": Prefix {},
},
},
},
Expand Down
197 changes: 81 additions & 116 deletions lib/src/matchers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#![allow(dead_code, missing_docs)]
#![allow(missing_docs)]

use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
Expand Down Expand Up @@ -122,67 +122,117 @@ impl Matcher for EverythingMatcher {

#[derive(PartialEq, Eq, Debug)]
pub struct FilesMatcher {
tree: RepoPathTree,
tree: RepoPathTree<FilesNodeKind>,
}

impl FilesMatcher {
pub fn new(files: impl IntoIterator<Item = impl AsRef<RepoPath>>) -> Self {
let mut tree = RepoPathTree::new();
let mut tree = RepoPathTree::default();
for f in files {
tree.add_file(f.as_ref());
tree.add(f.as_ref()).value = FilesNodeKind::File;
}
FilesMatcher { tree }
}
}

impl Matcher for FilesMatcher {
fn matches(&self, file: &RepoPath) -> bool {
self.tree.get(file).map(|sub| sub.is_file).unwrap_or(false)
self.tree
.get(file)
.map_or(false, |sub| sub.value == FilesNodeKind::File)
}

fn visit(&self, dir: &RepoPath) -> Visit {
self.tree.get_visit_sets(dir)
self.tree
.get(dir)
.map_or(Visit::Nothing, files_tree_to_visit_sets)
}
}

#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
enum FilesNodeKind {
/// Represents an intermediate directory.
#[default]
Dir,
/// Represents a file (which might also be an intermediate directory.)
File,
}

fn files_tree_to_visit_sets(tree: &RepoPathTree<FilesNodeKind>) -> Visit {
let mut dirs = HashSet::new();
let mut files = HashSet::new();
for (name, sub) in &tree.entries {
// should visit only intermediate directories
if !sub.entries.is_empty() {
dirs.insert(name.clone());
}
if sub.value == FilesNodeKind::File {
files.insert(name.clone());
}
}
Visit::sets(dirs, files)
}

#[derive(Debug)]
pub struct PrefixMatcher {
tree: RepoPathTree,
tree: RepoPathTree<PrefixNodeKind>,
}

impl PrefixMatcher {
#[instrument(skip(prefixes))]
pub fn new(prefixes: impl IntoIterator<Item = impl AsRef<RepoPath>>) -> Self {
let mut tree = RepoPathTree::new();
let mut tree = RepoPathTree::default();
for prefix in prefixes {
let sub = tree.add(prefix.as_ref());
sub.is_dir = true;
sub.is_file = true;
tree.add(prefix.as_ref()).value = PrefixNodeKind::Prefix;
}
PrefixMatcher { tree }
}
}

impl Matcher for PrefixMatcher {
fn matches(&self, file: &RepoPath) -> bool {
self.tree.walk_to(file).any(|(sub, _)| sub.is_file)
self.tree
.walk_to(file)
.any(|(sub, _)| sub.value == PrefixNodeKind::Prefix)
}

fn visit(&self, dir: &RepoPath) -> Visit {
for (sub, tail_path) in self.tree.walk_to(dir) {
// 'is_file' means the current path matches prefix paths
if sub.is_file {
// ancestor of 'dir' matches prefix paths
if sub.value == PrefixNodeKind::Prefix {
return Visit::AllRecursively;
}
// 'dir' found, and is an ancestor of prefix paths
if tail_path.is_root() {
return sub.to_visit_sets();
return prefix_tree_to_visit_sets(sub);
}
}
Visit::Nothing
}
}

#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
enum PrefixNodeKind {
/// Represents an intermediate directory.
#[default]
Dir,
/// Represents a file and prefix directory.
Prefix,
}

fn prefix_tree_to_visit_sets(tree: &RepoPathTree<PrefixNodeKind>) -> Visit {
let mut dirs = HashSet::new();
let mut files = HashSet::new();
for (name, sub) in &tree.entries {
// should visit both intermediate and prefix directories
dirs.insert(name.clone());
if sub.value == PrefixNodeKind::Prefix {
files.insert(name.clone());
}
}
Visit::sets(dirs, files)
}

/// Matches paths that are matched by any of the input matchers.
#[derive(Clone, Debug)]
pub struct UnionMatcher<M1, M2> {
Expand Down Expand Up @@ -338,93 +388,50 @@ impl<M1: Matcher, M2: Matcher> Matcher for IntersectionMatcher<M1, M2> {
}
}

/// Keeps track of which subdirectories and files of each directory need to be
/// visited.
#[derive(PartialEq, Eq)]
struct RepoPathTree {
entries: HashMap<RepoPathComponentBuf, RepoPathTree>,
// is_dir/is_file aren't exclusive, both can be set to true. If entries is not empty,
// is_dir should be set.
is_dir: bool,
is_file: bool,
/// Tree that maps `RepoPath` to value of type `V`.
#[derive(Clone, Default, Eq, PartialEq)]
struct RepoPathTree<V> {
entries: HashMap<RepoPathComponentBuf, RepoPathTree<V>>,
value: V,
}

impl RepoPathTree {
fn new() -> Self {
RepoPathTree {
entries: HashMap::new(),
is_dir: false,
is_file: false,
}
}

fn add(&mut self, dir: &RepoPath) -> &mut RepoPathTree {
impl<V> RepoPathTree<V> {
fn add(&mut self, dir: &RepoPath) -> &mut Self
where
V: Default,
{
dir.components().fold(self, |sub, name| {
// Avoid name.clone() if entry already exists.
if !sub.entries.contains_key(name) {
sub.is_dir = true;
sub.entries.insert(name.to_owned(), RepoPathTree::new());
sub.entries.insert(name.to_owned(), Self::default());
}
sub.entries.get_mut(name).unwrap()
})
}

fn add_dir(&mut self, dir: &RepoPath) {
self.add(dir).is_dir = true;
}

fn add_file(&mut self, file: &RepoPath) {
self.add(file).is_file = true;
}

fn get(&self, dir: &RepoPath) -> Option<&RepoPathTree> {
fn get(&self, dir: &RepoPath) -> Option<&Self> {
dir.components()
.try_fold(self, |sub, name| sub.entries.get(name))
}

fn get_visit_sets(&self, dir: &RepoPath) -> Visit {
self.get(dir)
.map(RepoPathTree::to_visit_sets)
.unwrap_or(Visit::Nothing)
}

/// Walks the tree from the root to the given `dir`, yielding each sub tree
/// and remaining path.
fn walk_to<'a, 'b: 'a>(
&'a self,
dir: &'b RepoPath,
) -> impl Iterator<Item = (&'a RepoPathTree, &'b RepoPath)> + 'a {
) -> impl Iterator<Item = (&'a Self, &'b RepoPath)> + 'a {
iter::successors(Some((self, dir)), |(sub, dir)| {
let mut components = dir.components();
let name = components.next()?;
Some((sub.entries.get(name)?, components.as_path()))
})
}

fn to_visit_sets(&self) -> Visit {
let mut dirs = HashSet::new();
let mut files = HashSet::new();
for (name, sub) in &self.entries {
if sub.is_dir {
dirs.insert(name.clone());
}
if sub.is_file {
files.insert(name.clone());
}
}
Visit::sets(dirs, files)
}
}

impl Debug for RepoPathTree {
impl<V: Debug> Debug for RepoPathTree<V> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let type_name = match (self.is_dir, self.is_file) {
(true, true) => "Dir|File",
(true, false) => "Dir",
(false, true) => "File",
(false, false) => "_",
};
write!(f, "{type_name} ")?;
self.value.fmt(f)?;
f.write_str(" ")?;
f.debug_map()
.entries(
self.entries
Expand All @@ -445,48 +452,6 @@ mod tests {
RepoPath::from_internal_string(value)
}

#[test]
fn test_repo_path_tree_empty() {
let tree = RepoPathTree::new();
assert_eq!(tree.get_visit_sets(RepoPath::root()), Visit::Nothing);
}

#[test]
fn test_repo_path_tree_root() {
let mut tree = RepoPathTree::new();
tree.add_dir(RepoPath::root());
assert_eq!(tree.get_visit_sets(RepoPath::root()), Visit::Nothing);
}

#[test]
fn test_repo_path_tree_dir() {
let mut tree = RepoPathTree::new();
tree.add_dir(repo_path("dir"));
assert_eq!(
tree.get_visit_sets(RepoPath::root()),
Visit::sets(hashset! {RepoPathComponentBuf::from("dir")}, hashset! {}),
);
tree.add_dir(repo_path("dir/sub"));
assert_eq!(
tree.get_visit_sets(repo_path("dir")),
Visit::sets(hashset! {RepoPathComponentBuf::from("sub")}, hashset! {}),
);
}

#[test]
fn test_repo_path_tree_file() {
let mut tree = RepoPathTree::new();
tree.add_file(repo_path("dir/file"));
assert_eq!(
tree.get_visit_sets(RepoPath::root()),
Visit::sets(hashset! {RepoPathComponentBuf::from("dir")}, hashset! {}),
);
assert_eq!(
tree.get_visit_sets(repo_path("dir")),
Visit::sets(hashset! {}, hashset! {RepoPathComponentBuf::from("file")}),
);
}

#[test]
fn test_nothingmatcher() {
let m = NothingMatcher;
Expand Down