From fd502fca182d914fbfc6e9b6902bde8ee23c6872 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sun, 14 Apr 2024 18:26:25 +0900 Subject: [PATCH 1/7] matchers: don't allow dead_code --- lib/src/matchers.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/src/matchers.rs b/lib/src/matchers.rs index 41fe9c1e00..b3a005d6b1 100644 --- a/lib/src/matchers.rs +++ b/lib/src/matchers.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![allow(dead_code, missing_docs)] +#![allow(missing_docs)] use std::collections::{HashMap, HashSet}; use std::fmt::Debug; @@ -369,6 +369,7 @@ impl RepoPathTree { }) } + #[cfg(test)] fn add_dir(&mut self, dir: &RepoPath) { self.add(dir).is_dir = true; } From 380c5e1df6100ebd0e3cf4a264a3d4bf79793e62 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 15 Apr 2024 16:31:02 +0900 Subject: [PATCH 2/7] matchers: remove tests that directly modify RepoPathTree I'm going to extract generic map from RepoPathTree, and .get_visit_sets() will be inlined into FilesMatcher/PrefixMatcher. These removed tests should be covered by the corresponding matcher tests. --- lib/src/matchers.rs | 47 --------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/lib/src/matchers.rs b/lib/src/matchers.rs index b3a005d6b1..3d501be1f0 100644 --- a/lib/src/matchers.rs +++ b/lib/src/matchers.rs @@ -369,11 +369,6 @@ impl RepoPathTree { }) } - #[cfg(test)] - fn add_dir(&mut self, dir: &RepoPath) { - self.add(dir).is_dir = true; - } - fn add_file(&mut self, file: &RepoPath) { self.add(file).is_file = true; } @@ -446,48 +441,6 @@ mod tests { RepoPath::from_internal_string(value) } - #[test] - fn test_repo_path_tree_empty() { - let tree = RepoPathTree::new(); - assert_eq!(tree.get_visit_sets(RepoPath::root()), Visit::Nothing); - } - - #[test] - fn test_repo_path_tree_root() { - let mut tree = RepoPathTree::new(); - tree.add_dir(RepoPath::root()); - assert_eq!(tree.get_visit_sets(RepoPath::root()), Visit::Nothing); - } - - #[test] - fn test_repo_path_tree_dir() { - let mut tree = RepoPathTree::new(); - tree.add_dir(repo_path("dir")); - assert_eq!( - tree.get_visit_sets(RepoPath::root()), - Visit::sets(hashset! {RepoPathComponentBuf::from("dir")}, hashset! {}), - ); - tree.add_dir(repo_path("dir/sub")); - assert_eq!( - tree.get_visit_sets(repo_path("dir")), - Visit::sets(hashset! {RepoPathComponentBuf::from("sub")}, hashset! {}), - ); - } - - #[test] - fn test_repo_path_tree_file() { - let mut tree = RepoPathTree::new(); - tree.add_file(repo_path("dir/file")); - assert_eq!( - tree.get_visit_sets(RepoPath::root()), - Visit::sets(hashset! {RepoPathComponentBuf::from("dir")}, hashset! {}), - ); - assert_eq!( - tree.get_visit_sets(repo_path("dir")), - Visit::sets(hashset! {}, hashset! {RepoPathComponentBuf::from("file")}), - ); - } - #[test] fn test_nothingmatcher() { let m = NothingMatcher; From 97379a02d097974894090e4a86d6ffa36eaf0f72 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 15 Apr 2024 16:35:54 +0900 Subject: [PATCH 3/7] matchers: inline RepoPathTree::add_file() --- lib/src/matchers.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/src/matchers.rs b/lib/src/matchers.rs index 3d501be1f0..79a1f376e5 100644 --- a/lib/src/matchers.rs +++ b/lib/src/matchers.rs @@ -129,7 +129,7 @@ impl FilesMatcher { pub fn new(files: impl IntoIterator>) -> Self { let mut tree = RepoPathTree::new(); for f in files { - tree.add_file(f.as_ref()); + tree.add(f.as_ref()).is_file = true; } FilesMatcher { tree } } @@ -369,10 +369,6 @@ impl RepoPathTree { }) } - fn add_file(&mut self, file: &RepoPath) { - self.add(file).is_file = true; - } - fn get(&self, dir: &RepoPath) -> Option<&RepoPathTree> { dir.components() .try_fold(self, |sub, name| sub.entries.get(name)) From b110ff3a4c7c3ceca13ff2e48f15e43011b03701 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 15 Apr 2024 16:49:14 +0900 Subject: [PATCH 4/7] matchers: simply derive Default for RepoPathTree Perhaps, I didn't do that because it's important to initialize is_dir/file to false. Since I'm going to extract a generic map-like API, and is_dir/file will be an enum, this won't be a problem. --- lib/src/matchers.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/lib/src/matchers.rs b/lib/src/matchers.rs index 79a1f376e5..82cdc93c23 100644 --- a/lib/src/matchers.rs +++ b/lib/src/matchers.rs @@ -127,7 +127,7 @@ pub struct FilesMatcher { impl FilesMatcher { pub fn new(files: impl IntoIterator>) -> Self { - let mut tree = RepoPathTree::new(); + let mut tree = RepoPathTree::default(); for f in files { tree.add(f.as_ref()).is_file = true; } @@ -153,7 +153,7 @@ pub struct PrefixMatcher { impl PrefixMatcher { #[instrument(skip(prefixes))] pub fn new(prefixes: impl IntoIterator>) -> Self { - let mut tree = RepoPathTree::new(); + let mut tree = RepoPathTree::default(); for prefix in prefixes { let sub = tree.add(prefix.as_ref()); sub.is_dir = true; @@ -340,7 +340,7 @@ impl Matcher for IntersectionMatcher { /// Keeps track of which subdirectories and files of each directory need to be /// visited. -#[derive(PartialEq, Eq)] +#[derive(Default, PartialEq, Eq)] struct RepoPathTree { entries: HashMap, // is_dir/is_file aren't exclusive, both can be set to true. If entries is not empty, @@ -350,20 +350,12 @@ struct RepoPathTree { } impl RepoPathTree { - fn new() -> Self { - RepoPathTree { - entries: HashMap::new(), - is_dir: false, - is_file: false, - } - } - fn add(&mut self, dir: &RepoPath) -> &mut RepoPathTree { dir.components().fold(self, |sub, name| { // Avoid name.clone() if entry already exists. if !sub.entries.contains_key(name) { sub.is_dir = true; - sub.entries.insert(name.to_owned(), RepoPathTree::new()); + sub.entries.insert(name.to_owned(), Self::default()); } sub.entries.get_mut(name).unwrap() }) From cd5dfcab3da6fa7373dca6199f303bcb15a38ed3 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 15 Apr 2024 16:37:03 +0900 Subject: [PATCH 5/7] matchers: inline RepoPathTree::get_visit_sets() --- lib/src/matchers.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/src/matchers.rs b/lib/src/matchers.rs index 82cdc93c23..bb6b146762 100644 --- a/lib/src/matchers.rs +++ b/lib/src/matchers.rs @@ -141,7 +141,9 @@ impl Matcher for FilesMatcher { } fn visit(&self, dir: &RepoPath) -> Visit { - self.tree.get_visit_sets(dir) + self.tree + .get(dir) + .map_or(Visit::Nothing, RepoPathTree::to_visit_sets) } } @@ -366,12 +368,6 @@ impl RepoPathTree { .try_fold(self, |sub, name| sub.entries.get(name)) } - fn get_visit_sets(&self, dir: &RepoPath) -> Visit { - self.get(dir) - .map(RepoPathTree::to_visit_sets) - .unwrap_or(Visit::Nothing) - } - /// Walks the tree from the root to the given `dir`, yielding each sub tree /// and remaining path. fn walk_to<'a, 'b: 'a>( From 6ce35403ae30d4fe6aa775cac9fc6fd19c51d1cf Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 15 Apr 2024 16:40:48 +0900 Subject: [PATCH 6/7] matchers: rewrite RepoPathTree::to_visit_sets() to not depend on is_dir flag The is_dir flag will be removed soon. Since FilesMatcher doesn't set is_dir flag explicitly, is_dir is equivalent to !entries.is_empty(). OTOH, PrefixMatcher always sets is_dir, so all tree nodes are directories. --- lib/src/matchers.rs | 46 +++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/lib/src/matchers.rs b/lib/src/matchers.rs index bb6b146762..c48733e82b 100644 --- a/lib/src/matchers.rs +++ b/lib/src/matchers.rs @@ -143,10 +143,25 @@ impl Matcher for FilesMatcher { fn visit(&self, dir: &RepoPath) -> Visit { self.tree .get(dir) - .map_or(Visit::Nothing, RepoPathTree::to_visit_sets) + .map_or(Visit::Nothing, files_tree_to_visit_sets) } } +fn files_tree_to_visit_sets(tree: &RepoPathTree) -> Visit { + let mut dirs = HashSet::new(); + let mut files = HashSet::new(); + for (name, sub) in &tree.entries { + // should visit only intermediate directories + if !sub.entries.is_empty() { + dirs.insert(name.clone()); + } + if sub.is_file { + files.insert(name.clone()); + } + } + Visit::sets(dirs, files) +} + #[derive(Debug)] pub struct PrefixMatcher { tree: RepoPathTree, @@ -178,13 +193,26 @@ impl Matcher for PrefixMatcher { } // 'dir' found, and is an ancestor of prefix paths if tail_path.is_root() { - return sub.to_visit_sets(); + return prefix_tree_to_visit_sets(sub); } } Visit::Nothing } } +fn prefix_tree_to_visit_sets(tree: &RepoPathTree) -> Visit { + let mut dirs = HashSet::new(); + let mut files = HashSet::new(); + for (name, sub) in &tree.entries { + // should visit both intermediate and prefix directories + dirs.insert(name.clone()); + if sub.is_file { + files.insert(name.clone()); + } + } + Visit::sets(dirs, files) +} + /// Matches paths that are matched by any of the input matchers. #[derive(Clone, Debug)] pub struct UnionMatcher { @@ -380,20 +408,6 @@ impl RepoPathTree { Some((sub.entries.get(name)?, components.as_path())) }) } - - fn to_visit_sets(&self) -> Visit { - let mut dirs = HashSet::new(); - let mut files = HashSet::new(); - for (name, sub) in &self.entries { - if sub.is_dir { - dirs.insert(name.clone()); - } - if sub.is_file { - files.insert(name.clone()); - } - } - Visit::sets(dirs, files) - } } impl Debug for RepoPathTree { From 7e48579588ad0dcfe6dd4dfd594bb5ad204ec3f5 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 15 Apr 2024 16:52:37 +0900 Subject: [PATCH 7/7] matchers: turn RepoPathTree into generic map-like type This prepares for adding glob matcher, which will be backed by RepoPathTree>. FilesNodeKind/PrefixNodeKind are basically boolean types, but implemented as enums for better code readability. --- lib/src/fileset.rs | 12 +++---- lib/src/matchers.rs | 85 ++++++++++++++++++++++++++------------------- 2 files changed, 55 insertions(+), 42 deletions(-) diff --git a/lib/src/fileset.rs b/lib/src/fileset.rs index 19ee786986..39712f5890 100644 --- a/lib/src/fileset.rs +++ b/lib/src/fileset.rs @@ -612,7 +612,7 @@ mod tests { @r###" PrefixMatcher { tree: Dir { - "foo": Dir|File {}, + "foo": Prefix {}, }, } "###); @@ -627,7 +627,7 @@ mod tests { insta::assert_debug_snapshot!(expr.to_matcher(), @r###" FilesMatcher { tree: Dir { - "foo": Dir|File { + "foo": File { "bar": File {}, }, }, @@ -641,8 +641,8 @@ mod tests { insta::assert_debug_snapshot!(expr.to_matcher(), @r###" PrefixMatcher { tree: Dir { - "bar": Dir|File { - "baz": Dir|File {}, + "bar": Prefix { + "baz": Prefix {}, }, }, } @@ -664,7 +664,7 @@ mod tests { }, input2: PrefixMatcher { tree: Dir { - "bar": Dir|File {}, + "bar": Prefix {}, }, }, } @@ -714,7 +714,7 @@ mod tests { }, input2: PrefixMatcher { tree: Dir { - "bar": Dir|File {}, + "bar": Prefix {}, }, }, }, diff --git a/lib/src/matchers.rs b/lib/src/matchers.rs index c48733e82b..d319be7cf7 100644 --- a/lib/src/matchers.rs +++ b/lib/src/matchers.rs @@ -122,14 +122,14 @@ impl Matcher for EverythingMatcher { #[derive(PartialEq, Eq, Debug)] pub struct FilesMatcher { - tree: RepoPathTree, + tree: RepoPathTree, } impl FilesMatcher { pub fn new(files: impl IntoIterator>) -> Self { let mut tree = RepoPathTree::default(); for f in files { - tree.add(f.as_ref()).is_file = true; + tree.add(f.as_ref()).value = FilesNodeKind::File; } FilesMatcher { tree } } @@ -137,7 +137,9 @@ impl FilesMatcher { impl Matcher for FilesMatcher { fn matches(&self, file: &RepoPath) -> bool { - self.tree.get(file).map(|sub| sub.is_file).unwrap_or(false) + self.tree + .get(file) + .map_or(false, |sub| sub.value == FilesNodeKind::File) } fn visit(&self, dir: &RepoPath) -> Visit { @@ -147,7 +149,16 @@ impl Matcher for FilesMatcher { } } -fn files_tree_to_visit_sets(tree: &RepoPathTree) -> Visit { +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +enum FilesNodeKind { + /// Represents an intermediate directory. + #[default] + Dir, + /// Represents a file (which might also be an intermediate directory.) + File, +} + +fn files_tree_to_visit_sets(tree: &RepoPathTree) -> Visit { let mut dirs = HashSet::new(); let mut files = HashSet::new(); for (name, sub) in &tree.entries { @@ -155,7 +166,7 @@ fn files_tree_to_visit_sets(tree: &RepoPathTree) -> Visit { if !sub.entries.is_empty() { dirs.insert(name.clone()); } - if sub.is_file { + if sub.value == FilesNodeKind::File { files.insert(name.clone()); } } @@ -164,7 +175,7 @@ fn files_tree_to_visit_sets(tree: &RepoPathTree) -> Visit { #[derive(Debug)] pub struct PrefixMatcher { - tree: RepoPathTree, + tree: RepoPathTree, } impl PrefixMatcher { @@ -172,9 +183,7 @@ impl PrefixMatcher { pub fn new(prefixes: impl IntoIterator>) -> Self { let mut tree = RepoPathTree::default(); for prefix in prefixes { - let sub = tree.add(prefix.as_ref()); - sub.is_dir = true; - sub.is_file = true; + tree.add(prefix.as_ref()).value = PrefixNodeKind::Prefix; } PrefixMatcher { tree } } @@ -182,13 +191,15 @@ impl PrefixMatcher { impl Matcher for PrefixMatcher { fn matches(&self, file: &RepoPath) -> bool { - self.tree.walk_to(file).any(|(sub, _)| sub.is_file) + self.tree + .walk_to(file) + .any(|(sub, _)| sub.value == PrefixNodeKind::Prefix) } fn visit(&self, dir: &RepoPath) -> Visit { for (sub, tail_path) in self.tree.walk_to(dir) { - // 'is_file' means the current path matches prefix paths - if sub.is_file { + // ancestor of 'dir' matches prefix paths + if sub.value == PrefixNodeKind::Prefix { return Visit::AllRecursively; } // 'dir' found, and is an ancestor of prefix paths @@ -200,13 +211,22 @@ impl Matcher for PrefixMatcher { } } -fn prefix_tree_to_visit_sets(tree: &RepoPathTree) -> Visit { +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +enum PrefixNodeKind { + /// Represents an intermediate directory. + #[default] + Dir, + /// Represents a file and prefix directory. + Prefix, +} + +fn prefix_tree_to_visit_sets(tree: &RepoPathTree) -> Visit { let mut dirs = HashSet::new(); let mut files = HashSet::new(); for (name, sub) in &tree.entries { // should visit both intermediate and prefix directories dirs.insert(name.clone()); - if sub.is_file { + if sub.value == PrefixNodeKind::Prefix { files.insert(name.clone()); } } @@ -368,30 +388,28 @@ impl Matcher for IntersectionMatcher { } } -/// Keeps track of which subdirectories and files of each directory need to be -/// visited. -#[derive(Default, PartialEq, Eq)] -struct RepoPathTree { - entries: HashMap, - // is_dir/is_file aren't exclusive, both can be set to true. If entries is not empty, - // is_dir should be set. - is_dir: bool, - is_file: bool, +/// Tree that maps `RepoPath` to value of type `V`. +#[derive(Clone, Default, Eq, PartialEq)] +struct RepoPathTree { + entries: HashMap>, + value: V, } -impl RepoPathTree { - fn add(&mut self, dir: &RepoPath) -> &mut RepoPathTree { +impl RepoPathTree { + fn add(&mut self, dir: &RepoPath) -> &mut Self + where + V: Default, + { dir.components().fold(self, |sub, name| { // Avoid name.clone() if entry already exists. if !sub.entries.contains_key(name) { - sub.is_dir = true; sub.entries.insert(name.to_owned(), Self::default()); } sub.entries.get_mut(name).unwrap() }) } - fn get(&self, dir: &RepoPath) -> Option<&RepoPathTree> { + fn get(&self, dir: &RepoPath) -> Option<&Self> { dir.components() .try_fold(self, |sub, name| sub.entries.get(name)) } @@ -401,7 +419,7 @@ impl RepoPathTree { fn walk_to<'a, 'b: 'a>( &'a self, dir: &'b RepoPath, - ) -> impl Iterator + 'a { + ) -> impl Iterator + 'a { iter::successors(Some((self, dir)), |(sub, dir)| { let mut components = dir.components(); let name = components.next()?; @@ -410,15 +428,10 @@ impl RepoPathTree { } } -impl Debug for RepoPathTree { +impl Debug for RepoPathTree { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let type_name = match (self.is_dir, self.is_file) { - (true, true) => "Dir|File", - (true, false) => "Dir", - (false, true) => "File", - (false, false) => "_", - }; - write!(f, "{type_name} ")?; + self.value.fmt(f)?; + f.write_str(" ")?; f.debug_map() .entries( self.entries