diff --git a/CHANGELOG.md b/CHANGELOG.md index 50438c5db7..ace2489aa8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,11 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * `jj git clone some/nested/path` now creates the full directory tree for nested destination paths if they don't exist. +* String patterns now support case‐insensitive matching by suffixing any + pattern kind with `-i`. `mine()` uses case‐insensitive matching on your email + address unconditionally. Only ASCII case folding is currently implemented, + but this will likely change in the future. + ### Fixed bugs ## [0.19.0] - 2024-07-03 diff --git a/cli/src/commands/git/push.rs b/cli/src/commands/git/push.rs index 24b87daa0b..7e317710b8 100644 --- a/cli/src/commands/git/push.rs +++ b/cli/src/commands/git/push.rs @@ -552,7 +552,7 @@ fn find_branches_targeted_by_revisions<'a>( }; let current_branches_expression = RevsetExpression::remote_branches( StringPattern::everything(), - StringPattern::Exact(remote_name.to_owned()), + StringPattern::exact(remote_name), ) .range(&RevsetExpression::commit(wc_commit_id)) .intersection(&RevsetExpression::branches(StringPattern::everything())); diff --git a/docs/revsets.md b/docs/revsets.md index e72d0f5818..8bebbac8c9 100644 --- a/docs/revsets.md +++ b/docs/revsets.md @@ -333,6 +333,9 @@ Functions that perform string matching support the following pattern syntax: * `glob:"pattern"`: Matches strings with Unix-style shell [wildcard `pattern`](https://docs.rs/glob/latest/glob/struct.Pattern.html). +You can append `-i` after the kind to match case‐insensitively (e.g. +`glob-i:"fix*jpeg*"`). + ## Aliases New symbols and functions can be defined in the config file, by using any diff --git a/lib/src/default_index/revset_engine.rs b/lib/src/default_index/revset_engine.rs index 13a93e79a0..76a495222f 100644 --- a/lib/src/default_index/revset_engine.rs +++ b/lib/src/default_index/revset_engine.rs @@ -1052,8 +1052,7 @@ fn build_predicate_fn( RevsetFilterPredicate::Author(pattern) => { let pattern = pattern.clone(); // TODO: Make these functions that take a needle to search for accept some - // syntax for specifying whether it's a regex and whether it's - // case-sensitive. + // syntax for specifying whether it's a regex. box_pure_predicate_fn(move |index, pos| { let entry = index.entry_by_pos(pos); let commit = store.get_commit(&entry.commit_id()).unwrap(); diff --git a/lib/src/revset.rs b/lib/src/revset.rs index fa9bded307..d88a584d4b 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -687,8 +687,11 @@ static BUILTIN_FUNCTION_MAP: Lazy> = Lazy: }); map.insert("mine", |function, context| { function.expect_no_arguments()?; + // Email address domains are inherently case‐insensitive, and the local‐parts + // are generally (although not universally) treated as case‐insensitive too, so + // we use a case‐insensitive match here. Ok(RevsetExpression::filter(RevsetFilterPredicate::Author( - StringPattern::Exact(context.user_email.to_owned()), + StringPattern::exact_i(&context.user_email), ))) }); map.insert("committer", |function, _context| { @@ -2518,7 +2521,7 @@ mod tests { assert!(parse("mine(foo)").is_err()); insta::assert_debug_snapshot!( parse("mine()").unwrap(), - @r###"Filter(Author(Exact("test.user@example.com")))"###); + @r###"Filter(Author(ExactI("test.user@example.com")))"###); insta::assert_debug_snapshot!( parse_with_workspace("empty()", &WorkspaceId::default()).unwrap(), @"NotIn(Filter(File(All)))"); diff --git a/lib/src/str_util.rs b/lib/src/str_util.rs index 2cef8bae9b..90f70d117c 100644 --- a/lib/src/str_util.rs +++ b/lib/src/str_util.rs @@ -32,16 +32,26 @@ pub enum StringPatternParseError { GlobPattern(glob::PatternError), } +fn parse_glob(src: &str) -> Result { + glob::Pattern::new(src).map_err(StringPatternParseError::GlobPattern) +} + /// Pattern to be tested against string property like commit description or /// branch name. #[derive(Clone, Debug, Eq, PartialEq)] pub enum StringPattern { - /// Matches strings exactly equal to `string`. + /// Matches strings exactly. Exact(String), - /// Unix-style shell wildcard pattern. - Glob(glob::Pattern), - /// Matches strings that contain `substring`. + /// Matches strings case‐insensitively. + ExactI(String), + /// Matches strings that contain a substring. Substring(String), + /// Matches strings that case‐insensitively contain a substring. + SubstringI(String), + /// Matches with a Unix‐style shell wildcard pattern. + Glob(glob::Pattern), + /// Matches with a case‐insensitive Unix‐style shell wildcard pattern. + GlobI(glob::Pattern), } impl StringPattern { @@ -50,11 +60,12 @@ impl StringPattern { StringPattern::Substring(String::new()) } - /// Parses the given string as a `StringPattern`. Everything before the - /// first ":" is considered the string's prefix. If the prefix is "exact:", - /// "glob:", or "substring:", a pattern of the specified kind is returned. - /// Returns an error if the string has an unrecognized prefix. Otherwise, a - /// `StringPattern::Exact` is returned. + /// Parses the given string as a [`StringPattern`]. Everything before the + /// first ":" is considered the string's prefix. If the prefix is + /// "exact[-i]:", "glob[-i]:", or "substring[-i]:", a pattern of the + /// specified kind is returned. Returns an error if the string has an + /// unrecognized prefix. Otherwise, a `StringPattern::Exact` is + /// returned. pub fn parse(src: &str) -> Result { if let Some((kind, pat)) = src.split_once(':') { StringPattern::from_str_kind(pat, kind) @@ -63,26 +74,48 @@ impl StringPattern { } } - /// Creates pattern that matches exactly. + /// Constructs a pattern that matches exactly. pub fn exact(src: impl Into) -> Self { StringPattern::Exact(src.into()) } - /// Parses the given string as glob pattern. + /// Constructs a pattern that matches case‐insensitively. + pub fn exact_i(src: impl Into) -> Self { + StringPattern::ExactI(src.into()) + } + + /// Constructs a pattern that matches a substring. + pub fn substring(src: impl Into) -> Self { + StringPattern::Substring(src.into()) + } + + /// Constructs a pattern that case‐insensitively matches a substring. + pub fn substring_i(src: impl Into) -> Self { + StringPattern::SubstringI(src.into()) + } + + /// Parses the given string as a glob pattern. pub fn glob(src: &str) -> Result { // TODO: might be better to do parsing and compilation separately since // not all backends would use the compiled pattern object. // TODO: if no meta character found, it can be mapped to Exact. - let pattern = glob::Pattern::new(src).map_err(StringPatternParseError::GlobPattern)?; - Ok(StringPattern::Glob(pattern)) + Ok(StringPattern::Glob(parse_glob(src)?)) } - /// Parses the given string as pattern of the specified `kind`. + /// Parses the given string as a case‐insensitive glob pattern. + pub fn glob_i(src: &str) -> Result { + Ok(StringPattern::GlobI(parse_glob(src)?)) + } + + /// Parses the given string as a pattern of the specified `kind`. pub fn from_str_kind(src: &str, kind: &str) -> Result { match kind { "exact" => Ok(StringPattern::exact(src)), + "exact-i" => Ok(StringPattern::exact_i(src)), + "substring" => Ok(StringPattern::substring(src)), + "substring-i" => Ok(StringPattern::substring_i(src)), "glob" => StringPattern::glob(src), - "substring" => Ok(StringPattern::Substring(src.to_owned())), + "glob-i" => StringPattern::glob_i(src), _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())), } } @@ -96,9 +129,12 @@ impl StringPattern { /// /// This can be used to optimize map lookup by exact key. pub fn as_exact(&self) -> Option<&str> { + // TODO: Handle trivial case‐insensitive patterns here? It might make people + // expect they can use case‐insensitive patterns in contexts where they + // generally can’t. match self { StringPattern::Exact(literal) => Some(literal), - StringPattern::Glob(_) | StringPattern::Substring(_) => None, + _ => None, } } @@ -106,8 +142,11 @@ impl StringPattern { pub fn as_str(&self) -> &str { match self { StringPattern::Exact(literal) => literal, - StringPattern::Glob(pattern) => pattern.as_str(), + StringPattern::ExactI(literal) => literal, StringPattern::Substring(needle) => needle, + StringPattern::SubstringI(needle) => needle, + StringPattern::Glob(pattern) => pattern.as_str(), + StringPattern::GlobI(pattern) => pattern.as_str(), } } @@ -115,22 +154,61 @@ impl StringPattern { /// can't be represented as a glob. pub fn to_glob(&self) -> Option> { // TODO: If we add Regex pattern, it will return None. + // + // TODO: Handle trivial case‐insensitive patterns here? It might make people + // expect they can use case‐insensitive patterns in contexts where they + // generally can’t. match self { StringPattern::Exact(literal) => Some(glob::Pattern::escape(literal).into()), - StringPattern::Glob(pattern) => Some(pattern.as_str().into()), - StringPattern::Substring(needle) if needle.is_empty() => Some("*".into()), StringPattern::Substring(needle) => { - Some(format!("*{}*", glob::Pattern::escape(needle)).into()) + if needle.is_empty() { + Some("*".into()) + } else { + Some(format!("*{}*", glob::Pattern::escape(needle)).into()) + } } + StringPattern::Glob(pattern) => Some(pattern.as_str().into()), + StringPattern::ExactI(_) => None, + StringPattern::SubstringI(_) => None, + StringPattern::GlobI(_) => None, } } /// Returns true if this pattern matches the `haystack`. + /// + /// When matching against a case‐insensitive pattern, only ASCII case + /// differences are currently folded. This may change in the future. pub fn matches(&self, haystack: &str) -> bool { + // TODO: Unicode case folding is complicated and can be locale‐specific. The + // `glob` crate and Gitoxide only deal with ASCII case folding, so we do + // the same here; a more elaborate case folding system will require + // making sure those behave in a matching manner where relevant. + // + // Care will need to be taken regarding normalization and the choice of an + // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure + // that it is compatible with the standard case‐insensitivity of haystack + // components (like internationalized domain names in email addresses). The + // availability of normalization and case folding schemes in database backends + // will also need to be considered. A locale‐specific case folding + // scheme would likely not be appropriate for Jujutsu. + // + // For some discussion of this topic, see: + // match self { StringPattern::Exact(literal) => haystack == literal, - StringPattern::Glob(pattern) => pattern.matches(haystack), + StringPattern::ExactI(literal) => haystack.eq_ignore_ascii_case(literal), StringPattern::Substring(needle) => haystack.contains(needle), + StringPattern::SubstringI(needle) => haystack + .to_ascii_lowercase() + .contains(&needle.to_ascii_lowercase()), + StringPattern::Glob(pattern) => pattern.matches(haystack), + StringPattern::GlobI(pattern) => pattern.matches_with( + haystack, + glob::MatchOptions { + case_sensitive: false, + ..glob::MatchOptions::new() + }, + ), } } @@ -192,6 +270,10 @@ mod tests { StringPattern::parse("substring:foo").unwrap(), StringPattern::from_str_kind("foo", "substring").unwrap() ); + assert_eq!( + StringPattern::parse("substring-i:foo").unwrap(), + StringPattern::from_str_kind("foo", "substring-i").unwrap() + ); // Parse a pattern that contains a : itself. assert_eq!( diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index c48f82628c..86ca9260d0 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -2007,7 +2007,11 @@ fn test_evaluate_expression_branches() { vec![commit1.id().clone()] ); assert_eq!( - resolve_commit_ids(mut_repo, r#"branches(glob:"branch?")"#), + resolve_commit_ids(mut_repo, r#"branches(glob:"Branch?")"#), + vec![] + ); + assert_eq!( + resolve_commit_ids(mut_repo, r#"branches(glob-i:"Branch?")"#), vec![commit2.id().clone(), commit1.id().clone()] ); // Can silently resolve to an empty set if there's no matches @@ -2398,6 +2402,15 @@ fn test_evaluate_expression_author() { resolve_commit_ids(mut_repo, "author(\"email3\")"), vec![commit3.id().clone()] ); + // Can match case‐insensitively + assert_eq!( + resolve_commit_ids(mut_repo, "author(substring-i:Name)"), + vec![ + commit3.id().clone(), + commit2.id().clone(), + commit1.id().clone(), + ] + ); // Searches only among candidates if specified assert_eq!( resolve_commit_ids(mut_repo, "visible_heads() & author(\"name2\")"), @@ -2452,7 +2465,8 @@ fn test_evaluate_expression_mine() { .set_parents(vec![commit2.id().clone()]) .set_author(Signature { name: "name3".to_string(), - email: settings.user_email(), + // Test that matches are case‐insensitive + email: settings.user_email().to_ascii_uppercase(), timestamp, }) .write() @@ -2539,6 +2553,15 @@ fn test_evaluate_expression_committer() { resolve_commit_ids(mut_repo, "committer(\"email3\")"), vec![commit3.id().clone()] ); + // Can match case‐insensitively + assert_eq!( + resolve_commit_ids(mut_repo, "committer(substring-i:Name)"), + vec![ + commit3.id().clone(), + commit2.id().clone(), + commit1.id().clone(), + ] + ); // Searches only among candidates if specified assert_eq!( resolve_commit_ids(mut_repo, "visible_heads() & committer(\"name2\")"),