From 57b423e3d75f596b10495a2eecfac7074953d948 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 8 Apr 2024 16:52:27 +0900 Subject: [PATCH] fileset: relax identifier rule to accept more path-like strings Since fileset is primarily used in CLI, it's better to avoid inner quoting if possible. For example, ".." would have to be quoted in the original grammar derived from the revset. This patch also adds a stricter version of an identifier rule. If we add a symbol alias, it will follow the "strict_identifier" rule. --- lib/src/fileset.pest | 15 +++++++++++---- lib/src/fileset.rs | 3 +-- lib/src/fileset_parser.rs | 17 +++++++++++++++-- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/lib/src/fileset.pest b/lib/src/fileset.pest index 83e17780cc..c06a2ce73f 100644 --- a/lib/src/fileset.pest +++ b/lib/src/fileset.pest @@ -14,10 +14,17 @@ whitespace = _{ " " | "\t" | "\r" | "\n" | "\x0c" } -// TODO: adjust identifier rule for file names -identifier_part = @{ (ASCII_ALPHANUMERIC | "_" | "/")+ } +// XID_CONTINUE: https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax +// +, -, ., @, _: commonly used in file name including "." and ".." +// /: path separator +// \: path separator (Windows) +// TODO: accept glob characters as identifier? identifier = @{ - identifier_part ~ (("." | "-" | "+") ~ identifier_part)* + (XID_CONTINUE | "+" | "-" | "." | "@" | "_" | "/" | "\\")+ +} +strict_identifier_part = @{ (ASCII_ALPHANUMERIC | "_")+ } +strict_identifier = @{ + strict_identifier_part ~ ("-" ~ strict_identifier_part)* } string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") } @@ -42,7 +49,7 @@ function_arguments = { } // TODO: change rhs to string_literal to require quoting? #2101 -string_pattern = { identifier ~ pattern_kind_op ~ (identifier | string_literal) } +string_pattern = { strict_identifier ~ pattern_kind_op ~ (identifier | string_literal) } primary = { "(" ~ whitespace* ~ expression ~ whitespace* ~ ")" diff --git a/lib/src/fileset.rs b/lib/src/fileset.rs index 1cbcab2f68..31368ae28d 100644 --- a/lib/src/fileset.rs +++ b/lib/src/fileset.rs @@ -396,8 +396,7 @@ mod tests { cwd: Path::new("/ws/cur"), workspace_root: Path::new("/ws"), }; - // TODO: adjust identifier rule and test the expression parser instead - let parse = |input| FilePattern::parse(&ctx, input).map(FilesetExpression::pattern); + let parse = |text| parse(text, &ctx); // cwd-relative patterns assert_eq!( diff --git a/lib/src/fileset_parser.rs b/lib/src/fileset_parser.rs index 10a072cf05..ec41f4db80 100644 --- a/lib/src/fileset_parser.rs +++ b/lib/src/fileset_parser.rs @@ -40,8 +40,9 @@ impl Rule { match self { Rule::EOI => None, Rule::whitespace => None, - Rule::identifier_part => None, Rule::identifier => None, + Rule::strict_identifier_part => None, + Rule::strict_identifier => None, Rule::string_escape => None, Rule::string_content_char => None, Rule::string_content => None, @@ -237,7 +238,7 @@ fn parse_primary_node(pair: Pair) -> FilesetParseResult { } Rule::string_pattern => { let (lhs, op, rhs) = first.into_inner().collect_tuple().unwrap(); - assert_eq!(lhs.as_rule(), Rule::identifier); + assert_eq!(lhs.as_rule(), Rule::strict_identifier); assert_eq!(op.as_rule(), Rule::pattern_kind_op); let kind = lhs.as_str(); let value = match rhs.as_rule() { @@ -399,6 +400,18 @@ mod tests { parse_into_kind("dir/foo-bar_0.baz"), Ok(ExpressionKind::Identifier("dir/foo-bar_0.baz")) ); + assert_eq!( + parse_into_kind("cli-reference@.md.snap"), + Ok(ExpressionKind::Identifier("cli-reference@.md.snap")) + ); + assert_eq!( + parse_into_kind("柔術.jj"), + Ok(ExpressionKind::Identifier("柔術.jj")) + ); + assert_eq!( + parse_into_kind(r#"Windows\Path"#), + Ok(ExpressionKind::Identifier(r#"Windows\Path"#)) + ); } #[test]