Skip to content

Commit

Permalink
fileset: relax identifier rule to accept more path-like strings
Browse files Browse the repository at this point in the history
Since fileset is primarily used in CLI, it's better to avoid inner quoting if
possible. For example, ".." would have to be quoted in the original grammar
derived from the revset.

This patch also adds a stricter version of an identifier rule. If we add a
symbol alias, it will follow the "strict_identifier" rule.
  • Loading branch information
yuja committed Apr 9, 2024
1 parent 653173a commit 57b423e
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 8 deletions.
15 changes: 11 additions & 4 deletions lib/src/fileset.pest
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,17 @@

whitespace = _{ " " | "\t" | "\r" | "\n" | "\x0c" }

// TODO: adjust identifier rule for file names
identifier_part = @{ (ASCII_ALPHANUMERIC | "_" | "/")+ }
// XID_CONTINUE: https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax
// +, -, ., @, _: commonly used in file name including "." and ".."
// /: path separator
// \: path separator (Windows)
// TODO: accept glob characters as identifier?
identifier = @{
identifier_part ~ (("." | "-" | "+") ~ identifier_part)*
(XID_CONTINUE | "+" | "-" | "." | "@" | "_" | "/" | "\\")+
}
strict_identifier_part = @{ (ASCII_ALPHANUMERIC | "_")+ }
strict_identifier = @{
strict_identifier_part ~ ("-" ~ strict_identifier_part)*
}

string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") }
Expand All @@ -42,7 +49,7 @@ function_arguments = {
}

// TODO: change rhs to string_literal to require quoting? #2101
string_pattern = { identifier ~ pattern_kind_op ~ (identifier | string_literal) }
string_pattern = { strict_identifier ~ pattern_kind_op ~ (identifier | string_literal) }

primary = {
"(" ~ whitespace* ~ expression ~ whitespace* ~ ")"
Expand Down
3 changes: 1 addition & 2 deletions lib/src/fileset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,7 @@ mod tests {
cwd: Path::new("/ws/cur"),
workspace_root: Path::new("/ws"),
};
// TODO: adjust identifier rule and test the expression parser instead
let parse = |input| FilePattern::parse(&ctx, input).map(FilesetExpression::pattern);
let parse = |text| parse(text, &ctx);

// cwd-relative patterns
assert_eq!(
Expand Down
17 changes: 15 additions & 2 deletions lib/src/fileset_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ impl Rule {
match self {
Rule::EOI => None,
Rule::whitespace => None,
Rule::identifier_part => None,
Rule::identifier => None,
Rule::strict_identifier_part => None,
Rule::strict_identifier => None,
Rule::string_escape => None,
Rule::string_content_char => None,
Rule::string_content => None,
Expand Down Expand Up @@ -237,7 +238,7 @@ fn parse_primary_node(pair: Pair<Rule>) -> FilesetParseResult<ExpressionNode> {
}
Rule::string_pattern => {
let (lhs, op, rhs) = first.into_inner().collect_tuple().unwrap();
assert_eq!(lhs.as_rule(), Rule::identifier);
assert_eq!(lhs.as_rule(), Rule::strict_identifier);
assert_eq!(op.as_rule(), Rule::pattern_kind_op);
let kind = lhs.as_str();
let value = match rhs.as_rule() {
Expand Down Expand Up @@ -399,6 +400,18 @@ mod tests {
parse_into_kind("dir/foo-bar_0.baz"),
Ok(ExpressionKind::Identifier("dir/foo-bar_0.baz"))
);
assert_eq!(
parse_into_kind("[email protected]"),
Ok(ExpressionKind::Identifier("[email protected]"))
);
assert_eq!(
parse_into_kind("柔術.jj"),
Ok(ExpressionKind::Identifier("柔術.jj"))
);
assert_eq!(
parse_into_kind(r#"Windows\Path"#),
Ok(ExpressionKind::Identifier(r#"Windows\Path"#))
);
}

#[test]
Expand Down

0 comments on commit 57b423e

Please sign in to comment.