From 528ccb318e084d10d4951ef976b3c0b1729e0413 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Wed, 10 Apr 2024 18:47:56 +0900 Subject: [PATCH] fileset: fall back to bare pattern/string if no operator-like character found While I like strict parsing, it's not uncommon that we have to deal with file names containing spaces, and doubly-quoted strings such as '"Foo Bar"' look ugly. So, this patch adds an exception that accepts top-level bare strings. This parsing rule is specific to command arguments, and won't be enabled when loading fileset aliases. --- cli/src/cli_util.rs | 2 +- cli/src/commands/debug.rs | 2 +- docs/filesets.md | 9 ++++ lib/src/fileset.pest | 14 +++++ lib/src/fileset.rs | 20 ++++--- lib/src/fileset_parser.rs | 109 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 147 insertions(+), 9 deletions(-) diff --git a/cli/src/cli_util.rs b/cli/src/cli_util.rs index eee53c53b3..4be76be21c 100644 --- a/cli/src/cli_util.rs +++ b/cli/src/cli_util.rs @@ -748,7 +748,7 @@ impl WorkspaceCommandHelper { let ctx = self.fileset_parse_context(); let expressions: Vec<_> = file_args .iter() - .map(|arg| fileset::parse(arg, &ctx)) + .map(|arg| fileset::parse_maybe_bare(arg, &ctx)) .try_collect()?; Ok(FilesetExpression::union_all(expressions)) } diff --git a/cli/src/commands/debug.rs b/cli/src/commands/debug.rs index 624e5e772d..deadfe601e 100644 --- a/cli/src/commands/debug.rs +++ b/cli/src/commands/debug.rs @@ -151,7 +151,7 @@ fn cmd_debug_fileset( let workspace_command = command.workspace_helper(ui)?; let ctx = workspace_command.fileset_parse_context(); - let expression = fileset::parse(&args.path, &ctx)?; + let expression = fileset::parse_maybe_bare(&args.path, &ctx)?; writeln!(ui.stdout(), "-- Parsed:")?; writeln!(ui.stdout(), "{expression:#?}")?; writeln!(ui.stdout())?; diff --git a/docs/filesets.md b/docs/filesets.md index 4fb9bcc1fa..9b68c724da 100644 --- a/docs/filesets.md +++ b/docs/filesets.md @@ -12,6 +12,15 @@ consists of file patterns, operators, and functions. ui.allow-filesets = true ``` +Many `jj` commands accept fileset expressions as positional arguments. File +names passed to these commands must be quoted if they contain whitespace or meta +characters. However, as a special case, quotes can be omitted if the expression +has no operators nor function calls. For example: + +* `jj diff 'Foo Bar'` (shell quotes are required, but inner quotes are optional) +* `jj diff '~"Foo Bar"'` (both shell and inner quotes are required) +* `jj diff '"Foo(1)"'` (both shell and inner quotes are required) + ## File patterns The following patterns are supported: diff --git a/lib/src/fileset.pest b/lib/src/fileset.pest index c06a2ce73f..a57da837ae 100644 --- a/lib/src/fileset.pest +++ b/lib/src/fileset.pest @@ -27,6 +27,14 @@ strict_identifier = @{ strict_identifier_part ~ ("-" ~ strict_identifier_part)* } +// TODO: accept glob characters? +// TODO: accept more ASCII meta characters such as "#" and ","? +bare_string = @{ + ( ASCII_ALPHANUMERIC + | " " | "+" | "-" | "." | "@" | "_" | "/" | "\\" + | '\u{80}'..'\u{10ffff}' )+ +} + string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") } string_content_char = @{ !("\"" | "\\") ~ ANY } string_content = @{ string_content_char+ } @@ -50,6 +58,7 @@ function_arguments = { // TODO: change rhs to string_literal to require quoting? #2101 string_pattern = { strict_identifier ~ pattern_kind_op ~ (identifier | string_literal) } +bare_string_pattern = { strict_identifier ~ pattern_kind_op ~ bare_string } primary = { "(" ~ whitespace* ~ expression ~ whitespace* ~ ")" @@ -65,3 +74,8 @@ expression = { } program = _{ SOI ~ whitespace* ~ expression ~ whitespace* ~ EOI } +program_or_bare_string = _{ + SOI ~ ( whitespace* ~ expression ~ whitespace* ~ EOI + | bare_string_pattern ~ EOI + | bare_string ~ EOI ) +} diff --git a/lib/src/fileset.rs b/lib/src/fileset.rs index 21e008a67a..2c94684d7d 100644 --- a/lib/src/fileset.rs +++ b/lib/src/fileset.rs @@ -462,9 +462,15 @@ fn resolve_expression( } } -/// Parses text into `FilesetExpression`. -pub fn parse(text: &str, ctx: &FilesetParseContext) -> FilesetParseResult { - let node = fileset_parser::parse_program(text)?; +/// Parses text into `FilesetExpression` with bare string fallback. +/// +/// If the text can't be parsed as a fileset expression, and if it doesn't +/// contain any operator-like characters, it will be parsed as a file path. +pub fn parse_maybe_bare( + text: &str, + ctx: &FilesetParseContext, +) -> FilesetParseResult { + let node = fileset_parser::parse_program_or_bare_string(text)?; // TODO: add basic tree substitution pass to eliminate redundant expressions resolve_expression(ctx, &node) } @@ -483,7 +489,7 @@ mod tests { cwd: Path::new("/ws/cur"), workspace_root: Path::new("/ws"), }; - let parse = |text| parse(text, &ctx); + let parse = |text| parse_maybe_bare(text, &ctx); // cwd-relative patterns assert_eq!( @@ -535,7 +541,7 @@ mod tests { cwd: Path::new("/ws/cur*"), workspace_root: Path::new("/ws"), }; - let parse = |text| parse(text, &ctx); + let parse = |text| parse_maybe_bare(text, &ctx); let glob_expr = |dir: &str, pattern: &str| { FilesetExpression::pattern(FilePattern::FileGlob { dir: repo_path_buf(dir), @@ -618,7 +624,7 @@ mod tests { cwd: Path::new("/ws/cur"), workspace_root: Path::new("/ws"), }; - let parse = |text| parse(text, &ctx); + let parse = |text| parse_maybe_bare(text, &ctx); assert_eq!(parse("all()").unwrap(), FilesetExpression::all()); assert_eq!(parse("none()").unwrap(), FilesetExpression::none()); @@ -644,7 +650,7 @@ mod tests { cwd: Path::new("/ws/cur"), workspace_root: Path::new("/ws"), }; - let parse = |text| parse(text, &ctx); + let parse = |text| parse_maybe_bare(text, &ctx); insta::assert_debug_snapshot!(parse("~x").unwrap(), @r###" Difference( diff --git a/lib/src/fileset_parser.rs b/lib/src/fileset_parser.rs index ec41f4db80..fd7d4aa8e2 100644 --- a/lib/src/fileset_parser.rs +++ b/lib/src/fileset_parser.rs @@ -43,6 +43,7 @@ impl Rule { Rule::identifier => None, Rule::strict_identifier_part => None, Rule::strict_identifier => None, + Rule::bare_string => None, Rule::string_escape => None, Rule::string_content_char => None, Rule::string_content => None, @@ -58,9 +59,11 @@ impl Rule { Rule::function_name => None, Rule::function_arguments => None, Rule::string_pattern => None, + Rule::bare_string_pattern => None, Rule::primary => None, Rule::expression => None, Rule::program => None, + Rule::program_or_bare_string => None, } } } @@ -296,12 +299,39 @@ fn parse_expression_node(pair: Pair) -> FilesetParseResult } /// Parses text into expression tree. No name resolution is made at this stage. +#[cfg(test)] // TODO: alias will be parsed with no bare_string fallback pub fn parse_program(text: &str) -> FilesetParseResult { let mut pairs = FilesetParser::parse(Rule::program, text)?; let first = pairs.next().unwrap(); parse_expression_node(first) } +/// Parses text into expression tree with bare string fallback. No name +/// resolution is made at this stage. +/// +/// If the text can't be parsed as a fileset expression, and if it doesn't +/// contain any operator-like characters, it will be parsed as a file path. +pub fn parse_program_or_bare_string(text: &str) -> FilesetParseResult { + let mut pairs = FilesetParser::parse(Rule::program_or_bare_string, text)?; + let first = pairs.next().unwrap(); + let span = first.as_span(); + let expr = match first.as_rule() { + Rule::expression => return parse_expression_node(first), + Rule::bare_string_pattern => { + let (lhs, op, rhs) = first.into_inner().collect_tuple().unwrap(); + assert_eq!(lhs.as_rule(), Rule::strict_identifier); + assert_eq!(op.as_rule(), Rule::pattern_kind_op); + assert_eq!(rhs.as_rule(), Rule::bare_string); + let kind = lhs.as_str(); + let value = rhs.as_str().to_owned(); + ExpressionKind::StringPattern { kind, value } + } + Rule::bare_string => ExpressionKind::String(first.as_str().to_owned()), + r => panic!("unexpected program or bare string rule: {r:?}"), + }; + Ok(ExpressionNode::new(expr, span)) +} + pub fn expect_no_arguments(function: &FunctionCallNode) -> FilesetParseResult<()> { if function.args.is_empty() { Ok(()) @@ -325,10 +355,20 @@ mod tests { .map_err(|err| err.kind) } + fn parse_maybe_bare_into_kind(text: &str) -> Result { + parse_program_or_bare_string(text) + .map(|node| node.kind) + .map_err(|err| err.kind) + } + fn parse_normalized(text: &str) -> FilesetParseResult { parse_program(text).map(normalize_tree) } + fn parse_maybe_bare_normalized(text: &str) -> FilesetParseResult { + parse_program_or_bare_string(text).map(normalize_tree) + } + /// Drops auxiliary data from parsed tree so it can be compared with other. fn normalize_tree(node: ExpressionNode) -> ExpressionNode { fn empty_span() -> pest::Span<'static> { @@ -561,6 +601,75 @@ mod tests { assert!(parse_normalized("foo(a,,b)").is_err()); } + #[test] + fn test_parse_bare_string() { + // Valid expression should be parsed as such + assert_eq!( + parse_maybe_bare_into_kind(" valid "), + Ok(ExpressionKind::Identifier("valid")) + ); + assert_eq!( + parse_maybe_bare_normalized("f(x)&y").unwrap(), + parse_normalized("f(x)&y").unwrap() + ); + + // Bare string + assert_eq!( + parse_maybe_bare_into_kind("Foo Bar.txt"), + Ok(ExpressionKind::String("Foo Bar.txt".to_owned())) + ); + assert_eq!( + parse_maybe_bare_into_kind(r#"Windows\Path with space"#), + Ok(ExpressionKind::String( + r#"Windows\Path with space"#.to_owned() + )) + ); + assert_eq!( + parse_maybe_bare_into_kind("柔 術 . j j"), + Ok(ExpressionKind::String("柔 術 . j j".to_owned())) + ); + assert_eq!( + parse_maybe_bare_into_kind("Unicode emoji 💩"), + Ok(ExpressionKind::String("Unicode emoji 💩".to_owned())) + ); + assert_eq!( + parse_maybe_bare_into_kind("looks like & expression"), + Err(FilesetParseErrorKind::SyntaxError) + ); + assert_eq!( + parse_maybe_bare_into_kind("unbalanced_parens("), + Err(FilesetParseErrorKind::SyntaxError) + ); + + // Bare string pattern + assert_eq!( + parse_maybe_bare_into_kind("foo: bar baz"), + Ok(ExpressionKind::StringPattern { + kind: "foo", + value: " bar baz".to_owned() + }) + ); + assert_eq!( + parse_maybe_bare_into_kind("foo:bar:baz"), + Err(FilesetParseErrorKind::SyntaxError) + ); + assert_eq!( + parse_maybe_bare_into_kind("foo:"), + Err(FilesetParseErrorKind::SyntaxError) + ); + assert_eq!( + parse_maybe_bare_into_kind(r#"foo:"unclosed quote"#), + Err(FilesetParseErrorKind::SyntaxError) + ); + + // Surrounding spaces are simply preserved. They could be trimmed, but + // space is valid bare_string character. + assert_eq!( + parse_maybe_bare_into_kind(" No trim "), + Ok(ExpressionKind::String(" No trim ".to_owned())) + ); + } + #[test] fn test_parse_error() { insta::assert_snapshot!(parse_program("foo|").unwrap_err().to_string(), @r###"