From 56adb923b6f1e50f6acd87b7464287b72487c2a5 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Thu, 4 Apr 2024 00:25:14 +0900 Subject: [PATCH] templater: relax operator precedence rule to reduce possibility of large reparse After upgrading pest from 2.7.8 to 2.7.9, I noticed CLI tests got significantly slow (something around 40sec -> 60sec on my laptop.) I suspect this would be caused by detailed error state tracking introduced in 2.7.9, but it's also true that our template grammar exercises such code path. My understanding is that PEG is basically a top down parsing with unlimited lookahead. Before this change, the default commit_summary template would be parsed as follows: 1. parse the outermost separate(..) as "term" 2. "concat" rule can't continue, so 3. reparse the whole string as "expression" Because this pattern is not uncommon, I think it's better to rewrite the grammar to avoid large retry. With this patch, our tests runs within ~50sec under debug build. It appears to save a few milliseconds in release build, but my development environment isn't quiet enough to say the difference is significant. --- cli/src/template.pest | 8 ++------ cli/src/template_parser.rs | 17 ++++++++++------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cli/src/template.pest b/cli/src/template.pest index b40f71a6ad..89e24cc171 100644 --- a/cli/src/template.pest +++ b/cli/src/template.pest @@ -71,14 +71,10 @@ expression = { ~ (whitespace* ~ infix_ops ~ whitespace* ~ (prefix_ops ~ whitespace*)* ~ term)* } -// Use 'term' instead of 'expression' to disallow 'x || y ++ z'. It can be -// parsed, but the precedence isn't obvious. -concat = _{ - term ~ (whitespace* ~ concat_op ~ whitespace* ~ term)+ +template = { + expression ~ (whitespace* ~ concat_op ~ whitespace* ~ expression)* } -template = { concat | expression } - program = _{ SOI ~ whitespace* ~ template? ~ whitespace* ~ EOI } function_alias_declaration = { diff --git a/cli/src/template_parser.rs b/cli/src/template_parser.rs index 8c42717a0f..60347b9852 100644 --- a/cli/src/template_parser.rs +++ b/cli/src/template_parser.rs @@ -52,7 +52,6 @@ impl Rule { Rule::primary => None, Rule::term => None, Rule::expression => None, - Rule::concat => None, Rule::template => None, Rule::program => None, Rule::function_alias_declaration => None, @@ -476,7 +475,6 @@ fn parse_template_node(pair: Pair) -> TemplateParseResult let mut nodes: Vec<_> = inner .filter_map(|pair| match pair.as_rule() { Rule::concat_op => None, - Rule::term => Some(parse_term_node(pair)), Rule::expression => Some(parse_expression_node(pair)), r => panic!("unexpected template item rule {r:?}"), }) @@ -1070,11 +1068,16 @@ mod tests { parse_normalized("x || (y && (z.h()))").unwrap(), ); - // Top-level expression is allowed, but not in concatenation - assert!(parse_template(r"x && y").is_ok()); - assert!(parse_template(r"f(x && y)").is_ok()); - assert!(parse_template(r"x && y ++ z").is_err()); - assert!(parse_template(r"(x && y) ++ z").is_ok()); + // Logical operator bounds more tightly than concatenation. This might + // not be so intuitive, but should be harmless. + assert_eq!( + parse_normalized(r"x && y ++ z").unwrap(), + parse_normalized(r"(x && y) ++ z").unwrap(), + ); + assert_eq!( + parse_normalized(r"x ++ y || z").unwrap(), + parse_normalized(r"x ++ (y || z)").unwrap(), + ); // Expression span assert_eq!(parse_template(" ! x ").unwrap().span.as_str(), "! x");