From 56adb923b6f1e50f6acd87b7464287b72487c2a5 Mon Sep 17 00:00:00 2001
From: Yuya Nishihara <yuya@tcha.org>
Date: Thu, 4 Apr 2024 00:25:14 +0900
Subject: [PATCH] templater: relax operator precedence rule to reduce
 possibility of large reparse

After upgrading pest from 2.7.8 to 2.7.9, I noticed CLI tests got significantly
slow (something around 40sec -> 60sec on my laptop.) I suspect this would be
caused by detailed error state tracking introduced in 2.7.9, but it's also true
that our template grammar exercises such code path.

My understanding is that PEG is basically a top down parsing with unlimited
lookahead. Before this change, the default commit_summary template would be
parsed as follows:
 1. parse the outermost separate(..) as "term"
 2. "concat" rule can't continue, so
 3. reparse the whole string as "expression"
Because this pattern is not uncommon, I think it's better to rewrite the
grammar to avoid large retry.

With this patch, our tests runs within ~50sec under debug build. It appears to
save a few milliseconds in release build, but my development environment isn't
quiet enough to say the difference is significant.
---
 cli/src/template.pest      |  8 ++------
 cli/src/template_parser.rs | 17 ++++++++++-------
 2 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/cli/src/template.pest b/cli/src/template.pest
index b40f71a6ad..89e24cc171 100644
--- a/cli/src/template.pest
+++ b/cli/src/template.pest
@@ -71,14 +71,10 @@ expression = {
   ~ (whitespace* ~ infix_ops ~ whitespace* ~ (prefix_ops ~ whitespace*)* ~ term)*
 }
 
-// Use 'term' instead of 'expression' to disallow 'x || y ++ z'. It can be
-// parsed, but the precedence isn't obvious.
-concat = _{
-  term ~ (whitespace* ~ concat_op ~ whitespace* ~ term)+
+template = {
+  expression ~ (whitespace* ~ concat_op ~ whitespace* ~ expression)*
 }
 
-template = { concat | expression }
-
 program = _{ SOI ~ whitespace* ~ template? ~ whitespace* ~ EOI }
 
 function_alias_declaration = {
diff --git a/cli/src/template_parser.rs b/cli/src/template_parser.rs
index 8c42717a0f..60347b9852 100644
--- a/cli/src/template_parser.rs
+++ b/cli/src/template_parser.rs
@@ -52,7 +52,6 @@ impl Rule {
             Rule::primary => None,
             Rule::term => None,
             Rule::expression => None,
-            Rule::concat => None,
             Rule::template => None,
             Rule::program => None,
             Rule::function_alias_declaration => None,
@@ -476,7 +475,6 @@ fn parse_template_node(pair: Pair<Rule>) -> TemplateParseResult<ExpressionNode>
     let mut nodes: Vec<_> = inner
         .filter_map(|pair| match pair.as_rule() {
             Rule::concat_op => None,
-            Rule::term => Some(parse_term_node(pair)),
             Rule::expression => Some(parse_expression_node(pair)),
             r => panic!("unexpected template item rule {r:?}"),
         })
@@ -1070,11 +1068,16 @@ mod tests {
             parse_normalized("x || (y && (z.h()))").unwrap(),
         );
 
-        // Top-level expression is allowed, but not in concatenation
-        assert!(parse_template(r"x && y").is_ok());
-        assert!(parse_template(r"f(x && y)").is_ok());
-        assert!(parse_template(r"x && y ++ z").is_err());
-        assert!(parse_template(r"(x && y) ++ z").is_ok());
+        // Logical operator bounds more tightly than concatenation. This might
+        // not be so intuitive, but should be harmless.
+        assert_eq!(
+            parse_normalized(r"x && y ++ z").unwrap(),
+            parse_normalized(r"(x && y) ++ z").unwrap(),
+        );
+        assert_eq!(
+            parse_normalized(r"x ++ y || z").unwrap(),
+            parse_normalized(r"x ++ (y || z)").unwrap(),
+        );
 
         // Expression span
         assert_eq!(parse_template(" ! x ").unwrap().span.as_str(), "! x");