Remove EndOfFileTrivia and always collect trivia after top-level pa…

…rsing (#733) Checks boxes in #640 #638 This adapts `ParserFunction` to always attempt parsing the leading trivia after we had a succesful match. This has a nice side benefit of not overwriting tokens that would allow for more progress when collecting the final trivia (unlike previous [AttemptedParseRule, EndOfFileTrivia] sequence). I'm not so happy with the resulting tree shape, however; IMO we should either: 1. return a "tree" that has both parents - the first one being the expected parsed rule node but also the second parent being the "leading trivia" 2. or at least introduce a notion of a virtual EndOfFile token that this final end-of-file trivia can belong to. Otherwise, a lone, final LeadingTrivia child belonging to the rule node seems misplaced and it's not intuitive for me to find the remaining trivia in that place. I'm open for other suggestions!
NomicFoundation · Jan 12, 2024 · 954a236 · 954a236
1 parent 330700c
commit 954a236
Show file tree

Hide file tree

Showing 70 changed files with 2,126 additions and 613 deletions.
diff --git a/crates/codegen/parser/runtime/src/napi/templates/ast_selectors.rs.jinja2 b/crates/codegen/parser/runtime/src/napi/templates/ast_selectors.rs.jinja2
@@ -278,10 +278,7 @@ impl Selector {
                 RustNamedNode {
                     name: _,
                     node: RustNode::Rule(rule),
-                } if matches!(
-                    rule.kind,
-                    RuleKind::LeadingTrivia | RuleKind::TrailingTrivia | RuleKind::EndOfFileTrivia
-                ) => {
+                } if rule.kind.is_trivia() => {
                     // skip trivia, since it's not part of the AST
                     self.index += 1;
                     continue;

diff --git a/crates/codegen/parser/runtime/src/support/parser_function.rs b/crates/codegen/parser/runtime/src/support/parser_function.rs
@@ -2,6 +2,7 @@ use std::rc::Rc;
 
 use crate::cst::{self, NamedNode};
 use crate::kinds::TokenKind;
+use crate::lexer::Lexer;
 use crate::parse_error::ParseError;
 use crate::parse_output::ParseOutput;
 use crate::support::context::ParserContext;
@@ -12,16 +13,43 @@ pub trait ParserFunction<L>
 where
     Self: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
 {
-    fn parse(&self, language: &L, input: &str) -> ParseOutput;
+    fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput;
 }
 
 impl<L, F> ParserFunction<L> for F
 where
+    L: Lexer,
     F: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
 {
-    fn parse(&self, language: &L, input: &str) -> ParseOutput {
+    fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput {
         let mut stream = ParserContext::new(input);
-        let result = self(language, &mut stream);
+        let mut result = self(language, &mut stream);
+
+        // For a succesful/recovered parse, collect any remaining trivia as part of the parse result
+        // TODO(#737): Remove this once we unconditionally collect trivia
+        if collect_trivia {
+            if let ParserResult::Match(r#match) = &mut result {
+                let [topmost] = r#match.nodes.as_mut_slice() else {
+                    unreachable!(
+                        "Match at the top level of a parse does not have exactly one Rule node"
+                    )
+                };
+
+                let eof_trivia = match Lexer::leading_trivia(language, &mut stream) {
+                    ParserResult::Match(eof_trivia) if !eof_trivia.nodes.is_empty() => {
+                        Some(eof_trivia.nodes)
+                    }
+                    _ => None,
+                };
+
+                if let (cst::Node::Rule(rule), Some(eof_trivia)) = (&mut topmost.node, eof_trivia) {
+                    let mut new_children = rule.children.clone();
+                    new_children.extend(eof_trivia);
+
+                    topmost.node = cst::Node::rule(rule.kind, new_children);
+                }
+            }
+        }
 
         let is_incomplete = matches!(result, ParserResult::IncompleteMatch(_));
         let is_recovering = matches!(result, ParserResult::SkippedUntil(_));
@@ -67,7 +95,6 @@ where
 
                 let start = stream.position();
 
-                let errors = stream.into_errors();
                 // Mark the rest of the unconsumed stream as skipped and report an error
                 // NOTE: IncompleteMatch internally consumes the stream when picked via choice,
                 // so needs a separate check here.
@@ -81,7 +108,8 @@ where
                         cst::Node::token(TokenKind::SKIPPED, input[start.utf8..].to_string());
                     let mut new_children = topmost_rule.children.clone();
                     new_children.push(NamedNode::anonymous(skipped_node));
-                    let mut errors = errors;
+
+                    let mut errors = stream.into_errors();
                     errors.push(ParseError::new_covering_range(
                         start..input.into(),
                         expected_tokens,
@@ -93,6 +121,8 @@ where
                     }
                 } else {
                     let parse_tree = cst::Node::Rule(topmost_rule);
+                    let errors = stream.into_errors();
+
                     // Sanity check: Make sure that succesful parse is equivalent to not having any SKIPPED nodes
                     debug_assert_eq!(
                         errors.is_empty(),

diff --git a/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 b/crates/codegen/parser/runtime/src/templates/language.rs.jinja2
@@ -101,7 +101,12 @@ impl Language {
     pub fn parse(&self, kind: RuleKind, input: &str) -> ParseOutput {
         match kind {
             {%- for parser_name, _code in code.parser_functions -%}
-                RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input),
+            {# TODO(#737): Remove the special case once we stop generating RuleKind for trivia #}
+            {%- if parser_name is ending_with("Trivia") -%}
+                RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, false),
+            {%- else -%}
+                RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, true),
+            {%- endif -%}
             {%- endfor -%}
         }
     }

diff --git a/crates/solidity/inputs/language/src/grammar.rs b/crates/solidity/inputs/language/src/grammar.rs
@@ -22,10 +22,9 @@ pub trait GrammarConstructorDslV2 {
 }
 
 impl GrammarConstructorDslV2 for Grammar {
-    #[allow(clippy::too_many_lines)] // TODO: Remove me once the hack below is removed
     fn from_dsl_v2(lang: &model::Language) -> Grammar {
         // Collect language items into a lookup table to speed up resolution
-        let mut items: HashMap<_, _> = lang
+        let items: HashMap<_, _> = lang
             .items_with_section()
             .map(|(_, topic, item)| {
                 (
@@ -35,39 +34,6 @@ impl GrammarConstructorDslV2 for Grammar {
             })
             .collect();
 
-        // TODO(#638): To minimize regression in the parser migration, we keep the existing DSL v1 model
-        // of SourceUnit being followed by `EndOfFileTrivia`.
-        items.insert(
-            Identifier::from("SourceUnit"),
-            (
-                None,
-                model::Item::Struct {
-                    item: Rc::new(model::StructItem {
-                        name: Identifier::from("SourceUnit"),
-                        enabled: None,
-                        error_recovery: None,
-                        fields: IndexMap::from_iter([
-                            (
-                                Identifier::from("members"),
-                                model::Field::Optional {
-                                    reference: Identifier::from("SourceUnitMembers"),
-
-                                    enabled: None,
-                                },
-                            ),
-                            (
-                                Identifier::from("eof_trivia"),
-                                model::Field::Optional {
-                                    reference: Identifier::from("EndOfFileTrivia"),
-                                    enabled: None,
-                                },
-                            ),
-                        ]),
-                    }),
-                },
-            ),
-        );
-
         let mut resolved = HashMap::new();
         let mut ctx = ResolveCtx {
             items: &items,
@@ -84,16 +50,6 @@ impl GrammarConstructorDslV2 for Grammar {
             def: resolve_trivia(lang.trailing_trivia.clone(), &mut ctx),
         }) as Rc<dyn TriviaParserDefinition>;
 
-        let eof_trivia = Rc::new(NamedTriviaParser {
-            name: "EndOfFileTrivia",
-            def: resolve_trivia(lang.leading_trivia.clone(), &mut ctx),
-        }) as Rc<dyn TriviaParserDefinition>;
-
-        ctx.resolved.insert(
-            Identifier::from("EndOfFileTrivia"),
-            eof_trivia.clone().into(),
-        );
-
         for (_lex_ctx, item) in items.values() {
             resolve_grammar_element(item.name(), &mut ctx);
         }
@@ -151,7 +107,7 @@ impl GrammarConstructorDslV2 for Grammar {
             trailing_trivia_parser: trailing_trivia.clone(),
             elements: resolved_items
                 .chain(
-                    [leading_trivia, trailing_trivia, eof_trivia]
+                    [leading_trivia, trailing_trivia]
                         .into_iter()
                         .map(|elem| (elem.name(), elem.into())),
                 )
@@ -321,10 +277,6 @@ struct ResolveCtx<'a> {
 
 #[allow(clippy::too_many_lines)] // FIXME: Simplify me when we simplify the v2-to-v1 interface
 fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> GrammarElement {
-    if ident.as_str() == "EndOfFileTrivia" {
-        return ctx.resolved.get(ident).unwrap().clone();
-    }
-
     let (lex_ctx, elem) = ctx.items.get(ident).expect("Missing item");
 
     // FIXME: Don't leak

diff --git a/crates/solidity/outputs/cargo/crate/src/generated/kinds.rs b/crates/solidity/outputs/cargo/crate/src/generated/kinds.rs