From daff07c3d28380b1101c698074f7b2680ea9fd2f Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Wed, 5 Jun 2024 20:57:31 +0200 Subject: [PATCH] Reduce as much as possible state when walking the grammar in PG --- .../codegen/runtime/generator/src/parser.rs | 73 ++++++++++--------- .../generator/src/parser/grammar/resolver.rs | 47 ++++++------ 2 files changed, 61 insertions(+), 59 deletions(-) diff --git a/crates/codegen/runtime/generator/src/parser.rs b/crates/codegen/runtime/generator/src/parser.rs index e16fd5aad2..7f93604ccc 100644 --- a/crates/codegen/runtime/generator/src/parser.rs +++ b/crates/codegen/runtime/generator/src/parser.rs @@ -15,12 +15,11 @@ use codegen::{ }; use grammar::{ GrammarVisitor, ParserDefinitionNode, ParserDefinitionRef, PrecedenceParserDefinitionRef, - TriviaParserDefinitionRef, }; use crate::parser::codegen::KeywordScannerAtomic; use crate::parser::grammar::resolver::Resolution; -use crate::parser::grammar::ResolveCtx; +use crate::parser::grammar::{GrammarElement, ResolveCtx, TriviaParserDefinitionRef}; /// Newtype for the already generated Rust code, not to be confused with regular strings. #[derive(Serialize, Default, Clone)] @@ -63,11 +62,6 @@ struct ParserAccumulatorState { // Defines the `Lexer::next_terminal` method. scanner_contexts: BTreeMap, - /// Defines the top-level parser functions in `Language`. - parser_functions: BTreeMap, // (name of parser, code) - /// Defines the top-level trivia parser functions in `Language`. - trivia_parser_functions: BTreeMap, // (name of parser, code) - /// Makes sure to codegen the scanner functions that are referenced by other scanners. top_level_scanner_names: BTreeSet, /// The current context of a parent scanner/parser being processed. @@ -107,6 +101,7 @@ impl ParserAccumulatorState { .expect("context must be set with `set_current_context`") } + #[allow(clippy::too_many_lines)] // FIXME fn into_model(mut self, resolved: &Resolution) -> ParserModel { // Lookup table for all scanners; used to generate trie scanners. let all_scanners: BTreeMap<_, _> = resolved @@ -205,9 +200,44 @@ impl ParserAccumulatorState { }) .collect(); + // FIXME: Move this to a separate collector for clarity + // and to limit mutable state needed for the lexer model + let mut trivia_parser_functions = BTreeMap::default(); + let mut parser_functions = BTreeMap::default(); + for (_, item) in resolved.items() { + match item { + GrammarElement::TriviaParserDefinition(parser) => { + trivia_parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + GrammarElement::ParserDefinition(parser) if !parser.is_inline() => { + parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + + GrammarElement::PrecedenceParserDefinition(parser) => { + // While it's not common to parse a precedence expression as a standalone nonterminal, + // we generate a function for completeness. + for (name, code) in parser.to_precedence_expression_parser_code() { + parser_functions.insert(name.clone(), RustCode(code.to_string())); + } + parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + _ => {} + } + } + ParserModel { - parser_functions: self.parser_functions, - trivia_parser_functions: self.trivia_parser_functions, + // These are directly collected from the flat resolved items + parser_functions, + trivia_parser_functions, // These are derived from the accumulated state scanner_contexts: contexts, scanner_functions, @@ -219,39 +249,14 @@ impl ParserAccumulatorState { impl GrammarVisitor for ParserAccumulatorState { fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) { self.set_current_context(parser.context().clone()); - - self.trivia_parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); } fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) { - // Have to set this regardless so that we can collect referenced scanners self.set_current_context(parser.context().clone()); - - if !parser.is_inline() { - self.parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); - } } fn precedence_parser_definition_enter(&mut self, parser: &PrecedenceParserDefinitionRef) { self.set_current_context(parser.context().clone()); - - // While it's not common to parse a precedence expression as a standalone nonterminal, - // we generate a function for completeness. - for (name, code) in parser.to_precedence_expression_parser_code() { - self.parser_functions - .insert(name.clone(), RustCode(code.to_string())); - } - - self.parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); } fn parser_definition_node_enter(&mut self, node: &ParserDefinitionNode) { diff --git a/crates/codegen/runtime/generator/src/parser/grammar/resolver.rs b/crates/codegen/runtime/generator/src/parser/grammar/resolver.rs index d809eb7ba2..c82dabecd8 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar/resolver.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/resolver.rs @@ -112,9 +112,6 @@ pub struct Resolution { /// Original items as defined by the DSL v2. items: HashMap, resolved: HashMap, - // Trivia are defined separately from the main grammar - leading_trivia: Rc, - trailing_trivia: Rc, } impl ResolveCtx { @@ -140,15 +137,25 @@ impl ResolveCtx { resolve_grammar_element(item.name(), &mut ctx); } + // Trivia is defined separately from the main grammar + let leading_trivia = Rc::new(NamedTriviaParser { + name: Identifier::from("LeadingTrivia"), + def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx), + }); + + let trailing_trivia = Rc::new(NamedTriviaParser { + name: Identifier::from("TrailingTrivia"), + def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx), + }); + + for trivia in [leading_trivia, trailing_trivia] { + ctx.resolved.insert( + trivia.name().clone(), + GrammarElement::TriviaParserDefinition(trivia), + ); + } + Resolution { - leading_trivia: Rc::new(NamedTriviaParser { - name: Identifier::from("LeadingTrivia"), - def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx), - }) as Rc, - trailing_trivia: Rc::new(NamedTriviaParser { - name: Identifier::from("TrailingTrivia"), - def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx), - }) as Rc, items: ctx.items, resolved: ctx.resolved, } @@ -167,21 +174,11 @@ impl Resolution { /// Collects the already resolved item into a [`Grammar`]. pub fn to_grammar(&self) -> Grammar { - let resolved_items = self - .resolved - .iter() - .map(|(name, elem)| (name.clone(), elem.clone())); - - let leading_trivia = Rc::clone(&self.leading_trivia); - let trailing_trivia = Rc::clone(&self.trailing_trivia); - Grammar { - elements: resolved_items - .chain( - [leading_trivia, trailing_trivia] - .into_iter() - .map(|elem| (elem.name().clone(), elem.into())), - ) + elements: self + .resolved + .iter() + .map(|(name, elem)| (name.clone(), elem.clone())) .collect(), } }