From 57208b64bb6ab2bec887c77a458dc9890c3c2c63 Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Wed, 5 Jun 2024 21:21:12 +0200 Subject: [PATCH] Separate the collectors in the PG --- .../codegen/runtime/generator/src/parser.rs | 144 ++++++++++-------- 1 file changed, 80 insertions(+), 64 deletions(-) diff --git a/crates/codegen/runtime/generator/src/parser.rs b/crates/codegen/runtime/generator/src/parser.rs index 7f93604ccc..ea154f369e 100644 --- a/crates/codegen/runtime/generator/src/parser.rs +++ b/crates/codegen/runtime/generator/src/parser.rs @@ -25,19 +25,20 @@ use crate::parser::grammar::{GrammarElement, ResolveCtx, TriviaParserDefinitionR #[derive(Serialize, Default, Clone)] struct RustCode(String); +/// The main model for the parser + lexer code generation. #[derive(Default, Serialize)] pub struct ParserModel { /// Defines the top-level scanner functions in `Language`. - scanner_functions: BTreeMap, // (name of scanner, code) + scanner_functions: BTreeMap, // Defines the `Lexer::next_terminal` method. scanner_contexts: BTreeMap, /// Defines the top-level compound scanners used when lexing in `Language`. - keyword_compound_scanners: BTreeMap, // (name of the KW scanner, code) + keyword_compound_scanners: BTreeMap, /// Defines the top-level parser functions in `Language`. - parser_functions: BTreeMap, // (name of parser, code) + parser_functions: BTreeMap, /// Defines the top-level trivia parser functions in `Language`. - trivia_parser_functions: BTreeMap, // (name of parser, code) + trivia_parser_functions: BTreeMap, } #[derive(Default, Serialize)] @@ -57,10 +58,25 @@ struct ScannerContextModel { delimiters: BTreeMap, } +impl ParserModel { + pub fn from_language(language: &Rc) -> Self { + // First, resolve the grammar structure from the flat list of items + let resolved = ResolveCtx::resolve(language); + + // Collect all parser functions + let parser_fns = ParserFunctions::collect(&resolved); + // Collect all scanner contexts and their scanners + let mut acc = ScannerContextCollector::default(); + resolved.to_grammar().accept_visitor(&mut acc); + + // Combine everything into the final model + acc.into_model(&resolved, parser_fns) + } +} + #[derive(Default)] -struct ParserAccumulatorState { - // Defines the `Lexer::next_terminal` method. - scanner_contexts: BTreeMap, +struct ScannerContextCollector { + scanner_contexts: BTreeMap, /// Makes sure to codegen the scanner functions that are referenced by other scanners. top_level_scanner_names: BTreeSet, @@ -69,40 +85,26 @@ struct ParserAccumulatorState { } #[derive(Default)] -struct ScannerContextAccumulatorState { - /// Set of delimiter pairs for this context that are used in delimited error recovery. +struct ScannerContextCollectorState { delimiters: BTreeMap, scanner_definitions: BTreeSet, keyword_scanner_defs: BTreeMap>, } -impl ParserModel { - pub fn from_language(language: &Rc) -> Self { - // First, we construct the DSLv1 model from the DSLv2 definition... - let resolved = ResolveCtx::resolve(language); - let grammar = resolved.to_grammar(); - // ...which we then transform into the parser model - let mut acc = ParserAccumulatorState::default(); - grammar.accept_visitor(&mut acc); - - acc.into_model(&resolved) - } -} - -impl ParserAccumulatorState { +impl ScannerContextCollector { fn set_current_context(&mut self, name: Identifier) { self.current_context_name = Some(name.clone()); self.scanner_contexts.entry(name).or_default(); } - fn current_context(&mut self) -> &mut ScannerContextAccumulatorState { + fn current_context(&mut self) -> &mut ScannerContextCollectorState { self.scanner_contexts .get_mut(self.current_context_name.as_ref().unwrap()) .expect("context must be set with `set_current_context`") } - #[allow(clippy::too_many_lines)] // FIXME - fn into_model(mut self, resolved: &Resolution) -> ParserModel { + // TODO: Separate it to a function that combines the accumulated state with the parser_fns + fn into_model(mut self, resolved: &Resolution, parser_fns: ParserFunctions) -> ParserModel { // Lookup table for all scanners; used to generate trie scanners. let all_scanners: BTreeMap<_, _> = resolved .items() @@ -200,53 +202,19 @@ impl ParserAccumulatorState { }) .collect(); - // FIXME: Move this to a separate collector for clarity - // and to limit mutable state needed for the lexer model - let mut trivia_parser_functions = BTreeMap::default(); - let mut parser_functions = BTreeMap::default(); - for (_, item) in resolved.items() { - match item { - GrammarElement::TriviaParserDefinition(parser) => { - trivia_parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); - } - GrammarElement::ParserDefinition(parser) if !parser.is_inline() => { - parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); - } - - GrammarElement::PrecedenceParserDefinition(parser) => { - // While it's not common to parse a precedence expression as a standalone nonterminal, - // we generate a function for completeness. - for (name, code) in parser.to_precedence_expression_parser_code() { - parser_functions.insert(name.clone(), RustCode(code.to_string())); - } - parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); - } - _ => {} - } - } - ParserModel { - // These are directly collected from the flat resolved items - parser_functions, - trivia_parser_functions, // These are derived from the accumulated state scanner_contexts: contexts, scanner_functions, keyword_compound_scanners, + // These are directly collected from the flat resolved items + parser_functions: parser_fns.parser_functions, + trivia_parser_functions: parser_fns.trivia_parser_functions, } } } -impl GrammarVisitor for ParserAccumulatorState { +impl GrammarVisitor for ScannerContextCollector { fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) { self.set_current_context(parser.context().clone()); } @@ -298,3 +266,51 @@ impl GrammarVisitor for ParserAccumulatorState { }; } } + +/// Collects all parser functions from the resolved grammar. +struct ParserFunctions { + parser_functions: BTreeMap, + trivia_parser_functions: BTreeMap, +} + +impl ParserFunctions { + fn collect(resolved: &Resolution) -> Self { + let mut parser_functions = BTreeMap::default(); + let mut trivia_parser_functions = BTreeMap::default(); + + for (_, item) in resolved.items() { + match item { + GrammarElement::TriviaParserDefinition(parser) => { + trivia_parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + GrammarElement::ParserDefinition(parser) if !parser.is_inline() => { + parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + + GrammarElement::PrecedenceParserDefinition(parser) => { + // While it's not common to parse a precedence expression as a standalone nonterminal, + // we generate a function for completeness. + for (name, code) in parser.to_precedence_expression_parser_code() { + parser_functions.insert(name.clone(), RustCode(code.to_string())); + } + parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + _ => {} + } + } + + Self { + parser_functions, + trivia_parser_functions, + } + } +}