From 83cfe98732fd9c9fe0545daf9e6ee00f6622377e Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Fri, 21 Jun 2024 13:24:32 +0200 Subject: [PATCH] Disentangle grammar resolution and related PG code (#1018) Ticks the box in #638 (_Keyword trie inclusion should be reworked to not require synthetic rules over all keywords_) This exposes the resolution step rather than treating it as an implementation detail, and doesn't try to shoehorn the DSL v2 items into the old `Grammar` model (which the PG was based on) as much. Moreover, this breaks up the existing grammar visitor and tries to collect or calculate more properties upfront, directly from DSL v2, in order to make it explicit that they need not depend on the collector state that tracked everything. I didn't submit it initially because I felt I could polish it slightly further (see the `TODO` note), but since I'm focused on EDR now and this cleans up the last important box in #638 (the other one is simply reverting some rules for string literals), I think it's worth reviewing in the current state. 
--- Cargo.lock | 1 + crates/codegen/runtime/generator/Cargo.toml | 1 + .../runtime/generator/src/parser/grammar.rs | 5 +- .../grammar/{constructor.rs => resolver.rs} | 268 ++++++++---------- .../runtime/generator/src/parser/mod.rs | 189 +++++++----- 5 files changed, 250 insertions(+), 214 deletions(-) rename crates/codegen/runtime/generator/src/parser/grammar/{constructor.rs => resolver.rs} (81%) diff --git a/Cargo.lock b/Cargo.lock index 5af448800a..b2e44ddb27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -415,6 +415,7 @@ dependencies = [ "semver", "serde", "strum", + "strum_macros", ] [[package]] diff --git a/crates/codegen/runtime/generator/Cargo.toml b/crates/codegen/runtime/generator/Cargo.toml index 2359b504ac..73487defca 100644 --- a/crates/codegen/runtime/generator/Cargo.toml +++ b/crates/codegen/runtime/generator/Cargo.toml @@ -17,6 +17,7 @@ quote = { workspace = true } semver = { workspace = true } serde = { workspace = true } strum = { workspace = true } +strum_macros = { workspace = true } [lints] workspace = true diff --git a/crates/codegen/runtime/generator/src/parser/grammar.rs b/crates/codegen/runtime/generator/src/parser/grammar.rs index c6f3d1bb03..99a0ccd043 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar.rs @@ -9,14 +9,15 @@ use std::rc::Rc; use codegen_language_definition::model::{self, Identifier}; -pub mod constructor; pub mod parser_definition; pub mod precedence_parser_definition; +pub mod resolver; pub mod scanner_definition; pub mod visitor; pub use parser_definition::*; pub use precedence_parser_definition::*; +pub use resolver::ResolveCtx; pub use scanner_definition::*; pub use visitor::*; @@ -35,7 +36,7 @@ impl Grammar { } #[allow(clippy::enum_variant_names)] // this will be removed soon -#[derive(Clone)] +#[derive(Clone, strum_macros::EnumTryAs)] pub enum GrammarElement { ScannerDefinition(ScannerDefinitionRef), KeywordScannerDefinition(Rc), diff --git 
a/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs b/crates/codegen/runtime/generator/src/parser/grammar/resolver.rs similarity index 81% rename from crates/codegen/runtime/generator/src/parser/grammar/constructor.rs rename to crates/codegen/runtime/generator/src/parser/grammar/resolver.rs index 573f802aed..6d1062c42a 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/resolver.rs @@ -6,7 +6,7 @@ use std::ops::Deref; use std::rc::Rc; use codegen_language_definition::model::{ - self, BuiltInLabel, FieldsErrorRecovery, Identifier, Item, + self, BuiltInLabel, FieldsErrorRecovery, Identifier, Item, Language, }; use indexmap::IndexMap; use once_cell::sync::Lazy; @@ -17,99 +17,7 @@ use crate::parser::grammar::{ TriviaParserDefinition, }; -impl Grammar { - /// Materializes the DSL v2 model ([`model::Language`]) into [`Grammar`]. - pub fn from_dsl_v2(lang: &model::Language) -> Grammar { - // Collect language items into a lookup table to speed up resolution - let items: HashMap<_, _> = lang - .topics() - .flat_map(|topic| { - topic.items.iter().map(|item| { - ( - item.name().clone(), - (topic.lexical_context.clone(), item.clone()), - ) - }) - }) - .collect(); - - let mut resolved = HashMap::new(); - let mut ctx = ResolveCtx { - items: &items, - resolved: &mut resolved, - }; - - let leading_trivia = Rc::new(NamedTriviaParser { - name: Identifier::from("LeadingTrivia"), - def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx), - }) as Rc; - - let trailing_trivia = Rc::new(NamedTriviaParser { - name: Identifier::from("TrailingTrivia"), - def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx), - }) as Rc; - - for (_lex_ctx, item) in items.values() { - resolve_grammar_element(item.name(), &mut ctx); - } - - // TODO(#638): To make sure the unused (not referred to) keywords are included in the scanner literal trie, - // we 
replicate the DSL v1 behaviour of introducing a synthetic parser that is only meant to group - // keywords by their lexical context. - let mut keywords_per_ctxt = HashMap::new(); - for (ident, (lex_ctx, item)) in &items { - let lex_ctx = lex_ctx.clone().unwrap_or(Identifier::from("Default")); - if let Item::Keyword { .. } = item { - keywords_per_ctxt - .entry(lex_ctx) - .or_insert_with(Vec::new) - .push(ident); - } - } - for (lex_ctx, mut keywords) in keywords_per_ctxt { - keywords.sort_unstable_by_key(|kw| kw.as_str()); - - let parser_name = Identifier::from(format!("{lex_ctx}AllKeywords")); - let all_keywords = model::EnumItem { - name: parser_name.clone(), - enabled: None, - variants: keywords - .iter() - .map(|&ident| model::EnumVariant { - reference: ident.clone(), - enabled: None, - }) - .collect(), - }; - - let def = resolve_choice(all_keywords, &mut ctx); - ctx.resolved.insert( - parser_name.clone(), - GrammarElement::ParserDefinition(Rc::new(NamedParserThunk { - name: parser_name, - context: lex_ctx, - is_inline: true, - def: OnceCell::from(def), - })), - ); - } - - let resolved_items = ctx - .resolved - .iter() - .map(|(name, elem)| (name.clone(), elem.clone())); - - Grammar { - elements: resolved_items - .chain( - [leading_trivia, trailing_trivia] - .into_iter() - .map(|elem| (elem.name().clone(), elem.into())), - ) - .collect(), - } - } -} +static DEFAULT_LEX_CTXT: Lazy = Lazy::new(|| Identifier::from("Default")); #[derive(Debug)] struct NamedTriviaParser { @@ -123,8 +31,7 @@ impl TriviaParserDefinition for NamedTriviaParser { } fn context(&self) -> &Identifier { - static DEFAULT: Lazy = Lazy::new(|| Identifier::from("Default")); - &DEFAULT + &DEFAULT_LEX_CTXT } fn node(&self) -> &ParserDefinitionNode { @@ -198,55 +105,130 @@ impl ParserThunk { } } -struct ResolveCtx<'a> { - items: &'a HashMap, Item)>, - resolved: &'a mut HashMap, +pub struct ResolveCtx { + items: HashMap, + resolved: HashMap, } -#[allow(clippy::too_many_lines)] // FIXME(#638): Simplify 
me when we simplify the v2-to-v1 interface -fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> GrammarElement { - let (lex_ctx, elem) = ctx.items.get(ident).expect("Missing item"); +pub struct Resolution { + /// Original items as defined by the DSL v2. + items: HashMap, + resolved: HashMap, +} - let lex_ctx = lex_ctx - .clone() - .unwrap_or_else(|| Identifier::from("Default")); +impl ResolveCtx { + pub fn resolve(lang: &Language) -> Resolution { + // Collect language items into a lookup table to speed up resolution + let items: HashMap<_, _> = lang + .topics() + .flat_map(|topic| { + topic.items.iter().map(|item| { + let lex_ctxt = topic.lexical_context.as_ref().unwrap_or(&DEFAULT_LEX_CTXT); - // The nonterminals are mutually recursive (so will be the resolution of their definitions), - // so make sure to insert a thunk for nonterminals to resolve to break the cycle. - let inserted_thunk = match (elem, ctx.resolved.contains_key(ident)) { - ( - Item::Struct { .. } - | Item::Enum { .. } - | Item::Repeated { .. } - | Item::Separated { .. 
}, - false, - ) => { - let thunk = Rc::new(NamedParserThunk { - name: ident.clone(), - context: lex_ctx.clone(), - is_inline: false, - def: OnceCell::new(), - }); + (item.name().clone(), (lex_ctxt.clone(), item.clone())) + }) + }) + .collect(); + + let mut ctx = ResolveCtx { + items, + resolved: HashMap::new(), + }; + + for item in lang.items() { + resolve_grammar_element(item.name(), &mut ctx); + } + + // Trivia is defined separately from the main grammar + let leading_trivia = Rc::new(NamedTriviaParser { + name: Identifier::from("LeadingTrivia"), + def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx), + }); + + let trailing_trivia = Rc::new(NamedTriviaParser { + name: Identifier::from("TrailingTrivia"), + def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx), + }); + + for trivia in [leading_trivia, trailing_trivia] { ctx.resolved.insert( - ident.clone(), - (Rc::clone(&thunk) as Rc).into(), + trivia.name().clone(), + GrammarElement::TriviaParserDefinition(trivia), ); - Some(ParserThunk::Regular(thunk)) } + + Resolution { + items: ctx.items, + resolved: ctx.resolved, + } + } +} + +impl Resolution { + /// Returns the lexical context in which the item was defined. + pub fn lex_ctx(&self, name: &Identifier) -> &Identifier { + &self.items[name].0 + } + + /// Returns the resolved items. + pub fn items(&self) -> impl Iterator { + self.resolved.iter() + } + + /// Collects the already resolved item into a [`Grammar`]. + pub fn to_grammar(&self) -> Grammar { + Grammar { + elements: self + .resolved + .iter() + .map(|(name, elem)| (name.clone(), elem.clone())) + .collect(), + } + } +} + +/// Inserts a thunk for the given item in a context to be resolved later and returns it if it was inserted. +fn insert_parser_thunk( + item: &Item, + lex_ctx: &Identifier, + ctx: &mut ResolveCtx, +) -> Option { + match (item, ctx.resolved.contains_key(item.name())) { (Item::Precedence { .. 
}, false) => { let thunk = Rc::new(NamedPrecedenceParserThunk { - name: ident.clone(), + name: item.name().clone(), context: lex_ctx.clone(), def: OnceCell::new(), }); ctx.resolved.insert( - ident.clone(), + item.name().clone(), (Rc::clone(&thunk) as Rc).into(), ); Some(ParserThunk::Precedence(thunk)) } + (item, false) if item.is_nonterminal() => { + let thunk = Rc::new(NamedParserThunk { + name: item.name().clone(), + context: lex_ctx.clone(), + is_inline: false, + def: OnceCell::new(), + }); + ctx.resolved.insert( + item.name().clone(), + (Rc::clone(&thunk) as Rc).into(), + ); + Some(ParserThunk::Regular(thunk)) + } _ => None, - }; + } +} + +fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx) -> GrammarElement { + let (lex_ctx, elem) = ctx.items.get(ident).cloned().expect("Missing item"); + + // The nonterminals are mutually recursive (so will be the resolution of their definitions), + // so make sure to insert a thunk for nonterminals to resolve to break the cycle. + let inserted_thunk = insert_parser_thunk(&elem, &lex_ctx, ctx); match (inserted_thunk, ctx.resolved.get(ident)) { // Already resolved @@ -293,7 +275,7 @@ fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> Gram .set(resolve_precedence(item.deref().clone(), &lex_ctx, ctx)) .unwrap(); } - _ => unreachable!("Only nonterminals can be resolved here"), + _ => unreachable!("{ident}: Only nonterminals can be resolved here"), }; ctx.resolved.get(ident).cloned().unwrap() @@ -301,17 +283,16 @@ fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> Gram // First time resolving a terminal named `ident` (None, None) => { let named_scanner = match elem { - Item::Trivia { item } => Rc::clone(item) as Rc<_>, - Item::Fragment { item } => Rc::clone(item) as Rc<_>, - Item::Token { item } => Rc::clone(item) as Rc<_>, Item::Keyword { item } => { // Keywords are special scanners and are handled separately - let resolved = - 
GrammarElement::KeywordScannerDefinition(Rc::clone(item) as Rc<_>); + let resolved = GrammarElement::KeywordScannerDefinition(item as Rc<_>); ctx.resolved.insert(ident.clone(), resolved.clone()); return resolved; } - _ => unreachable!("Only terminals can be resolved here"), + Item::Token { item } => item as Rc<_>, + Item::Trivia { item } => item as Rc<_>, + Item::Fragment { item } => item as Rc<_>, + _ => unreachable!("{ident}: Only terminals can be resolved here"), }; let resolved = GrammarElement::ScannerDefinition(named_scanner); @@ -325,7 +306,7 @@ fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> Gram fn resolve_trivia( parser: model::TriviaParser, kind: TriviaKind, - ctx: &mut ResolveCtx<'_>, + ctx: &mut ResolveCtx, ) -> ParserDefinitionNode { match parser { model::TriviaParser::Optional { parser } => { @@ -366,7 +347,7 @@ fn resolve_trivia( } } -fn resolve_field(field: model::Field, ctx: &mut ResolveCtx<'_>) -> ParserDefinitionNode { +fn resolve_field(field: model::Field, ctx: &mut ResolveCtx) -> ParserDefinitionNode { match field { model::Field::Required { reference } => { resolve_grammar_element(&reference, ctx).into_parser_def_node() @@ -383,7 +364,7 @@ fn resolve_sequence_like( enabled: Option, fields: IndexMap, error_recovery: Option, - ctx: &mut ResolveCtx<'_>, + ctx: &mut ResolveCtx, ) -> ParserDefinitionNode { let (terminator, delimiters) = match error_recovery { Some(FieldsErrorRecovery { @@ -464,7 +445,7 @@ fn resolve_sequence_like( .versioned(enabled) } -fn resolve_choice(item: model::EnumItem, ctx: &mut ResolveCtx<'_>) -> ParserDefinitionNode { +fn resolve_choice(item: model::EnumItem, ctx: &mut ResolveCtx) -> ParserDefinitionNode { let variants = item .variants .into_iter() @@ -479,7 +460,7 @@ fn resolve_choice(item: model::EnumItem, ctx: &mut ResolveCtx<'_>) -> ParserDefi .versioned(item.enabled) } -fn resolve_repeated(item: model::RepeatedItem, ctx: &mut ResolveCtx<'_>) -> ParserDefinitionNode { +fn 
resolve_repeated(item: model::RepeatedItem, ctx: &mut ResolveCtx) -> ParserDefinitionNode { let reference = Box::new(resolve_grammar_element(&item.reference, ctx).into_parser_def_node()); let repeated = Labeled::with_builtin_label(BuiltInLabel::Item, reference); @@ -491,7 +472,7 @@ fn resolve_repeated(item: model::RepeatedItem, ctx: &mut ResolveCtx<'_>) -> Pars } } -fn resolve_separated(item: model::SeparatedItem, ctx: &mut ResolveCtx<'_>) -> ParserDefinitionNode { +fn resolve_separated(item: model::SeparatedItem, ctx: &mut ResolveCtx) -> ParserDefinitionNode { let reference = resolve_grammar_element(&item.reference, ctx).into_parser_def_node(); let separator = resolve_grammar_element(&item.separator, ctx).into_parser_def_node(); @@ -510,7 +491,7 @@ fn resolve_separated(item: model::SeparatedItem, ctx: &mut ResolveCtx<'_>) -> Pa fn resolve_precedence( item: model::PrecedenceItem, lex_ctx: &Identifier, - ctx: &mut ResolveCtx<'_>, + ctx: &mut ResolveCtx, ) -> PrecedenceParserDefinitionNode { let primaries: Vec<_> = item .primary_expressions @@ -522,7 +503,10 @@ fn resolve_precedence( }) .collect(); let primary_expression = Box::new(match primaries.len() { - 0 => panic!("Precedence operator has no primary expressions"), + 0 => panic!( + "Precedence operator {item} has no primary expressions", + item = item.name + ), _ => ParserDefinitionNode::Choice(Labeled::with_builtin_label( BuiltInLabel::Variant, primaries, diff --git a/crates/codegen/runtime/generator/src/parser/mod.rs b/crates/codegen/runtime/generator/src/parser/mod.rs index 7a55c6a045..21aa1fd0eb 100644 --- a/crates/codegen/runtime/generator/src/parser/mod.rs +++ b/crates/codegen/runtime/generator/src/parser/mod.rs @@ -14,29 +14,31 @@ use codegen::{ PrecedenceParserDefinitionCodegen as _, Trie, }; use grammar::{ - Grammar, GrammarVisitor, ParserDefinitionNode, ParserDefinitionRef, - PrecedenceParserDefinitionRef, ScannerDefinitionRef, TriviaParserDefinitionRef, + GrammarVisitor, ParserDefinitionNode, 
ParserDefinitionRef, PrecedenceParserDefinitionRef, }; use crate::parser::codegen::KeywordItemAtom; +use crate::parser::grammar::resolver::Resolution; +use crate::parser::grammar::{GrammarElement, ResolveCtx, TriviaParserDefinitionRef}; /// Newtype for the already generated Rust code, not to be confused with regular strings. #[derive(Serialize, Default, Clone)] struct RustCode(String); +/// The main model for the parser + lexer code generation. #[derive(Default, Serialize)] pub struct ParserModel { /// Defines the top-level scanner functions in `Language`. - scanner_functions: BTreeMap, // (name of scanner, code) + scanner_functions: BTreeMap, // Defines the `Lexer::next_terminal` method. scanner_contexts: BTreeMap, /// Defines the top-level compound scanners used when lexing in `Language`. - keyword_compound_scanners: BTreeMap, // (name of the KW scanner, code) + keyword_compound_scanners: BTreeMap, /// Defines the top-level parser functions in `Language`. - parser_functions: BTreeMap, // (name of parser, code) + parser_functions: BTreeMap, /// Defines the top-level trivia parser functions in `Language`. - trivia_parser_functions: BTreeMap, // (name of parser, code) + trivia_parser_functions: BTreeMap, } #[derive(Default, Serialize)] @@ -56,57 +58,79 @@ struct ScannerContextModel { delimiters: BTreeMap, } -#[derive(Default)] -struct ParserAccumulatorState { - // Defines the `Lexer::next_terminal` method. - scanner_contexts: BTreeMap, +impl ParserModel { + pub fn from_language(language: &Rc) -> Self { + // First, resolve the grammar structure from the flat list of items + let resolved = ResolveCtx::resolve(language); - /// Defines the top-level parser functions in `Language`. - parser_functions: BTreeMap, // (name of parser, code) - /// Defines the top-level trivia parser functions in `Language`. 
- trivia_parser_functions: BTreeMap, // (name of parser, code) + // Collect all parser functions + let parser_fns = ParserFunctions::collect(&resolved); + // Collect all scanner contexts and their scanners + let mut acc = ScannerContextCollector::default(); + resolved.to_grammar().accept_visitor(&mut acc); + let lexer_model = acc.into_model(&resolved); + // Combine everything into the final model + ParserModel { + scanner_functions: lexer_model.scanner_functions, + scanner_contexts: lexer_model.scanner_contexts, + keyword_compound_scanners: lexer_model.keyword_compound_scanners, + parser_functions: parser_fns.parser_functions, + trivia_parser_functions: parser_fns.trivia_parser_functions, + } + } +} + +#[derive(Default)] +struct ScannerContextCollector { + scanner_contexts: BTreeMap, /// Makes sure to codegen the scanner functions that are referenced by other scanners. top_level_scanner_names: BTreeSet, - /// Lookup table for all scanners; used to generate trie scanners. - all_scanners: BTreeMap, /// The current context of a parent scanner/parser being processed. current_context_name: Option, } #[derive(Default)] -struct ScannerContextAccumulatorState { - /// Set of delimiter pairs for this context that are used in delimited error recovery. +struct ScannerContextCollectorState { delimiters: BTreeMap, scanner_definitions: BTreeSet, keyword_scanner_defs: BTreeMap>, } -impl ParserModel { - pub fn from_language(language: &Rc) -> Self { - // First, we construct the DSLv1 model from the DSLv2 definition... 
- let grammar = Grammar::from_dsl_v2(language); - // ...which we then transform into the parser model - let mut acc = ParserAccumulatorState::default(); - grammar.accept_visitor(&mut acc); - - acc.into_model() - } -} - -impl ParserAccumulatorState { +impl ScannerContextCollector { fn set_current_context(&mut self, name: Identifier) { self.current_context_name = Some(name.clone()); self.scanner_contexts.entry(name).or_default(); } - fn current_context(&mut self) -> &mut ScannerContextAccumulatorState { + fn current_context(&mut self) -> &mut ScannerContextCollectorState { self.scanner_contexts .get_mut(self.current_context_name.as_ref().unwrap()) .expect("context must be set with `set_current_context`") } - fn into_model(self) -> ParserModel { + // Transforms the accumulated state into the final model. + fn into_model(mut self, resolved: &Resolution) -> LexerModel { + // Lookup table for all scanners; used to generate trie scanners. + let all_scanners: BTreeMap<_, _> = resolved + .items() + .filter_map(|(_, item)| item.try_as_scanner_definition_ref()) + .map(|scanner| (scanner.name().clone(), Rc::clone(scanner))) + .collect(); + + for kw_scanner_def in resolved + .items() + .filter_map(|(_, item)| item.try_as_keyword_scanner_definition_ref()) + { + let lex_ctxt = resolved.lex_ctx(&kw_scanner_def.name); + + self.scanner_contexts + .entry(lex_ctxt.clone()) + .or_default() + .keyword_scanner_defs + .insert(kw_scanner_def.name.clone(), Rc::clone(kw_scanner_def)); + } + let contexts = self .scanner_contexts .into_iter() @@ -120,7 +144,7 @@ impl ParserAccumulatorState { let mut literal_trie = Trie::new(); for scanner_name in &context.scanner_definitions { - let scanner = &self.all_scanners[scanner_name]; + let scanner = &all_scanners[scanner_name]; let literals = scanner.literals().unwrap_or_default(); if literals.is_empty() { @@ -157,8 +181,7 @@ impl ParserAccumulatorState { .collect::>(); // Expose the scanner functions that... 
- let scanner_functions = self - .all_scanners + let scanner_functions = all_scanners .iter() .filter(|(name, scanner)| { // are compound (do not consist of only literals) @@ -185,10 +208,8 @@ impl ParserAccumulatorState { }) .collect(); - ParserModel { - parser_functions: self.parser_functions, - trivia_parser_functions: self.trivia_parser_functions, - // These are derived from the accumulated state + // These are derived from the accumulated state + LexerModel { scanner_contexts: contexts, scanner_functions, keyword_compound_scanners, @@ -196,46 +217,17 @@ impl ParserAccumulatorState { } } -impl GrammarVisitor for ParserAccumulatorState { - fn scanner_definition_enter(&mut self, scanner: &ScannerDefinitionRef) { - self.all_scanners - .insert(scanner.name().clone(), Rc::clone(scanner)); - } - +impl GrammarVisitor for ScannerContextCollector { fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) { self.set_current_context(parser.context().clone()); - - self.trivia_parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); } fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) { - // Have to set this regardless so that we can collect referenced scanners self.set_current_context(parser.context().clone()); - if !parser.is_inline() { - self.parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); - } } fn precedence_parser_definition_enter(&mut self, parser: &PrecedenceParserDefinitionRef) { self.set_current_context(parser.context().clone()); - - // While it's not common to parse a precedence expression as a standalone nonterminal, - // we generate a function for completeness. 
- for (name, code) in parser.to_precedence_expression_parser_code() { - self.parser_functions - .insert(name.clone(), RustCode(code.to_string())); - } - - self.parser_functions.insert( - parser.name().clone(), - RustCode(parser.to_parser_code().to_string()), - ); } fn parser_definition_node_enter(&mut self, node: &ParserDefinitionNode) { @@ -248,6 +240,8 @@ impl GrammarVisitor for ParserAccumulatorState { .insert(scanner.name().clone()); } ParserDefinitionNode::KeywordScannerDefinition(scanner) => { + // In addition to the context a keyword is defined in, we also + // need to include reachable ones for the current lexical context self.current_context() .keyword_scanner_defs .insert(scanner.name.clone(), Rc::clone(scanner)); @@ -275,3 +269,58 @@ impl GrammarVisitor for ParserAccumulatorState { }; } } + +/// Represents a final model used for generating lexer/scanner code. +struct LexerModel { + scanner_functions: BTreeMap, + scanner_contexts: BTreeMap, + keyword_compound_scanners: BTreeMap, +} + +/// Collects all parser functions from the resolved grammar. +struct ParserFunctions { + parser_functions: BTreeMap, + trivia_parser_functions: BTreeMap, +} + +impl ParserFunctions { + fn collect(resolved: &Resolution) -> Self { + let mut parser_functions = BTreeMap::default(); + let mut trivia_parser_functions = BTreeMap::default(); + + for (_, item) in resolved.items() { + match item { + GrammarElement::TriviaParserDefinition(parser) => { + trivia_parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + GrammarElement::ParserDefinition(parser) if !parser.is_inline() => { + parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + + GrammarElement::PrecedenceParserDefinition(parser) => { + // While it's not common to parse a precedence expression as a standalone nonterminal, + // we generate a function for completeness. 
+ for (name, code) in parser.to_precedence_expression_parser_code() { + parser_functions.insert(name.clone(), RustCode(code.to_string())); + } + parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); + } + _ => {} + } + } + + Self { + parser_functions, + trivia_parser_functions, + } + } +}