diff --git a/crates/codegen/runtime/generator/src/parser.rs b/crates/codegen/runtime/generator/src/parser.rs index 028666f033..203b69e8a3 100644 --- a/crates/codegen/runtime/generator/src/parser.rs +++ b/crates/codegen/runtime/generator/src/parser.rs @@ -15,11 +15,12 @@ use codegen::{ PrecedenceParserDefinitionCodegen as _, Trie, }; use grammar::{ - Grammar, GrammarVisitor, ParserDefinitionNode, ParserDefinitionRef, - PrecedenceParserDefinitionRef, ScannerDefinitionRef, TriviaParserDefinitionRef, + GrammarVisitor, ParserDefinitionNode, ParserDefinitionRef, PrecedenceParserDefinitionRef, + ScannerDefinitionRef, TriviaParserDefinitionRef, }; use crate::parser::codegen::KeywordScannerAtomic; +use crate::parser::grammar::ResolveCtx; /// Newtype for the already generated Rust code, not to be confused with regular strings. #[derive(Serialize, Default, Clone)] @@ -89,7 +90,8 @@ struct ScannerContextAccumulatorState { impl ParserModel { pub fn from_language(language: &Rc) -> Self { // First, we construct the DSLv1 model from the DSLv2 definition... - let grammar = Grammar::from_dsl_v2(language); + let resolved = ResolveCtx::resolve(language); + let grammar = resolved.to_grammar(); // ...which we then transform into the parser model let mut acc = ParserAccumulatorState::default(); grammar.accept_visitor(&mut acc); @@ -219,6 +221,7 @@ impl GrammarVisitor for ParserAccumulatorState { fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) { // Have to set this regardless so that we can collect referenced scanners self.set_current_context(parser.context().clone()); + if !parser.is_inline() { self.parser_functions.insert( parser.name().clone(), diff --git a/crates/codegen/runtime/generator/src/parser/grammar.rs b/crates/codegen/runtime/generator/src/parser/grammar.rs index c6f3d1bb03..d67b3c821f 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar.rs @@ -9,14 +9,15 @@ use std::rc::Rc; use codegen_language_definition::model::{self, Identifier}; -pub mod constructor; pub mod parser_definition; pub mod precedence_parser_definition; +pub mod resolver; pub mod scanner_definition; pub mod visitor; pub use parser_definition::*; pub use precedence_parser_definition::*; +pub use resolver::ResolveCtx; pub use scanner_definition::*; pub use visitor::*; diff --git a/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs b/crates/codegen/runtime/generator/src/parser/grammar/resolver.rs similarity index 94% rename from crates/codegen/runtime/generator/src/parser/grammar/constructor.rs rename to crates/codegen/runtime/generator/src/parser/grammar/resolver.rs index efba7331bc..9d698bab22 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/resolver.rs @@ -17,80 +17,6 @@ use crate::parser::grammar::{ static DEFAULT_LEX_CTXT: Lazy = Lazy::new(|| Identifier::from("Default")); -impl Grammar { - /// Materializes the DSL v2 model ([`model::Language`]) into [`Grammar`]. - pub fn from_dsl_v2(lang: &model::Language) -> Grammar { - let mut ctx = ResolveCtx::new(lang); - - let leading_trivia = Rc::new(NamedTriviaParser { - name: Identifier::from("LeadingTrivia"), - def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx), - }) as Rc; - - let trailing_trivia = Rc::new(NamedTriviaParser { - name: Identifier::from("TrailingTrivia"), - def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx), - }) as Rc; - - for item in lang.items() { - resolve_grammar_element(item.name(), &mut ctx); - } - - // TODO(#638): To make sure the unused (not referred to) keywords are included in the scanner literal trie, - // we replicate the DSL v1 behaviour of introducing a synthetic parser that is only meant to group - // keywords by their lexical context. - let mut keywords_per_ctxt = HashMap::new(); - for (ident, (lex_ctx, item)) in &ctx.items { - if let Item::Keyword { .. } = item { - keywords_per_ctxt - .entry(lex_ctx.clone()) - .or_insert_with(BTreeSet::new) - .insert(ident.clone()); - } - } - for (lex_ctx, keywords) in keywords_per_ctxt { - let parser_name = Identifier::from(format!("{lex_ctx}AllKeywords")); - let all_keywords = model::EnumItem { - name: parser_name.clone(), - enabled: None, - variants: keywords - .iter() - .map(|ident| model::EnumVariant { - reference: ident.clone(), - enabled: None, - }) - .collect(), - }; - - let def = resolve_choice(all_keywords, &mut ctx); - ctx.resolved.insert( - parser_name.clone(), - GrammarElement::ParserDefinition(Rc::new(NamedParserThunk { - name: parser_name, - context: lex_ctx.clone(), - is_inline: true, - def: OnceCell::from(def), - })), - ); - } - - let resolved_items = ctx - .resolved - .iter() - .map(|(name, elem)| (name.clone(), elem.clone())); - - Grammar { - elements: resolved_items - .chain( - [leading_trivia, trailing_trivia] - .into_iter() - .map(|elem| (elem.name().clone(), elem.into())), - ) - .collect(), - } - } -} - #[derive(Debug)] struct NamedTriviaParser { name: Identifier, @@ -177,13 +103,20 @@ impl ParserThunk { } } -struct ResolveCtx { +pub struct ResolveCtx { items: HashMap, resolved: HashMap, } +pub struct Resolution { + resolved: HashMap, + // Trivia are defined separately from the main grammar + leading_trivia: Rc, + trailing_trivia: Rc, +} + impl ResolveCtx { - pub fn new(language: &Language) -> Self { + fn new(language: &Language) -> Self { // Collect language items into a lookup table to speed up resolution let items: HashMap<_, _> = language .topics() @@ -201,6 +134,87 @@ impl ResolveCtx { resolved: HashMap::new(), } } + + pub fn resolve(lang: &Language) -> Resolution { + let mut ctx = ResolveCtx::new(lang); + + for item in lang.items() { + resolve_grammar_element(item.name(), &mut ctx); + } + + // TODO(#638): To make sure the unused (not referred to) keywords are included in the scanner literal trie, + // we replicate the DSL v1 behaviour of introducing a synthetic parser that is only meant to group + // keywords by their lexical context. + let mut keywords_per_ctxt = HashMap::new(); + for (ident, (lex_ctx, item)) in &ctx.items { + if let Item::Keyword { .. } = item { + keywords_per_ctxt + .entry(lex_ctx.clone()) + .or_insert_with(BTreeSet::new) + .insert(ident.clone()); + } + } + for (lex_ctx, keywords) in keywords_per_ctxt { + let parser_name = Identifier::from(format!("{lex_ctx}AllKeywords")); + let all_keywords = model::EnumItem { + name: parser_name.clone(), + enabled: None, + variants: keywords + .iter() + .map(|ident| model::EnumVariant { + reference: ident.clone(), + enabled: None, + }) + .collect(), + }; + + let def = resolve_choice(all_keywords, &mut ctx); + ctx.resolved.insert( + parser_name.clone(), + GrammarElement::ParserDefinition(Rc::new(NamedParserThunk { + name: parser_name, + context: lex_ctx.clone(), + is_inline: true, + def: OnceCell::from(def), + })), + ); + } + + Resolution { + leading_trivia: Rc::new(NamedTriviaParser { + name: Identifier::from("LeadingTrivia"), + def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx), + }) as Rc, + trailing_trivia: Rc::new(NamedTriviaParser { + name: Identifier::from("TrailingTrivia"), + def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx), + }) as Rc, + resolved: ctx.resolved, + } + } +} + +impl Resolution { + /// Collects the already resolved item into a [`Grammar`]. + pub fn to_grammar(&self) -> Grammar { + let resolved_items = self + .resolved + .iter() + .map(|(name, elem)| (name.clone(), elem.clone())); + + let leading_trivia = Rc::clone(&self.leading_trivia); + let trailing_trivia = Rc::clone(&self.trailing_trivia); + + Grammar { + elements: resolved_items + .chain( + [leading_trivia, trailing_trivia] + .into_iter() + .map(|elem| (elem.name().clone(), elem.into())), + ) + .collect(), + } + } } #[allow(clippy::too_many_lines)] // FIXME(#638): Simplify me when we simplify the v2-to-v1 interface