diff --git a/Cargo.lock b/Cargo.lock
index 5c2ad3b528..cd15934b41 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -395,6 +395,7 @@ dependencies = [
  "indexmap",
  "infra_utils",
  "itertools",
+ "once_cell",
  "proc-macro2",
  "quote",
  "semver",
diff --git a/crates/codegen/language/definition/src/model/terminals/keyword.rs b/crates/codegen/language/definition/src/model/terminals/keyword.rs
index 29167cf14f..e8439b1142 100644
--- a/crates/codegen/language/definition/src/model/terminals/keyword.rs
+++ b/crates/codegen/language/definition/src/model/terminals/keyword.rs
@@ -17,8 +17,9 @@ pub struct KeywordItem {
 #[derive_spanned_type(Clone, Debug, ParseInputTokens, WriteOutputTokens)]
 pub struct KeywordDefinition {
     pub enabled: Option<VersionSpecifier>,
+    /// When the keyword is reserved, i.e. it can't be used in other positions (e.g. as a name)
     pub reserved: Option<VersionSpecifier>,
-
+    // Underlying keyword scanner (i.e. identifier scanner)
     pub value: KeywordValue,
 }
diff --git a/crates/codegen/language/definition/src/model/utils/identifier.rs b/crates/codegen/language/definition/src/model/utils/identifier.rs
index bf7403a4fd..7d03a77629 100644
--- a/crates/codegen/language/definition/src/model/utils/identifier.rs
+++ b/crates/codegen/language/definition/src/model/utils/identifier.rs
@@ -1,3 +1,4 @@
+use std::fmt;
 use std::ops::Deref;
 
 use proc_macro2::{Literal, TokenStream};
@@ -81,3 +82,9 @@ impl WriteOutputTokens for Identifier {
         }
     }
 }
+
+impl quote::IdentFragment for Identifier {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.value.fmt(f)
+    }
+}
diff --git a/crates/codegen/runtime/generator/Cargo.toml b/crates/codegen/runtime/generator/Cargo.toml
index d911a27d15..07ea3ec029 100644
--- a/crates/codegen/runtime/generator/Cargo.toml
+++ b/crates/codegen/runtime/generator/Cargo.toml
@@ -12,6 +12,7 @@ indexmap = { workspace = true }
 Inflector = { workspace = true }
 infra_utils = { workspace = true }
 itertools = { workspace = true }
+once_cell = { workspace = true }
 proc-macro2 = { workspace = true }
 quote = { workspace = true }
 semver = { workspace = true }
diff --git a/crates/codegen/runtime/generator/src/parser.rs b/crates/codegen/runtime/generator/src/parser.rs
index 067bef7c63..82c8192503 100644
--- a/crates/codegen/runtime/generator/src/parser.rs
+++ b/crates/codegen/runtime/generator/src/parser.rs
@@ -3,7 +3,7 @@
 use std::collections::{BTreeMap, BTreeSet};
 use std::rc::Rc;
 
-use codegen_language_definition::model::Language;
+use codegen_language_definition::model::{Identifier, Language, VersionSpecifier};
 use quote::{format_ident, quote};
 use semver::Version;
 use serde::Serialize;
@@ -14,6 +14,7 @@ mod parser_definition;
 mod precedence_parser_definition;
 mod scanner_definition;
 mod trie;
+mod versioned;
 
 use grammar::{
     Grammar, GrammarVisitor, KeywordScannerAtomic, KeywordScannerDefinitionRef,
@@ -26,94 +27,168 @@ use precedence_parser_definition::PrecedenceParserDefinitionExtensions as _;
 use scanner_definition::ScannerDefinitionExtensions as _;
 use trie::Trie;
 
+/// Newtype for the already generated Rust code, not to be confused with regular strings.
+#[derive(Serialize, Default, Clone)]
+struct RustCode(String);
+
 #[derive(Default, Serialize)]
 pub struct ParserModel {
-    /// Defines the `Language::SUPPORTED_VERSIONS` field.
-    all_versions: BTreeSet<Version>,
     /// Constructs inner `Language` the state to evaluate the version-dependent branches.
     referenced_versions: BTreeSet<Version>,
 
     /// Defines the `NonTerminalKind` enum variants.
-    nonterminal_kinds: BTreeSet<&'static str>,
+    nonterminal_kinds: BTreeSet<Identifier>,
     /// Defines the `TerminalKind` enum variants.
-    terminal_kinds: BTreeSet<&'static str>,
+    terminal_kinds: BTreeSet<Identifier>,
     /// Defines `TerminalKind::is_trivia` method.
-    trivia_scanner_names: BTreeSet<&'static str>,
+    trivia_scanner_names: BTreeSet<Identifier>,
     /// Defines `NodeLabel` enum variants.
     labels: BTreeSet<String>,
     /// Defines the top-level scanner functions in `Language`.
-    scanner_functions: BTreeMap<&'static str, String>, // (name of scanner, code)
+    scanner_functions: BTreeMap<Identifier, RustCode>, // (name of scanner, code)
     // Defines the `LexicalContext(Type)` enum and type-level variants.
-    scanner_contexts: BTreeMap<&'static str, ScannerContext>,
+    scanner_contexts: BTreeMap<Identifier, ScannerContextModel>,
     /// Defines the top-level compound scanners used when lexing in `Language`.
-    keyword_compound_scanners: BTreeMap<&'static str, String>, // (name of the KW scanner, code)
+    keyword_compound_scanners: BTreeMap<Identifier, RustCode>, // (name of the KW scanner, code)
     /// Defines the top-level parser functions in `Language`.
-    parser_functions: BTreeMap<&'static str, String>, // (name of parser, code)
+    parser_functions: BTreeMap<Identifier, RustCode>, // (name of parser, code)
     /// Defines the top-level trivia parser functions in `Language`.
-    trivia_parser_functions: BTreeMap<&'static str, String>, // (name of parser, code)
-
-    // Internal state:
-    /// Makes sure to codegen the scanner functions that are referenced by other scanners.
-    #[serde(skip)]
-    top_level_scanner_names: BTreeSet<&'static str>,
-    /// Lookup table for all scanners; used to generate trie scanners.
-    #[serde(skip)]
-    all_scanners: BTreeMap<&'static str, ScannerDefinitionRef>,
-    /// The current context of a parent scanner/parser being processed.
-    #[serde(skip)]
-    current_context_name: &'static str,
+    trivia_parser_functions: BTreeMap<Identifier, RustCode>, // (name of parser, code)
 }
 
 #[derive(Default, Serialize)]
-struct ScannerContext {
+struct ScannerContextModel {
     /// Rust code for the trie scanner that matches literals.
-    literal_scanner: String,
+    literal_scanner: RustCode,
     /// Names of the compound scanners that are keywords.
     // Values (Rust code) is only used to generate the top-level `keyword_compound_scanners`.
-    keyword_compound_scanners: BTreeMap<&'static str, String>,
+    keyword_compound_scanners: BTreeMap<Identifier, RustCode>,
     /// Rust code for the trie scanner that matches keywords
-    keyword_trie_scanner: String,
+    keyword_trie_scanner: RustCode,
     /// Names of the scanners for identifiers that can be promoted to keywords.
-    promotable_identifier_scanners: BTreeSet<&'static str>,
+    promotable_identifier_scanners: BTreeSet<Identifier>,
     /// Names of the scanners that are compound (do not consist of only literals).
-    compound_scanner_names: Vec<&'static str>,
+    compound_scanner_names: Vec<Identifier>,
     /// Set of delimiter pairs for this context that are used in delimited error recovery.
-    delimiters: BTreeMap<&'static str, &'static str>,
-    // Internal state:
-    #[serde(skip)]
-    scanner_definitions: BTreeSet<&'static str>,
-    #[serde(skip)]
-    keyword_scanner_defs: BTreeMap<&'static str, KeywordScannerDefinitionRef>,
+    delimiters: BTreeMap<Identifier, Identifier>,
+}
+
+#[derive(Default)]
+struct ParserAccumulatorState {
+    /// Versions used to construct the inner `Language` state that evaluates the version-dependent branches.
+    referenced_versions: BTreeSet<Version>,
+
+    /// Defines the `NonTerminalKind` enum variants.
+    nonterminal_kinds: BTreeSet<Identifier>,
+    /// Defines the `TerminalKind` enum variants.
+    terminal_kinds: BTreeSet<Identifier>,
+    /// Defines `TerminalKind::is_trivia` method.
+    trivia_scanner_names: BTreeSet<Identifier>,
+    /// Defines `NodeLabel` enum variants.
+    labels: BTreeSet<String>,
+
+    // Defines the `LexicalContext(Type)` enum and type-level variants.
+    scanner_contexts: BTreeMap<Identifier, ScannerContextAccumulatorState>,
+
+    /// Defines the top-level parser functions in `Language`.
+    parser_functions: BTreeMap<Identifier, RustCode>, // (name of parser, code)
+    /// Defines the top-level trivia parser functions in `Language`.
+    trivia_parser_functions: BTreeMap<Identifier, RustCode>, // (name of parser, code)
+
+    /// Makes sure to codegen the scanner functions that are referenced by other scanners.
+    top_level_scanner_names: BTreeSet<Identifier>,
+    /// Lookup table for all scanners; used to generate trie scanners.
+    all_scanners: BTreeMap<Identifier, ScannerDefinitionRef>,
+    /// The current context of a parent scanner/parser being processed.
+    current_context_name: Option<Identifier>,
+}
+
+#[derive(Default)]
+struct ScannerContextAccumulatorState {
+    /// Set of delimiter pairs for this context that are used in delimited error recovery.
+    delimiters: BTreeMap<Identifier, Identifier>,
+    scanner_definitions: BTreeSet<Identifier>,
+    keyword_scanner_defs: BTreeMap<Identifier, KeywordScannerDefinitionRef>,
 }
 
 impl ParserModel {
     pub fn from_language(language: &Rc<Language>) -> Self {
+        // First, we construct the DSLv1 model from the DSLv2 definition...
         let grammar = Grammar::from_dsl_v2(language);
+        // ...which we then transform into the parser model
+        let mut acc = ParserAccumulatorState::default();
+        grammar.accept_visitor(&mut acc);
 
-        let mut model = Self::default();
-        grammar.accept_visitor(&mut model);
-
-        model
+        acc.into_model()
     }
+}
 
-    fn set_current_context(&mut self, name: &'static str) {
-        self.current_context_name = name;
+impl ParserAccumulatorState {
+    fn set_current_context(&mut self, name: Identifier) {
+        self.current_context_name = Some(name.clone());
         self.scanner_contexts.entry(name).or_default();
     }
 
-    fn current_context(&mut self) -> &mut ScannerContext {
+    fn current_context(&mut self) -> &mut ScannerContextAccumulatorState {
         self.scanner_contexts
-            .get_mut(&self.current_context_name)
+            .get_mut(self.current_context_name.as_ref().unwrap())
            .expect("context must be set with `set_current_context`")
     }
-}
 
-impl GrammarVisitor for ParserModel {
-    fn grammar_leave(&mut self, _grammar: &Grammar) {
+    fn into_model(mut self) -> ParserModel {
+        let contexts = self
+            .scanner_contexts
+            .into_iter()
+            .map(|(name, context)| {
+                let mut acc = ScannerContextModel {
+                    delimiters: context.delimiters,
+                    ..Default::default()
+                };
+
+                // Process literals into trie and compound scanners
+                let mut literal_trie = Trie::new();
+
+                for scanner_name in &context.scanner_definitions {
+                    let scanner = &self.all_scanners[scanner_name];
+
+                    let literals = scanner.literals();
+                    if literals.is_empty() {
+                        acc.compound_scanner_names.push(scanner_name.clone());
+                    } else {
+                        for literal in literals {
+                            literal_trie.insert(&literal, Rc::clone(scanner));
+                        }
+                    }
+                }
+                acc.literal_scanner = RustCode(literal_trie.to_scanner_code().to_string());
+
+                acc.promotable_identifier_scanners = context
+                    .keyword_scanner_defs
+                    .values()
+                    .map(|def| def.identifier_scanner().clone())
+                    .collect();
+
+                let mut keyword_trie = Trie::new();
+                for (name, def) in &context.keyword_scanner_defs {
+                    match KeywordScannerAtomic::try_from_def(def) {
+                        Some(atomic) => keyword_trie.insert(atomic.value(), atomic.clone()),
+                        None => {
+                            acc.keyword_compound_scanners
+                                .insert(name.clone(), RustCode(def.to_scanner_code().to_string()));
+                        }
+                    }
+                }
+
+                acc.keyword_trie_scanner = RustCode(keyword_trie.to_scanner_code().to_string());
+
+                (name, acc)
+            })
+            .collect::<BTreeMap<_, _>>();
+
         // Expose the scanner functions that...
- self.scanner_functions = self + let scanner_functions = self .all_scanners .iter() .filter(|(name, scanner)| { @@ -122,57 +197,22 @@ impl GrammarVisitor for ParserModel { // but make sure to also include a scanner that is referenced by other scanners, even if not compound !self.top_level_scanner_names.contains(*name) }) - .map(|(name, scanner)| (*name, scanner.to_scanner_code().to_string())) + .map(|(name, scanner)| { + ( + name.clone(), + RustCode(scanner.to_scanner_code().to_string()), + ) + }) .collect(); - for context in self.scanner_contexts.values_mut() { - let mut literal_trie = Trie::new(); - - for scanner_name in &context.scanner_definitions { - let scanner = &self.all_scanners[*scanner_name]; - - let literals = scanner.literals(); - if literals.is_empty() { - context.compound_scanner_names.push(scanner_name); - } else { - for literal in literals { - literal_trie.insert(&literal, Rc::clone(scanner)); - } - } - } - - context.literal_scanner = literal_trie.to_scanner_code().to_string(); - - context.promotable_identifier_scanners = context - .keyword_scanner_defs - .values() - .map(|def| def.identifier_scanner()) - .collect(); - - let mut keyword_trie = Trie::new(); - for (name, def) in &context.keyword_scanner_defs { - match KeywordScannerAtomic::try_from_def(def) { - Some(atomic) => keyword_trie.insert(atomic.value(), atomic.clone()), - None => { - context - .keyword_compound_scanners - .insert(name, def.to_scanner_code().to_string()); - } - } - } - - context.keyword_trie_scanner = keyword_trie.to_scanner_code().to_string(); - } - // Collect all of the keyword scanners into a single list to be defined at top-level - self.keyword_compound_scanners = self - .scanner_contexts + let keyword_compound_scanners = contexts .values() .flat_map(|context| { context .keyword_compound_scanners .iter() - .map(|(name, code)| (*name, code.clone())) + .map(|(name, code)| (name.clone(), code.clone())) }) .collect(); @@ -189,41 +229,62 @@ impl GrammarVisitor for ParserModel { self.labels.remove("leading_trivia"); self.labels.remove("trailing_trivia"); - // Just being anal about tidying up :) - self.all_scanners.clear(); - self.current_context_name = ""; + ParserModel { + referenced_versions: self.referenced_versions, + nonterminal_kinds: self.nonterminal_kinds, + terminal_kinds: self.terminal_kinds, + trivia_scanner_names: self.trivia_scanner_names, + labels: self.labels, + parser_functions: self.parser_functions, + trivia_parser_functions: self.trivia_parser_functions, + // These are derived from the accumulated state + scanner_contexts: contexts, + scanner_functions, + keyword_compound_scanners, + } } +} +impl GrammarVisitor for ParserAccumulatorState { fn scanner_definition_enter(&mut self, scanner: &ScannerDefinitionRef) { - self.all_scanners.insert(scanner.name(), Rc::clone(scanner)); + self.all_scanners + .insert(scanner.name().clone(), Rc::clone(scanner)); } fn keyword_scanner_definition_enter(&mut self, scanner: &KeywordScannerDefinitionRef) { for def in scanner.definitions() { let versions = def.enabled.iter().chain(def.reserved.iter()); - self.referenced_versions.extend( - versions - .map(|vqr| &vqr.from) - // "Removed from 0.0.0" is an alias for "never"; it's never directly checked - .filter(|v| *v != &Version::new(0, 0, 0)) - .cloned(), - ); + for version in versions { + match version { + VersionSpecifier::Never => {} + VersionSpecifier::From { from } => { + self.referenced_versions.insert(from.clone()); + } + VersionSpecifier::Till { till } => { + 
self.referenced_versions.insert(till.clone()); + } + VersionSpecifier::Range { from, till } => { + self.referenced_versions.insert(from.clone()); + self.referenced_versions.insert(till.clone()); + } + } + } } } fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) { - self.set_current_context(parser.context()); + self.set_current_context(parser.context().clone()); let trivia_scanners = { use crate::parser::grammar::visitor::Visitable; #[derive(Default)] struct CollectTriviaScanners { - scanner_names: BTreeSet<&'static str>, + scanner_names: BTreeSet, } impl crate::parser::grammar::visitor::GrammarVisitor for CollectTriviaScanners { fn scanner_definition_enter(&mut self, node: &ScannerDefinitionRef) { - self.scanner_names.insert(node.name()); + self.scanner_names.insert(node.name().clone()); } } @@ -233,80 +294,105 @@ impl GrammarVisitor for ParserModel { }; self.trivia_scanner_names.extend(trivia_scanners); - self.trivia_parser_functions - .insert(parser.name(), parser.to_parser_code().to_string()); + self.trivia_parser_functions.insert( + parser.name().clone(), + RustCode(parser.to_parser_code().to_string()), + ); } fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) { // Have to set this regardless so that we can collect referenced scanners - self.set_current_context(parser.context()); + self.set_current_context(parser.context().clone()); if !parser.is_inline() { - self.nonterminal_kinds.insert(parser.name()); + self.nonterminal_kinds.insert(parser.name().clone()); let code = parser.to_parser_code(); self.parser_functions.insert( - parser.name(), - { - let nonterminal_kind = format_ident!("{}", parser.name()); - quote! { #code.with_kind(NonTerminalKind::#nonterminal_kind) } - } - .to_string(), + parser.name().clone(), + RustCode( + { + let nonterminal_kind = format_ident!("{}", parser.name()); + quote! { #code.with_kind(NonTerminalKind::#nonterminal_kind) } + } + .to_string(), + ), ); } } fn precedence_parser_definition_enter(&mut self, parser: &PrecedenceParserDefinitionRef) { - self.set_current_context(parser.context()); - self.nonterminal_kinds.insert(parser.name()); + self.set_current_context(parser.context().clone()); + self.nonterminal_kinds.insert(parser.name().clone()); for (_, name, _) in &parser.node().operators { - self.nonterminal_kinds.insert(name); + self.nonterminal_kinds.insert(name.clone()); } // While it's not common to parse a precedence expression as a standalone rule, // we generate a function for completeness. for (name, code) in parser.to_precedence_expression_parser_code() { - self.parser_functions.insert(name, code.to_string()); + self.parser_functions + .insert(name.clone(), RustCode(code.to_string())); } self.parser_functions.insert( - parser.name(), - { - let code = parser.to_parser_code(); - let nonterminal_kind = format_ident!("{}", parser.name()); - quote! { #code.with_kind(NonTerminalKind::#nonterminal_kind) } - } - .to_string(), + parser.name().clone(), + RustCode( + { + let code = parser.to_parser_code(); + let nonterminal_kind = format_ident!("{}", parser.name()); + quote! 
{ #code.with_kind(NonTerminalKind::#nonterminal_kind) } + } + .to_string(), + ), ); } fn scanner_definition_node_enter(&mut self, node: &ScannerDefinitionNode) { - if let ScannerDefinitionNode::Versioned(_, version_quality_ranges) = node { - for vqr in version_quality_ranges { - self.referenced_versions.insert(vqr.from.clone()); + if let ScannerDefinitionNode::Versioned(_, version_specifier) = node { + match version_specifier { + VersionSpecifier::Never => {} + VersionSpecifier::From { from } => { + self.referenced_versions.insert(from.clone()); + } + VersionSpecifier::Till { till } => { + self.referenced_versions.insert(till.clone()); + } + VersionSpecifier::Range { from, till } => { + self.referenced_versions.insert(from.clone()); + self.referenced_versions.insert(till.clone()); + } } } } fn parser_definition_node_enter(&mut self, node: &ParserDefinitionNode) { match node { - ParserDefinitionNode::Versioned(_, version_quality_ranges) => { - for vqr in version_quality_ranges { - self.referenced_versions.insert(vqr.from.clone()); + ParserDefinitionNode::Versioned(_, version_specifier) => match version_specifier { + VersionSpecifier::Never => {} + VersionSpecifier::From { from } => { + self.referenced_versions.insert(from.clone()); } - } + VersionSpecifier::Till { till } => { + self.referenced_versions.insert(till.clone()); + } + VersionSpecifier::Range { from, till } => { + self.referenced_versions.insert(from.clone()); + self.referenced_versions.insert(till.clone()); + } + }, ParserDefinitionNode::ScannerDefinition(scanner) => { - self.top_level_scanner_names.insert(scanner.name()); - self.terminal_kinds.insert(scanner.name()); + self.top_level_scanner_names.insert(scanner.name().clone()); + self.terminal_kinds.insert(scanner.name().clone()); self.current_context() .scanner_definitions - .insert(scanner.name()); + .insert(scanner.name().clone()); } ParserDefinitionNode::KeywordScannerDefinition(scanner) => { - self.terminal_kinds.insert(scanner.name()); + self.terminal_kinds.insert(scanner.name().clone()); self.current_context() .keyword_scanner_defs - .insert(scanner.name(), Rc::clone(scanner)); + .insert(scanner.name().clone(), Rc::clone(scanner)); } // Collect labels: @@ -345,7 +431,7 @@ impl GrammarVisitor for ParserModel { delimiters.get(close).is_none(), "Cannot use a closing delimiter as an opening one" ); - delimiters.insert(open, close); + delimiters.insert(open.clone(), close.clone()); } _ => {} }; diff --git a/crates/codegen/runtime/generator/src/parser/grammar.rs b/crates/codegen/runtime/generator/src/parser/grammar.rs index 6fea152c4e..38e074f88b 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar.rs @@ -6,19 +6,18 @@ use std::collections::{BTreeSet, HashMap}; +use codegen_language_definition::model::Identifier; use semver::Version; pub mod constructor; pub mod parser_definition; pub mod precedence_parser_definition; pub mod scanner_definition; -pub mod version_quality; pub mod visitor; pub use parser_definition::*; pub use precedence_parser_definition::*; pub use scanner_definition::*; -pub use version_quality::*; pub use visitor::*; pub struct Grammar { @@ -26,7 +25,7 @@ pub struct Grammar { pub versions: BTreeSet, pub leading_trivia_parser: TriviaParserDefinitionRef, pub trailing_trivia_parser: TriviaParserDefinitionRef, - pub elements: HashMap<&'static str, GrammarElement>, + pub elements: HashMap, } impl Grammar { diff --git a/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs 
b/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs index 007cd2d6b9..0d7256e9ea 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs @@ -7,13 +7,13 @@ use std::rc::Rc; use codegen_language_definition::model::{self, FieldsErrorRecovery, Identifier, Item}; use indexmap::IndexMap; +use once_cell::sync::Lazy; use crate::parser::grammar::{ - DelimitedRecoveryTokenThreshold, Grammar, GrammarElement, KeywordScannerDefinition, - KeywordScannerDefinitionNode, KeywordScannerDefinitionVersionedNode, Labeled, ParserDefinition, - ParserDefinitionNode, PrecedenceOperatorModel, PrecedenceParserDefinition, + DelimitedRecoveryTokenThreshold, Grammar, GrammarElement, KeywordScannerDefinition, Labeled, + ParserDefinition, ParserDefinitionNode, PrecedenceParserDefinition, PrecedenceParserDefinitionNode, ScannerDefinition, ScannerDefinitionNode, - TriviaParserDefinition, VersionQuality, VersionQualityRange, + TriviaParserDefinition, }; impl Grammar { @@ -39,12 +39,12 @@ impl Grammar { }; let leading_trivia = Rc::new(NamedTriviaParser { - name: "LeadingTrivia", + name: Identifier::from("LeadingTrivia"), def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx), }) as Rc; let trailing_trivia = Rc::new(NamedTriviaParser { - name: "TrailingTrivia", + name: Identifier::from("TrailingTrivia"), def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx), }) as Rc; @@ -85,8 +85,8 @@ impl Grammar { ctx.resolved.insert( parser_name.clone(), GrammarElement::ParserDefinition(Rc::new(NamedParserThunk { - name: parser_name.to_string().leak(), - context: lex_ctx.to_string().leak(), + name: parser_name, + context: lex_ctx, is_inline: true, def: OnceCell::from(def), })), @@ -96,7 +96,7 @@ impl Grammar { let resolved_items = ctx .resolved .iter() - .map(|(name, elem)| (name.to_string().leak() as &_, elem.clone())); + .map(|(name, elem)| (name.clone(), elem.clone())); Grammar { name: lang.name.to_string(), @@ -107,7 +107,7 @@ impl Grammar { .chain( [leading_trivia, trailing_trivia] .into_iter() - .map(|elem| (elem.name(), elem.into())), + .map(|elem| (elem.name().clone(), elem.into())), ) .collect(), } @@ -116,13 +116,13 @@ impl Grammar { #[derive(Debug)] struct NamedScanner { - name: &'static str, + name: Identifier, def: ScannerDefinitionNode, } impl ScannerDefinition for NamedScanner { - fn name(&self) -> &'static str { - self.name + fn name(&self) -> &Identifier { + &self.name } fn node(&self) -> &ScannerDefinitionNode { &self.def @@ -131,39 +131,39 @@ impl ScannerDefinition for NamedScanner { #[derive(Debug)] struct NamedKeywordScanner { - name: &'static str, - identifier_scanner_name: &'static str, - defs: Vec, + name: Identifier, + identifier_scanner_name: Identifier, + defs: Vec, } impl KeywordScannerDefinition for NamedKeywordScanner { - fn name(&self) -> &'static str { - self.name + fn name(&self) -> &Identifier { + &self.name } - fn definitions(&self) -> &[KeywordScannerDefinitionVersionedNode] { + fn definitions(&self) -> &[model::KeywordDefinition] { &self.defs } - fn identifier_scanner(&self) -> &'static str { - self.identifier_scanner_name + fn identifier_scanner(&self) -> &Identifier { + &self.identifier_scanner_name } } #[derive(Debug)] struct NamedTriviaParser { - name: &'static str, + name: Identifier, def: ParserDefinitionNode, } impl TriviaParserDefinition for NamedTriviaParser { - fn name(&self) -> &'static str { - self.name + fn name(&self) -> 
&Identifier { + &self.name } - fn context(&self) -> &'static str { - // NOTE: - "Default" + fn context(&self) -> &Identifier { + static DEFAULT: Lazy = Lazy::new(|| Identifier::from("Default")); + &DEFAULT } fn node(&self) -> &ParserDefinitionNode { @@ -173,19 +173,19 @@ impl TriviaParserDefinition for NamedTriviaParser { #[derive(Debug)] struct NamedParserThunk { - name: &'static str, - context: &'static str, + name: Identifier, + context: Identifier, is_inline: bool, def: OnceCell, } impl ParserDefinition for NamedParserThunk { - fn name(&self) -> &'static str { - self.name + fn name(&self) -> &Identifier { + &self.name } - fn context(&self) -> &'static str { - self.context + fn context(&self) -> &Identifier { + &self.context } fn is_inline(&self) -> bool { @@ -199,17 +199,17 @@ impl ParserDefinition for NamedParserThunk { #[derive(Debug)] struct NamedPrecedenceParserThunk { - name: &'static str, - context: &'static str, + name: Identifier, + context: Identifier, def: OnceCell, } impl PrecedenceParserDefinition for NamedPrecedenceParserThunk { - fn name(&self) -> &'static str { - self.name + fn name(&self) -> &Identifier { + &self.name } - fn context(&self) -> &'static str { - self.context + fn context(&self) -> &Identifier { + &self.context } fn node(&self) -> &PrecedenceParserDefinitionNode { @@ -237,37 +237,6 @@ impl ParserThunk { } } -fn enabled_to_range(spec: impl Into>) -> Vec { - let Some(spec) = spec.into() else { - return vec![]; - }; - - match spec { - model::VersionSpecifier::Never => vec![VersionQualityRange { - from: semver::Version::new(0, 0, 0), - quality: VersionQuality::Removed, - }], - model::VersionSpecifier::From { from } => vec![VersionQualityRange { - from, - quality: VersionQuality::Introduced, - }], - model::VersionSpecifier::Till { till } => vec![VersionQualityRange { - from: till, - quality: VersionQuality::Removed, - }], - model::VersionSpecifier::Range { from, till } => vec![ - VersionQualityRange { - from, - quality: VersionQuality::Introduced, - }, - VersionQualityRange { - from: till, - quality: VersionQuality::Removed, - }, - ], - } -} - struct ResolveCtx<'a> { items: &'a HashMap, Item)>, resolved: &'a mut HashMap, @@ -277,10 +246,9 @@ struct ResolveCtx<'a> { fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> GrammarElement { let (lex_ctx, elem) = ctx.items.get(ident).expect("Missing item"); - // FIXME: Don't leak let lex_ctx = lex_ctx - .as_ref() - .map_or("Default", |l| l.to_string().leak() as &_); + .clone() + .unwrap_or_else(|| Identifier::from("Default")); // The non-terminals are mutually recursive (so will be the resolution of their definitions), // so make sure to insert a thunk for non-terminals to resolve to break the cycle. @@ -293,8 +261,8 @@ fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> Gram false, ) => { let thunk = Rc::new(NamedParserThunk { - name: ident.to_string().leak(), - context: lex_ctx, + name: ident.clone(), + context: lex_ctx.clone(), is_inline: false, def: OnceCell::new(), }); @@ -306,8 +274,8 @@ fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> Gram } (Item::Precedence { .. 
}, false) => { let thunk = Rc::new(NamedPrecedenceParserThunk { - name: ident.to_string().leak(), - context: lex_ctx, + name: ident.clone(), + context: lex_ctx.clone(), def: OnceCell::new(), }); ctx.resolved.insert( @@ -361,7 +329,7 @@ fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> Gram Item::Precedence { item } => { thunk .as_precedence_def() - .set(resolve_precedence(item.deref().clone(), lex_ctx, ctx)) + .set(resolve_precedence(item.deref().clone(), &lex_ctx, ctx)) .unwrap(); } _ => unreachable!("Only non-terminals can be resolved here"), @@ -373,36 +341,22 @@ fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> Gram (None, None) => { let named_scanner = match elem { Item::Trivia { item } => NamedScanner { - name: ident.to_string().leak(), + name: ident.clone(), def: resolve_scanner(item.scanner.clone(), ctx), }, Item::Fragment { item } => NamedScanner { - name: ident.to_string().leak(), + name: ident.clone(), def: resolve_fragment(item.deref().clone(), ctx), }, Item::Token { item } => NamedScanner { - name: ident.to_string().leak(), + name: ident.clone(), def: resolve_token(item.deref().clone(), ctx), }, Item::Keyword { item } => { - let defs: Vec<_> = item - .definitions - .iter() - .cloned() - .map(|def| { - let value = resolve_keyword_value(def.value); - KeywordScannerDefinitionVersionedNode { - value, - enabled: enabled_to_range(def.enabled), - reserved: enabled_to_range(def.reserved), - } - }) - .collect(); - let kw_scanner = NamedKeywordScanner { - name: ident.to_string().leak(), - identifier_scanner_name: item.identifier.to_string().leak(), - defs, + name: ident.clone(), + identifier_scanner_name: item.identifier.clone(), + defs: item.definitions.clone(), }; // Keywords are special scanners and are handled separately @@ -487,21 +441,6 @@ fn resolve_token(token: model::TokenItem, ctx: &mut ResolveCtx<'_>) -> ScannerDe } } -fn resolve_keyword_value(value: model::KeywordValue) -> KeywordScannerDefinitionNode { - match value { - model::KeywordValue::Sequence { values } => KeywordScannerDefinitionNode::Sequence( - values.into_iter().map(resolve_keyword_value).collect(), - ), - model::KeywordValue::Choice { values } => KeywordScannerDefinitionNode::Choice( - values.into_iter().map(resolve_keyword_value).collect(), - ), - model::KeywordValue::Optional { value } => { - KeywordScannerDefinitionNode::Optional(Box::new(resolve_keyword_value(*value))) - } - model::KeywordValue::Atom { atom } => KeywordScannerDefinitionNode::Atom(atom), - } -} - fn resolve_trivia( parser: model::TriviaParser, kind: TriviaKind, @@ -689,7 +628,7 @@ fn resolve_separated(item: model::SeparatedItem, ctx: &mut ResolveCtx<'_>) -> Pa fn resolve_precedence( item: model::PrecedenceItem, - lex_ctx: &'static str, + lex_ctx: &Identifier, ctx: &mut ResolveCtx<'_>, ) -> PrecedenceParserDefinitionNode { let primaries: Vec<_> = item @@ -709,35 +648,20 @@ fn resolve_precedence( )), }); - #[allow(clippy::items_after_statements)] // simple and specific to this site - fn model_to_enum(model: model::OperatorModel) -> PrecedenceOperatorModel { - match model { - model::OperatorModel::BinaryLeftAssociative => { - PrecedenceOperatorModel::BinaryLeftAssociative - } - model::OperatorModel::BinaryRightAssociative => { - PrecedenceOperatorModel::BinaryRightAssociative - } - model::OperatorModel::Prefix => PrecedenceOperatorModel::Prefix, - model::OperatorModel::Postfix => PrecedenceOperatorModel::Postfix, - } - } - let mut operators = vec![]; let mut precedence_expression_names = 
Vec::with_capacity(item.precedence_expressions.len()); for expr in item.precedence_expressions { let name = &expr.name; - // TODO(#638): Don't leak - let leaked_name = name.to_string().leak() as &_; + let name = name.clone(); - precedence_expression_names.push(leaked_name); + precedence_expression_names.push(name.clone()); // Register it as a regular parser with a given name, however we need to // define it as a choice over the "operator" sequences // Then, when returning, we should actually return a node ref pointing to that combined parser // And ideally, we shouldn't even use the "enabled" mode of the original DSL let thunk = Rc::new(NamedParserThunk { - name: leaked_name, - context: lex_ctx, + name: name.clone(), + context: lex_ctx.clone(), is_inline: true, def: OnceCell::new(), }); @@ -769,7 +693,7 @@ fn resolve_precedence( }; all_operators.push(def.clone()); - operators.push((model_to_enum(model), leaked_name, def)); + operators.push((model, name.clone(), def)); } // Register the combined parser definition to appease the codegen and to mark terminals @@ -781,7 +705,7 @@ fn resolve_precedence( ))) .unwrap(); assert!( - !ctx.resolved.contains_key(name), + !ctx.resolved.contains_key(&name), "Encountered a duplicate Precedence Expression named {name} when resolving" ); ctx.resolved.insert( @@ -846,7 +770,7 @@ trait VersionWrapped { impl VersionWrapped for ParserDefinitionNode { fn versioned(self, enabled: Option) -> Self { if let Some(enabled) = enabled { - Self::Versioned(Box::new(self), enabled_to_range(enabled)) + Self::Versioned(Box::new(self), enabled) } else { self } @@ -856,7 +780,7 @@ impl VersionWrapped for ParserDefinitionNode { impl VersionWrapped for ScannerDefinitionNode { fn versioned(self, enabled: Option) -> Self { if let Some(enabled) = enabled { - Self::Versioned(Box::new(self), enabled_to_range(enabled)) + Self::Versioned(Box::new(self), enabled) } else { self } diff --git a/crates/codegen/runtime/generator/src/parser/grammar/parser_definition.rs b/crates/codegen/runtime/generator/src/parser/grammar/parser_definition.rs index 640b838fc1..e9c6985de6 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar/parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/parser_definition.rs @@ -1,12 +1,11 @@ use std::fmt::Debug; use std::rc::Rc; -use codegen_language_definition::model; +use codegen_language_definition::model::{self, Identifier}; use crate::parser::grammar::visitor::{GrammarVisitor, Visitable}; use crate::parser::grammar::{ KeywordScannerDefinitionRef, PrecedenceParserDefinitionRef, ScannerDefinitionRef, - VersionQualityRange, }; /// A named wrapper, used to give a name to a [`ParserDefinitionNode`]. 
@@ -25,9 +24,9 @@ impl std::ops::Deref for Labeled { } pub trait ParserDefinition: Debug { - fn name(&self) -> &'static str; + fn name(&self) -> &Identifier; fn node(&self) -> &ParserDefinitionNode; - fn context(&self) -> &'static str; + fn context(&self) -> &Identifier; fn is_inline(&self) -> bool; } @@ -41,9 +40,9 @@ impl Visitable for ParserDefinitionRef { } pub trait TriviaParserDefinition: Debug { - fn name(&self) -> &'static str; + fn name(&self) -> &Identifier; fn node(&self) -> &ParserDefinitionNode; - fn context(&self) -> &'static str; + fn context(&self) -> &Identifier; } pub type TriviaParserDefinitionRef = Rc; @@ -76,7 +75,7 @@ impl From for DelimitedRecoveryTokenThreshold { #[derive(Clone, Debug)] pub enum ParserDefinitionNode { - Versioned(Box, Vec), + Versioned(Box, model::VersionSpecifier), Optional(Box), ZeroOrMore(Labeled>), OneOrMore(Labeled>), diff --git a/crates/codegen/runtime/generator/src/parser/grammar/precedence_parser_definition.rs b/crates/codegen/runtime/generator/src/parser/grammar/precedence_parser_definition.rs index f26143d8a3..4ee572b6d3 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar/precedence_parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/precedence_parser_definition.rs @@ -1,12 +1,15 @@ use std::fmt::Debug; use std::rc::Rc; +use codegen_language_definition::model::Identifier; +pub use codegen_language_definition::model::OperatorModel as PrecedenceOperatorModel; + use crate::parser::grammar::{GrammarVisitor, ParserDefinitionNode, Visitable}; pub trait PrecedenceParserDefinition: Debug { - fn name(&self) -> &'static str; + fn name(&self) -> &Identifier; fn node(&self) -> &PrecedenceParserDefinitionNode; - fn context(&self) -> &'static str; + fn context(&self) -> &Identifier; } pub type PrecedenceParserDefinitionRef = Rc; @@ -21,12 +24,8 @@ impl Visitable for PrecedenceParserDefinitionRef { #[derive(Clone, Debug)] pub struct PrecedenceParserDefinitionNode { pub primary_expression: Box, - pub operators: Vec<( - PrecedenceOperatorModel, - &'static str, // name - ParserDefinitionNode, - )>, - pub precedence_expression_names: Vec<&'static str>, + pub operators: Vec<(PrecedenceOperatorModel, Identifier, ParserDefinitionNode)>, + pub precedence_expression_names: Vec, } impl Visitable for PrecedenceParserDefinitionNode { @@ -35,11 +34,3 @@ impl Visitable for PrecedenceParserDefinitionNode { self.primary_expression.accept_visitor(visitor); } } - -#[derive(Clone, Debug)] -pub enum PrecedenceOperatorModel { - BinaryLeftAssociative, - BinaryRightAssociative, - Prefix, - Postfix, -} diff --git a/crates/codegen/runtime/generator/src/parser/grammar/scanner_definition.rs b/crates/codegen/runtime/generator/src/parser/grammar/scanner_definition.rs index 63a01c44a0..b964c67b01 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar/scanner_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/scanner_definition.rs @@ -1,10 +1,12 @@ use std::fmt::Debug; use std::rc::Rc; -use crate::parser::grammar::{GrammarVisitor, VersionQualityRange, Visitable}; +use codegen_language_definition::model::{self, Identifier}; + +use crate::parser::grammar::{GrammarVisitor, Visitable}; pub trait ScannerDefinition: Debug { - fn name(&self) -> &'static str; + fn name(&self) -> &Identifier; fn node(&self) -> &ScannerDefinitionNode; } @@ -19,7 +21,7 @@ impl Visitable for ScannerDefinitionRef { #[derive(Clone, Debug)] pub enum ScannerDefinitionNode { - Versioned(Box, Vec), + Versioned(Box, model::VersionSpecifier), 
Optional(Box), ZeroOrMore(Box), OneOrMore(Box), @@ -67,9 +69,9 @@ impl Visitable for ScannerDefinitionNode { } pub trait KeywordScannerDefinition: Debug { - fn name(&self) -> &'static str; - fn identifier_scanner(&self) -> &'static str; - fn definitions(&self) -> &[KeywordScannerDefinitionVersionedNode]; + fn name(&self) -> &Identifier; + fn identifier_scanner(&self) -> &Identifier; + fn definitions(&self) -> &[model::KeywordDefinition]; } pub type KeywordScannerDefinitionRef = Rc; @@ -80,37 +82,18 @@ impl Visitable for KeywordScannerDefinitionRef { } } -#[derive(Debug)] -pub struct KeywordScannerDefinitionVersionedNode { - // Underlying keyword scanner (i.e. identifier scanner) - pub value: KeywordScannerDefinitionNode, - /// When the keyword scanner is enabled - pub enabled: Vec, - /// When the keyword is reserved, i.e. can't be used in other position (e.g. as a name) - pub reserved: Vec, -} - -#[derive(Clone, Debug)] -pub enum KeywordScannerDefinitionNode { - Optional(Box), - Sequence(Vec), - Choice(Vec), - Atom(String), - // No repeatable combinators, because keywords are assumed to be finite -} - -impl From for ScannerDefinitionNode { - fn from(val: KeywordScannerDefinitionNode) -> Self { +impl From for ScannerDefinitionNode { + fn from(val: model::KeywordValue) -> Self { match val { - KeywordScannerDefinitionNode::Optional(node) => { - ScannerDefinitionNode::Optional(Box::new((*node).into())) + model::KeywordValue::Optional { value } => { + ScannerDefinitionNode::Optional(Box::new((*value).into())) } - KeywordScannerDefinitionNode::Sequence(nodes) => { - ScannerDefinitionNode::Sequence(nodes.into_iter().map(Into::into).collect()) + model::KeywordValue::Sequence { values } => { + ScannerDefinitionNode::Sequence(values.into_iter().map(Into::into).collect()) } - KeywordScannerDefinitionNode::Atom(string) => ScannerDefinitionNode::Literal(string), - KeywordScannerDefinitionNode::Choice(nodes) => { - ScannerDefinitionNode::Choice(nodes.into_iter().map(Into::into).collect()) + model::KeywordValue::Atom { atom } => ScannerDefinitionNode::Literal(atom), + model::KeywordValue::Choice { values } => { + ScannerDefinitionNode::Choice(values.into_iter().map(Into::into).collect()) } } } @@ -128,8 +111,8 @@ impl KeywordScannerAtomic { /// Wraps the keyword scanner definition if it is a single atom value. pub fn try_from_def(def: &KeywordScannerDefinitionRef) -> Option { match def.definitions() { - [KeywordScannerDefinitionVersionedNode { - value: KeywordScannerDefinitionNode::Atom(_), + [model::KeywordDefinition { + value: model::KeywordValue::Atom { .. }, .. }] => Some(Self(Rc::clone(def))), _ => None, @@ -146,14 +129,17 @@ impl std::ops::Deref for KeywordScannerAtomic { } impl KeywordScannerAtomic { - pub fn definition(&self) -> &KeywordScannerDefinitionVersionedNode { - let def = &self.0.definitions().first(); - def.expect("KeywordScannerAtomic should have exactly one definition") + pub fn definition(&self) -> &model::KeywordDefinition { + self.0 + .definitions() + .first() + .expect("KeywordScannerAtomic should have exactly one definition") } + pub fn value(&self) -> &str { match self.definition() { - KeywordScannerDefinitionVersionedNode { - value: KeywordScannerDefinitionNode::Atom(atom), + model::KeywordDefinition { + value: model::KeywordValue::Atom { atom }, .. 
} => atom, _ => unreachable!("KeywordScannerAtomic should have a single atom value"), diff --git a/crates/codegen/runtime/generator/src/parser/grammar/version_quality.rs b/crates/codegen/runtime/generator/src/parser/grammar/version_quality.rs deleted file mode 100644 index 37b4210567..0000000000 --- a/crates/codegen/runtime/generator/src/parser/grammar/version_quality.rs +++ /dev/null @@ -1,13 +0,0 @@ -use semver::Version; - -#[derive(Clone, Debug, Copy, PartialEq, Eq, strum_macros::Display)] -pub enum VersionQuality { - Introduced, - Removed, -} - -#[derive(Clone, Debug)] -pub struct VersionQualityRange { - pub from: Version, - pub quality: VersionQuality, -} diff --git a/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs b/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs index 4422e067ff..de796d29d5 100644 --- a/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs @@ -1,11 +1,10 @@ +use codegen_language_definition::model; use proc_macro2::TokenStream; use quote::{format_ident, quote}; -use crate::parser::grammar::{ - KeywordScannerDefinitionNode, KeywordScannerDefinitionRef, ScannerDefinitionNode, -}; -use crate::parser::parser_definition::VersionQualityRangeVecExtensions; +use crate::parser::grammar::{KeywordScannerDefinitionRef, ScannerDefinitionNode}; use crate::parser::scanner_definition::ScannerDefinitionNodeExtensions; +use crate::parser::versioned::VersionedQuote; pub trait KeywordScannerDefinitionExtensions { fn to_scanner_code(&self) -> TokenStream; @@ -21,8 +20,8 @@ impl KeywordScannerDefinitionExtensions for KeywordScannerDefinitionRef { .iter() .map(|versioned_kw| { let scanner = versioned_kw.value.to_scanner_code(); - let enabled_cond = versioned_kw.enabled.as_bool_expr(); - let reserved_cond = versioned_kw.reserved.as_bool_expr(); + let enabled_cond = versioned_kw.enabled.as_ref().as_bool_expr(); + let reserved_cond = versioned_kw.reserved.as_ref().as_bool_expr(); // Simplify the emitted code if we trivially know that reserved or enabled is true match (&*reserved_cond.to_string(), &*enabled_cond.to_string()) { @@ -80,7 +79,7 @@ impl KeywordScannerDefinitionExtensions for KeywordScannerDefinitionRef { } } -impl KeywordScannerDefinitionExtensions for KeywordScannerDefinitionNode { +impl KeywordScannerDefinitionExtensions for model::KeywordValue { fn to_scanner_code(&self) -> TokenStream { // This is a subset; let's reuse that ScannerDefinitionNode::from(self.clone()).to_scanner_code() diff --git a/crates/codegen/runtime/generator/src/parser/parser_definition.rs b/crates/codegen/runtime/generator/src/parser/parser_definition.rs index 4cb5df8791..f105770794 100644 --- a/crates/codegen/runtime/generator/src/parser/parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/parser_definition.rs @@ -1,12 +1,12 @@ +use codegen_language_definition::model::{Identifier, VersionSpecifier}; use inflector::Inflector; use proc_macro2::TokenStream; use quote::{format_ident, quote}; -use semver::Version; use crate::parser::grammar::{ - Labeled, ParserDefinitionNode, ParserDefinitionRef, TriviaParserDefinitionRef, VersionQuality, - VersionQualityRange, + Labeled, ParserDefinitionNode, ParserDefinitionRef, TriviaParserDefinitionRef, }; +use crate::parser::versioned::{Versioned as _, VersionedQuote as _}; pub trait ParserDefinitionExtensions { fn to_parser_code(&self) -> TokenStream; @@ -14,7 +14,7 @@ pub trait 
ParserDefinitionExtensions { impl ParserDefinitionExtensions for ParserDefinitionRef { fn to_parser_code(&self) -> TokenStream { - self.node().applicable_version_quality_ranges().wrap_code( + self.node().version_specifier().to_conditional_code( self.node().to_parser_code(self.context(), false), Some(quote! { ParserResult::disabled() }), ) @@ -28,13 +28,12 @@ impl ParserDefinitionExtensions for TriviaParserDefinitionRef { } pub trait ParserDefinitionNodeExtensions { - fn to_parser_code(&self, context_name: &'static str, is_trivia: bool) -> TokenStream; - fn applicable_version_quality_ranges(&self) -> Vec; + fn to_parser_code(&self, context_name: &Identifier, is_trivia: bool) -> TokenStream; } impl ParserDefinitionNodeExtensions for ParserDefinitionNode { #[allow(clippy::too_many_lines)] // giant switch over parser definition node types - fn to_parser_code(&self, context_name: &'static str, is_trivia: bool) -> TokenStream { + fn to_parser_code(&self, context_name: &Identifier, is_trivia: bool) -> TokenStream { let context = format_ident!("{context_name}"); let lex_ctx = quote! { LexicalContextType::#context }; @@ -102,7 +101,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { ( value.to_parser_code(context_name, is_trivia), label.clone(), - value.applicable_version_quality_ranges(), + value.version_specifier(), ) })), }, @@ -111,7 +110,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { let parser = make_choice_versioned(value.iter().map(|node| { ( node.to_parser_code(context_name, is_trivia), - node.applicable_version_quality_ranges(), + node.version_specifier(), ) })); @@ -205,7 +204,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { let threshold = threshold.0; let parser = body.to_parser_code(context_name, is_trivia); - let body_parser = body.applicable_version_quality_ranges().wrap_code( + let body_parser = body.version_specifier().to_conditional_code( quote! { seq.elem(#parser .recover_until_with_nested_delims::<_, #lex_ctx>(input, @@ -270,7 +269,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { }; let parser = body.to_parser_code(context_name, is_trivia); - let body_parser = body.applicable_version_quality_ranges().wrap_code( + let body_parser = body.version_specifier().to_conditional_code( quote! { seq.elem( #parser @@ -298,81 +297,18 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { } } } - - fn applicable_version_quality_ranges(&self) -> Vec { - match self { - ParserDefinitionNode::Versioned(_, version_quality_ranges) => { - version_quality_ranges.clone() - } - - ParserDefinitionNode::Optional(value) - | ParserDefinitionNode::ZeroOrMore(Labeled { value, .. }) - | ParserDefinitionNode::OneOrMore(Labeled { value, .. 
}) => { - value.applicable_version_quality_ranges() - } - - _ => vec![], - } - } -} - -pub trait VersionQualityRangeVecExtensions { - fn wrap_code(&self, if_true: TokenStream, if_false: Option) -> TokenStream; - // Quotes a boolean expression that is satisfied for the given version quality ranges - fn as_bool_expr(&self) -> TokenStream; -} - -impl VersionQualityRangeVecExtensions for Vec { - fn as_bool_expr(&self) -> TokenStream { - if self.is_empty() { - quote!(true) - } else { - // Optimize for legibility; return `false` for "never enabled" - match self.as_slice() { - [VersionQualityRange { - from, - quality: VersionQuality::Removed, - }] if from == &Version::new(0, 0, 0) => return quote!(false), - _ => {} - } - - let flags = self.iter().map(|vqr| { - let flag = format_ident!( - "version_is_at_least_{v}", - v = &vqr.from.to_string().replace('.', "_") - ); - if vqr.quality == VersionQuality::Introduced { - quote! { self.#flag } - } else { - quote! { !self.#flag } - } - }); - quote! { #(#flags)&&* } - } - } - - fn wrap_code(&self, if_true: TokenStream, if_false: Option) -> TokenStream { - if self.is_empty() { - if_true - } else { - let condition = self.as_bool_expr(); - - let else_part = if_false.map(|if_false| quote! { else { #if_false } }); - quote! { if #condition { #if_true } #else_part } - } - } } pub fn make_sequence(parsers: impl IntoIterator) -> TokenStream { make_sequence_versioned( parsers .into_iter() - .map(|parser| (parser, String::new(), vec![])), + .map(|parser| (parser, String::new(), None)), ) } -pub fn make_sequence_versioned( - parsers: impl IntoIterator)>, +pub fn make_sequence_versioned<'a>( + parsers: impl IntoIterator)>, ) -> TokenStream { let parsers = parsers .into_iter() @@ -385,7 +321,7 @@ pub fn make_sequence_versioned( quote! { seq.elem_labeled(EdgeLabel::#label, #parser)?; } }; - versions.wrap_code(code, None) + versions.to_conditional_code(code, None) }) .collect::>(); quote! { @@ -397,16 +333,16 @@ pub fn make_sequence_versioned( } pub fn make_choice(parsers: impl IntoIterator) -> TokenStream { - make_choice_versioned(parsers.into_iter().map(|parser| (parser, vec![]))) + make_choice_versioned(parsers.into_iter().map(|parser| (parser, None))) } -fn make_choice_versioned( - parsers: impl IntoIterator)>, +fn make_choice_versioned<'a>( + parsers: impl IntoIterator)>, ) -> TokenStream { let parsers = parsers .into_iter() .map(|(parser, versions)| { - versions.wrap_code( + versions.to_conditional_code( quote! { let result = #parser; choice.consider(input, result)?; diff --git a/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs b/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs index 9a3950a107..e572d66d7a 100644 --- a/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs @@ -1,3 +1,4 @@ +use codegen_language_definition::model::Identifier; use inflector::Inflector; use proc_macro2::{Ident, TokenStream}; use quote::{format_ident, quote}; @@ -14,7 +15,7 @@ pub trait PrecedenceParserDefinitionExtensions { /// Emit a helper parser function for each precedence expression that ensures the main parser /// identifies a single node of the expected type, with a child node being the expected /// precedence expression. 
- fn to_precedence_expression_parser_code(&self) -> Vec<(&'static str, TokenStream)>; + fn to_precedence_expression_parser_code(&self) -> Vec<(Identifier, TokenStream)>; } impl PrecedenceParserDefinitionExtensions for PrecedenceParserDefinitionRef { @@ -25,7 +26,7 @@ impl PrecedenceParserDefinitionExtensions for PrecedenceParserDefinitionRef { ) } - fn to_precedence_expression_parser_code(&self) -> Vec<(&'static str, TokenStream)> { + fn to_precedence_expression_parser_code(&self) -> Vec<(Identifier, TokenStream)> { let mut res = vec![]; let parser_name = format_ident!("{}", self.name().to_snake_case()); let nonterminal_name = format_ident!("{}", self.name().to_pascal_case()); @@ -51,14 +52,14 @@ impl PrecedenceParserDefinitionExtensions for PrecedenceParserDefinitionRef { _ => ParserResult::no_match(vec![]), } }; - res.push((*name, code)); + res.push((name.clone(), code)); } res } } pub trait PrecedenceParserDefinitionNodeExtensions { - fn to_parser_code(&self, context_name: &'static str, expression_kind: Ident) -> TokenStream; + fn to_parser_code(&self, context_name: &Identifier, expression_kind: Ident) -> TokenStream; } impl PrecedenceParserDefinitionNodeExtensions for PrecedenceParserDefinitionNode { @@ -103,7 +104,7 @@ impl PrecedenceParserDefinitionNodeExtensions for PrecedenceParserDefinitionNode // is independent of the grammar. #[allow(clippy::too_many_lines)] // Repetition-heavy with 4 kinds of precedence operators - fn to_parser_code(&self, context_name: &'static str, expression_kind: Ident) -> TokenStream { + fn to_parser_code(&self, context_name: &Identifier, expression_kind: Ident) -> TokenStream { let mut prefix_operator_parsers: Vec = Vec::new(); let mut postfix_operator_parsers: Vec = Vec::new(); let mut binary_operator_parsers: Vec = Vec::new(); diff --git a/crates/codegen/runtime/generator/src/parser/scanner_definition.rs b/crates/codegen/runtime/generator/src/parser/scanner_definition.rs index f02f90b6eb..efcb4327dd 100644 --- a/crates/codegen/runtime/generator/src/parser/scanner_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/scanner_definition.rs @@ -5,7 +5,7 @@ use proc_macro2::TokenStream; use quote::{format_ident, quote}; use crate::parser::grammar::{ScannerDefinitionNode, ScannerDefinitionRef}; -use crate::parser::parser_definition::VersionQualityRangeVecExtensions; +use crate::parser::versioned::VersionedQuote; pub trait ScannerDefinitionExtensions { fn to_scanner_code(&self) -> TokenStream; @@ -51,7 +51,7 @@ impl ScannerDefinitionNodeExtensions for ScannerDefinitionNode { match self { ScannerDefinitionNode::Versioned(body, version_quality_ranges) => { let body = body.to_scanner_code(); - version_quality_ranges.wrap_code(body, Some(quote! { false })) + Some(version_quality_ranges).to_conditional_code(body, Some(quote! 
{ false })) } ScannerDefinitionNode::Optional(node) => { diff --git a/crates/codegen/runtime/generator/src/parser/trie.rs b/crates/codegen/runtime/generator/src/parser/trie.rs index 59971c6e92..e6d92136b6 100644 --- a/crates/codegen/runtime/generator/src/parser/trie.rs +++ b/crates/codegen/runtime/generator/src/parser/trie.rs @@ -1,14 +1,12 @@ use std::collections::BTreeMap; use std::fmt::Debug; +use codegen_language_definition::model::KeywordDefinition; use proc_macro2::TokenStream; use quote::{format_ident, quote}; -use crate::parser::grammar::{ - KeywordScannerAtomic, KeywordScannerDefinitionVersionedNode, ScannerDefinitionNode, - ScannerDefinitionRef, VersionQualityRange, -}; -use crate::parser::parser_definition::VersionQualityRangeVecExtensions; +use crate::parser::grammar::{KeywordScannerAtomic, ScannerDefinitionRef}; +use crate::parser::versioned::{Versioned as _, VersionedQuote as _}; #[derive(Clone, Debug, Default)] pub struct Trie { @@ -92,26 +90,6 @@ impl Trie { } } -trait VersionWrapped { - fn applicable_version_quality_ranges(&self) -> Vec; -} - -impl VersionWrapped for ScannerDefinitionNode { - fn applicable_version_quality_ranges(&self) -> Vec { - match self { - ScannerDefinitionNode::Versioned(_, version_quality_ranges) => { - version_quality_ranges.clone() - } - - ScannerDefinitionNode::Optional(node) - | ScannerDefinitionNode::ZeroOrMore(node) - | ScannerDefinitionNode::OneOrMore(node) => node.applicable_version_quality_ranges(), - - _ => vec![], - } - } -} - /// Used together with [`Trie`]. Represents the payload of a trie node and can be used to customize /// the emitted code. /// @@ -126,7 +104,7 @@ impl Payload for ScannerDefinitionRef { fn to_leaf_code(&self) -> TokenStream { let kind = format_ident!("{}", self.name()); - self.node().applicable_version_quality_ranges().wrap_code( + self.node().version_specifier().to_conditional_code( quote! { Some(TerminalKind::#kind) }, Some(Self::default_case()), ) @@ -141,12 +119,12 @@ impl Payload for KeywordScannerAtomic { fn to_leaf_code(&self) -> TokenStream { let kind = format_ident!("{}", self.name()); - let KeywordScannerDefinitionVersionedNode { + let KeywordDefinition { enabled, reserved, .. } = self.definition(); - let enabled_cond = enabled.as_bool_expr(); - let reserved_cond = reserved.as_bool_expr(); + let enabled_cond = enabled.as_ref().as_bool_expr(); + let reserved_cond = reserved.as_ref().as_bool_expr(); // Simplify the emitted code if we trivially know that reserved or enabled is true match (&*reserved_cond.to_string(), &*enabled_cond.to_string()) { diff --git a/crates/codegen/runtime/generator/src/parser/versioned.rs b/crates/codegen/runtime/generator/src/parser/versioned.rs new file mode 100644 index 0000000000..eb2f3b52ed --- /dev/null +++ b/crates/codegen/runtime/generator/src/parser/versioned.rs @@ -0,0 +1,96 @@ +use codegen_language_definition::model::VersionSpecifier; +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; +use semver::Version; + +use crate::parser::grammar::{Labeled, ParserDefinitionNode, ScannerDefinitionNode}; + +pub trait Versioned { + fn version_specifier(&self) -> Option<&VersionSpecifier>; +} + +impl Versioned for ParserDefinitionNode { + fn version_specifier(&self) -> Option<&VersionSpecifier> { + match self { + ParserDefinitionNode::Versioned(_, version_specifier) => Some(version_specifier), + + ParserDefinitionNode::Optional(value) + | ParserDefinitionNode::ZeroOrMore(Labeled { value, .. }) + | ParserDefinitionNode::OneOrMore(Labeled { value, .. 
}) => value.version_specifier(), + + _ => None, + } + } +} + +impl Versioned for ScannerDefinitionNode { + fn version_specifier(&self) -> Option<&VersionSpecifier> { + match self { + ScannerDefinitionNode::Versioned(_, version_quality_ranges) => { + Some(version_quality_ranges) + } + + ScannerDefinitionNode::Optional(node) + | ScannerDefinitionNode::ZeroOrMore(node) + | ScannerDefinitionNode::OneOrMore(node) => node.version_specifier(), + + _ => None, + } + } +} + +pub trait VersionedQuote { + /// Depending on the `as_bool_expr` result, wraps the given code in an `if` block and optionally includes an `else` block + fn to_conditional_code( + &self, + if_true: TokenStream, + if_false: Option, + ) -> TokenStream; + /// Quotes a boolean expression that is satisfied for the given version quality ranges + fn as_bool_expr(&self) -> TokenStream; +} + +impl VersionedQuote for Option<&VersionSpecifier> { + fn to_conditional_code( + &self, + if_true: TokenStream, + if_false: Option, + ) -> TokenStream { + if self.is_none() { + if_true + } else { + let condition = self.as_bool_expr(); + + let else_part = if_false.map(|if_false| quote! { else { #if_false } }); + quote! { if #condition { #if_true } #else_part } + } + } + + fn as_bool_expr(&self) -> TokenStream { + let to_version_flag_name = |v: &Version| { + format_ident!( + "version_is_at_least_{v}", + v = &v.to_string().replace('.', "_") + ) + }; + + match self { + // No constraints imposed, so always enabled + None => quote!(true), + Some(VersionSpecifier::Never) => quote!(false), + Some(VersionSpecifier::From { from }) => { + let flag = to_version_flag_name(from); + quote! { self.#flag } + } + Some(VersionSpecifier::Till { till }) => { + let flag = to_version_flag_name(till); + quote! { ! self.#flag } + } + Some(VersionSpecifier::Range { from, till }) => { + let from_flag = to_version_flag_name(from); + let till_flag = to_version_flag_name(till); + quote! { self.#from_flag && ! self.#till_flag } + } + } + } +}
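
The new `versioned` module replaces the old `VersionQuality`/`VersionQualityRange` pair with the DSLv2 `VersionSpecifier` used directly. A minimal sketch of how the helper is expected to behave when called from inside the generator's `parser` module; the token output quoted in the comments is approximate, and `parser_code` is just a placeholder:

    use codegen_language_definition::model::VersionSpecifier;
    use quote::quote;
    use semver::Version;

    use crate::parser::versioned::VersionedQuote as _;

    fn versioned_sketch() {
        let range = VersionSpecifier::Range {
            from: Version::new(0, 4, 21),
            till: Version::new(0, 5, 0),
        };
        // Renders roughly as:
        // `if self.version_is_at_least_0_4_21 && ! self.version_is_at_least_0_5_0
        //  { parser_code } else { ParserResult::disabled() }`
        let _guarded = Some(&range).to_conditional_code(
            quote! { parser_code },
            Some(quote! { ParserResult::disabled() }),
        );
        // No specifier means unconditionally enabled; `VersionSpecifier::Never` renders as `false`.
        let _always_on = None::<&VersionSpecifier>.as_bool_expr();
    }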
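
The `quote::IdentFragment` impl on `Identifier` is what keeps call sites such as `format_ident!("{}", parser.name())` working now that names are `Identifier` values rather than leaked `&'static str`s. A small illustrative sketch (the `demo_ident` function is hypothetical, not part of the patch):

    use codegen_language_definition::model::Identifier;
    use quote::format_ident;

    fn demo_ident() -> proc_macro2::Ident {
        let name = Identifier::from("LeadingTrivia");
        // `format_ident!` formats its arguments via `IdentFragment` rather than `Display`,
        // so an `Identifier` can be passed directly, without `to_string()` or `leak()`.
        format_ident!("{}", name)
    }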