From fd12bdce6140e4653a6499bb1074c50b1d23a6d1 Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Tue, 21 May 2024 13:51:49 +0200 Subject: [PATCH 1/6] Document each field in the runtime generator model --- crates/codegen/runtime/generator/src/model.rs | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/crates/codegen/runtime/generator/src/model.rs b/crates/codegen/runtime/generator/src/model.rs index cef9f2a15d..9c7ac083ad 100644 --- a/crates/codegen/runtime/generator/src/model.rs +++ b/crates/codegen/runtime/generator/src/model.rs @@ -23,43 +23,66 @@ use crate::trie::Trie; #[derive(Default, Serialize)] pub struct RuntimeModel { + /// Defines the `Language::SUPPORTED_VERSIONS` field. all_versions: BTreeSet, + /// Constructs inner `Language` the state to evaluate the version-dependent branches. referenced_versions: BTreeSet, + /// Defines the `RuleKind` enum variants. rule_kinds: BTreeSet<&'static str>, + /// Defines the `TokenKind`` enum variants. token_kinds: BTreeSet<&'static str>, + /// Defines `TokenKind::is_trivia` method. trivia_scanner_names: BTreeSet<&'static str>, + /// Defines `NodeLabel` enum variants. labels: BTreeSet, + /// Defines the top-level scanner functions in `Language`. scanner_functions: BTreeMap<&'static str, String>, // (name of scanner, code) + // Defines the `LexicalContext(Type)` enum and type-level variants. scanner_contexts: BTreeMap<&'static str, ScannerContext>, + /// Defines the top-level compound scanners used when lexing in `Language`. keyword_compound_scanners: BTreeMap<&'static str, String>, // (name of the KW scanner, code) + /// Defines the top-level parser functions in `Language`. parser_functions: BTreeMap<&'static str, String>, // (name of parser, code) + /// Defines the top-level trivia parser functions in `Language`. 
trivia_parser_functions: BTreeMap<&'static str, String>, // (name of parser, code) ast: AstModel, + // Internal state: + /// Makes sure to codegen the scanner functions that are referenced by other scanners. #[serde(skip)] top_level_scanner_names: BTreeSet<&'static str>, + /// Lookup table for all scanners; used to generate trie scanners. #[serde(skip)] all_scanners: BTreeMap<&'static str, ScannerDefinitionRef>, + /// The current context of a parent scanner/parser being processed. #[serde(skip)] current_context_name: &'static str, } #[derive(Default, Serialize)] struct ScannerContext { - #[serde(skip)] - scanner_definitions: BTreeSet<&'static str>, + /// Rust code for the trie scanner that matches literals. literal_scanner: String, + /// Names of the compound scanners that are keywords. + // Values (Rust code) is only used to generate the top-level `keyword_compound_scanners`. keyword_compound_scanners: BTreeMap<&'static str, String>, + /// Rust code for the trie scanner that matches keywords keyword_trie_scanner: String, - #[serde(skip)] - keyword_scanner_defs: BTreeMap<&'static str, KeywordScannerDefinitionRef>, + /// Names of the scanners for identifiers that can be promoted to keywords. promotable_identifier_scanners: BTreeSet<&'static str>, + /// Names of the scanners that are compound (do not consist of only literals). compound_scanner_names: Vec<&'static str>, + /// Set of delimiter pairs for this context that are used in delimited error recovery. 
delimiters: BTreeMap<&'static str, &'static str>, + // Internal state: + #[serde(skip)] + scanner_definitions: BTreeSet<&'static str>, + #[serde(skip)] + keyword_scanner_defs: BTreeMap<&'static str, KeywordScannerDefinitionRef>, } impl RuntimeModel { From 4f0c4272d904a040b4f7f66b6b38267adb2e2073 Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Tue, 21 May 2024 16:16:49 +0200 Subject: [PATCH 2/6] refactor: Bundle parser-related logic in the parser generator controller --- .../runtime/cargo/src/runtime/kinds.rs.jinja2 | 12 +- .../cargo/src/runtime/language.rs.jinja2 | 18 +- crates/codegen/runtime/generator/src/lib.rs | 8 +- crates/codegen/runtime/generator/src/model.rs | 341 +---------------- .../codegen/runtime/generator/src/parser.rs | 351 ++++++++++++++++++ .../keyword_scanner_definition.rs | 4 +- .../src/{ => parser}/parser_definition.rs | 0 .../precedence_parser_definition.rs | 4 +- .../src/{ => parser}/scanner_definition.rs | 2 +- .../generator/src/{ => parser}/trie.rs | 2 +- 10 files changed, 381 insertions(+), 361 deletions(-) create mode 100644 crates/codegen/runtime/generator/src/parser.rs rename crates/codegen/runtime/generator/src/{ => parser}/keyword_scanner_definition.rs (95%) rename crates/codegen/runtime/generator/src/{ => parser}/parser_definition.rs (100%) rename crates/codegen/runtime/generator/src/{ => parser}/precedence_parser_definition.rs (99%) rename crates/codegen/runtime/generator/src/{ => parser}/scanner_definition.rs (98%) rename crates/codegen/runtime/generator/src/{ => parser}/trie.rs (98%) diff --git a/crates/codegen/runtime/cargo/src/runtime/kinds.rs.jinja2 b/crates/codegen/runtime/cargo/src/runtime/kinds.rs.jinja2 index e45543aee6..92d5eeef8c 100644 --- a/crates/codegen/runtime/cargo/src/runtime/kinds.rs.jinja2 +++ b/crates/codegen/runtime/cargo/src/runtime/kinds.rs.jinja2 @@ -21,7 +21,7 @@ pub enum RuleKind { Stub2, Stub3, {%- else -%} - {%- for variant in model.rule_kinds -%} + {%- for variant in model.parser.rule_kinds -%} {# 
variant.documentation | indent(prefix = "/// ", first = true, blank = true) #} {{ variant }}, {%- endfor -%} @@ -63,7 +63,7 @@ pub enum NodeLabel { Stub2, Stub3, {%- else -%} - {% for variant in model.labels -%} + {% for variant in model.parser.labels -%} {{ variant | pascal_case }}, {%- endfor -%} {%- endif -%} @@ -95,7 +95,7 @@ pub enum TokenKind { Stub2, Stub3, {%- else -%} - {%- for variant in model.token_kinds -%} + {%- for variant in model.parser.token_kinds -%} {# variant.documentation | indent(prefix = "/// ", first = true, blank = true) #} {{ variant }}, {%- endfor -%} @@ -109,7 +109,7 @@ impl metaslang_cst::TerminalKind for TokenKind { {%- else -%} matches!( self, - {%- for variant in model.trivia_scanner_names -%} + {%- for variant in model.parser.trivia_scanner_names -%} | Self::{{ variant }} {%- endfor -%} ) @@ -125,7 +125,7 @@ pub(crate) enum LexicalContext { Stub2, Stub3, {%- else -%} - {%- for context_name, _ in model.scanner_contexts %} + {%- for context_name, _ in model.parser.scanner_contexts %} {{ context_name }}, {%- endfor %} {%- endif -%} @@ -140,7 +140,7 @@ pub(crate) trait IsLexicalContext { #[allow(non_snake_case)] pub(crate) mod LexicalContextType { {%- if not rendering_in_stubs -%} - {%- for context_name, _ in model.scanner_contexts %} + {%- for context_name, _ in model.parser.scanner_contexts %} pub struct {{ context_name }}; impl super::IsLexicalContext for {{ context_name }} { diff --git a/crates/codegen/runtime/cargo/src/runtime/language.rs.jinja2 b/crates/codegen/runtime/cargo/src/runtime/language.rs.jinja2 index ef0248d1ab..c5de7c6091 100644 --- a/crates/codegen/runtime/cargo/src/runtime/language.rs.jinja2 +++ b/crates/codegen/runtime/cargo/src/runtime/language.rs.jinja2 @@ -29,7 +29,7 @@ use crate::parser_support::{ #[cfg_attr(feature = "slang_napi_interfaces", napi(namespace = "language"))] pub struct Language { {%- if not rendering_in_stubs -%} - {%- for version in model.referenced_versions -%} + {%- for version in 
model.parser.referenced_versions -%} pub(crate) version_is_at_least_{{ version | replace(from=".", to="_") }}: bool, {%- endfor -%} {%- endif -%} @@ -67,7 +67,7 @@ impl Language { if Self::SUPPORTED_VERSIONS.binary_search(&version).is_ok() { Ok(Self { {%- if not rendering_in_stubs -%} - {%- for version in model.referenced_versions %} + {%- for version in model.parser.referenced_versions %} version_is_at_least_{{ version | replace(from=".", to="_") }}: Version::new({{ version | split(pat=".") | join(sep=", ") }}) <= version, {%- endfor -%} {%- endif -%} @@ -89,12 +89,12 @@ impl Language { * Parser Functions ********************************************/ - {% for parser_name, parser_code in model.parser_functions %} + {% for parser_name, parser_code in model.parser.parser_functions %} #[allow(unused_assignments, unused_parens)] fn {{ parser_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> ParserResult { {{ parser_code }} } {% endfor %} - {% for parser_name, parser_code in model.trivia_parser_functions %} + {% for parser_name, parser_code in model.parser.trivia_parser_functions %} #[allow(unused_assignments, unused_parens)] fn {{ parser_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> ParserResult { {{ parser_code }} } {% endfor %} @@ -103,12 +103,12 @@ impl Language { * Scanner Functions ********************************************/ - {% for scanner_name, scanner_code in model.scanner_functions %} + {% for scanner_name, scanner_code in model.parser.scanner_functions %} #[allow(unused_assignments, unused_parens)] fn {{ scanner_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> bool { {{ scanner_code }} } {% endfor %} - {%- for keyword_name, keyword_code in model.keyword_compound_scanners %} + {%- for keyword_name, keyword_code in model.parser.keyword_compound_scanners %} #[inline] fn {{ keyword_name | snake_case }}(&self, input: &mut ParserContext<'_>, ident: &str) -> KeywordScan { {{ keyword_code }} } {% endfor %} @@ -120,7 +120,7 
@@ impl Language { unreachable!("Attempting to parse in stubs: {kind}: {input}") {%- else -%} match kind { - {%- for parser_name, _ in model.parser_functions -%} + {%- for parser_name, _ in model.parser.parser_functions -%} RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input), {%- endfor -%} } @@ -150,7 +150,7 @@ impl Lexer for Language { unreachable!("Invoking delimiters in stubs.") {%- else -%} match LexCtx::value() { - {%- for context_name, context in model.scanner_contexts %} + {%- for context_name, context in model.parser.scanner_contexts %} LexicalContext::{{ context_name }} => &[ {%- for open, close in context.delimiters %} (TokenKind::{{ open }}, TokenKind::{{ close }}), @@ -183,7 +183,7 @@ impl Lexer for Language { } match LexCtx::value() { - {%- for context_name, context in model.scanner_contexts %} + {%- for context_name, context in model.parser.scanner_contexts %} LexicalContext::{{ context_name }} => { if let Some(kind) = {{ context.literal_scanner }} { furthest_position = input.position(); diff --git a/crates/codegen/runtime/generator/src/lib.rs b/crates/codegen/runtime/generator/src/lib.rs index c4f43dd95f..24d3e5e046 100644 --- a/crates/codegen/runtime/generator/src/lib.rs +++ b/crates/codegen/runtime/generator/src/lib.rs @@ -1,5 +1,3 @@ -#![allow(clippy::too_many_lines)] - use std::path::{Path, PathBuf}; use std::rc::Rc; @@ -12,12 +10,8 @@ use serde::Serialize; use crate::model::RuntimeModel; mod ast; -mod keyword_scanner_definition; mod model; -mod parser_definition; -mod precedence_parser_definition; -mod scanner_definition; -mod trie; +mod parser; pub enum OutputLanguage { Cargo, diff --git a/crates/codegen/runtime/generator/src/model.rs b/crates/codegen/runtime/generator/src/model.rs index 9c7ac083ad..a241ff48a7 100644 --- a/crates/codegen/runtime/generator/src/model.rs +++ b/crates/codegen/runtime/generator/src/model.rs @@ -1,354 +1,27 @@ -#![allow(clippy::too_many_lines)] - -use std::collections::{BTreeMap, 
BTreeSet}; +use std::collections::BTreeSet; use std::rc::Rc; -use codegen_grammar::{ - Grammar, GrammarConstructorDslV2, GrammarVisitor, KeywordScannerAtomic, - KeywordScannerDefinitionRef, ParserDefinitionNode, ParserDefinitionRef, - PrecedenceParserDefinitionRef, ScannerDefinitionNode, ScannerDefinitionRef, - TriviaParserDefinitionRef, -}; use codegen_language_definition::model::Language; -use quote::{format_ident, quote}; use semver::Version; use serde::Serialize; use crate::ast::AstModel; -use crate::keyword_scanner_definition::KeywordScannerDefinitionExtensions; -use crate::parser_definition::ParserDefinitionExtensions; -use crate::precedence_parser_definition::PrecedenceParserDefinitionExtensions; -use crate::scanner_definition::ScannerDefinitionExtensions; -use crate::trie::Trie; +use crate::parser::ParserModel; #[derive(Default, Serialize)] pub struct RuntimeModel { /// Defines the `Language::SUPPORTED_VERSIONS` field. all_versions: BTreeSet, - /// Constructs inner `Language` the state to evaluate the version-dependent branches. - referenced_versions: BTreeSet, - - /// Defines the `RuleKind` enum variants. - rule_kinds: BTreeSet<&'static str>, - /// Defines the `TokenKind`` enum variants. - token_kinds: BTreeSet<&'static str>, - /// Defines `TokenKind::is_trivia` method. - trivia_scanner_names: BTreeSet<&'static str>, - /// Defines `NodeLabel` enum variants. - labels: BTreeSet, - - /// Defines the top-level scanner functions in `Language`. - scanner_functions: BTreeMap<&'static str, String>, // (name of scanner, code) - // Defines the `LexicalContext(Type)` enum and type-level variants. - scanner_contexts: BTreeMap<&'static str, ScannerContext>, - /// Defines the top-level compound scanners used when lexing in `Language`. - keyword_compound_scanners: BTreeMap<&'static str, String>, // (name of the KW scanner, code) - - /// Defines the top-level parser functions in `Language`. 
- parser_functions: BTreeMap<&'static str, String>, // (name of parser, code) - /// Defines the top-level trivia parser functions in `Language`. - trivia_parser_functions: BTreeMap<&'static str, String>, // (name of parser, code) - + parser: ParserModel, ast: AstModel, - - // Internal state: - /// Makes sure to codegen the scanner functions that are referenced by other scanners. - #[serde(skip)] - top_level_scanner_names: BTreeSet<&'static str>, - /// Lookup table for all scanners; used to generate trie scanners. - #[serde(skip)] - all_scanners: BTreeMap<&'static str, ScannerDefinitionRef>, - /// The current context of a parent scanner/parser being processed. - #[serde(skip)] - current_context_name: &'static str, -} - -#[derive(Default, Serialize)] -struct ScannerContext { - /// Rust code for the trie scanner that matches literals. - literal_scanner: String, - /// Names of the compound scanners that are keywords. - // Values (Rust code) is only used to generate the top-level `keyword_compound_scanners`. - keyword_compound_scanners: BTreeMap<&'static str, String>, - /// Rust code for the trie scanner that matches keywords - keyword_trie_scanner: String, - /// Names of the scanners for identifiers that can be promoted to keywords. - promotable_identifier_scanners: BTreeSet<&'static str>, - /// Names of the scanners that are compound (do not consist of only literals). - compound_scanner_names: Vec<&'static str>, - /// Set of delimiter pairs for this context that are used in delimited error recovery. 
- delimiters: BTreeMap<&'static str, &'static str>, - // Internal state: - #[serde(skip)] - scanner_definitions: BTreeSet<&'static str>, - #[serde(skip)] - keyword_scanner_defs: BTreeMap<&'static str, KeywordScannerDefinitionRef>, } impl RuntimeModel { pub fn from_language(language: &Rc) -> Self { - let grammar = Grammar::from_dsl_v2(language); - - let mut model = Self::default(); - grammar.accept_visitor(&mut model); - - // TODO(#638): Absorb the relevant fields into the model tree after migration is complete: - model.all_versions = language.versions.iter().cloned().collect(); - model.ast = AstModel::create(language); - - model - } - - fn set_current_context(&mut self, name: &'static str) { - self.current_context_name = name; - self.scanner_contexts.entry(name).or_default(); - } - - fn current_context(&mut self) -> &mut ScannerContext { - self.scanner_contexts - .get_mut(&self.current_context_name) - .expect("context must be set with `set_current_context`") - } -} - -impl GrammarVisitor for RuntimeModel { - fn grammar_leave(&mut self, _grammar: &Grammar) { - // Expose the scanner functions that... 
- self.scanner_functions = self - .all_scanners - .iter() - .filter(|(name, scanner)| { - // are compound (do not consist of only literals) - scanner.literals().is_empty() || - // but make sure to also include a scanner that is referenced by other scanners, even if not compound - !self.top_level_scanner_names.contains(*name) - }) - .map(|(name, scanner)| (*name, scanner.to_scanner_code().to_string())) - .collect(); - - for context in self.scanner_contexts.values_mut() { - let mut literal_trie = Trie::new(); - - for scanner_name in &context.scanner_definitions { - let scanner = &self.all_scanners[*scanner_name]; - - let literals = scanner.literals(); - if literals.is_empty() { - context.compound_scanner_names.push(scanner_name); - } else { - for literal in literals { - literal_trie.insert(&literal, Rc::clone(scanner)); - } - } - } - - context.literal_scanner = literal_trie.to_scanner_code().to_string(); - - context.promotable_identifier_scanners = context - .keyword_scanner_defs - .values() - .map(|def| def.identifier_scanner()) - .collect(); - - let mut keyword_trie = Trie::new(); - for (name, def) in &context.keyword_scanner_defs { - match KeywordScannerAtomic::try_from_def(def) { - Some(atomic) => keyword_trie.insert(atomic.value(), atomic.clone()), - None => { - context - .keyword_compound_scanners - .insert(name, def.to_scanner_code().to_string()); - } - } - } - - context.keyword_trie_scanner = keyword_trie.to_scanner_code().to_string(); + Self { + all_versions: language.versions.iter().cloned().collect(), + ast: AstModel::create(language), + parser: ParserModel::from_language(language), } - - // Collect all of the keyword scanners into a single list to be defined at top-level - self.keyword_compound_scanners = self - .scanner_contexts - .values() - .flat_map(|context| { - context - .keyword_compound_scanners - .iter() - .map(|(name, code)| (*name, code.clone())) - }) - .collect(); - - // Make sure empty strings are not there - self.labels.remove(""); - // 
These are built-in and already pre-defined - // _SLANG_INTERNAL_RESERVED_NODE_LABELS_ (keep in sync) - self.labels.remove("item"); - self.labels.remove("variant"); - self.labels.remove("separator"); - self.labels.remove("operand"); - self.labels.remove("left_operand"); - self.labels.remove("right_operand"); - self.labels.remove("leading_trivia"); - self.labels.remove("trailing_trivia"); - - // Just being anal about tidying up :) - self.all_scanners.clear(); - self.current_context_name = ""; - } - - fn scanner_definition_enter(&mut self, scanner: &ScannerDefinitionRef) { - self.all_scanners.insert(scanner.name(), Rc::clone(scanner)); - } - - fn keyword_scanner_definition_enter(&mut self, scanner: &KeywordScannerDefinitionRef) { - for def in scanner.definitions() { - let versions = def.enabled.iter().chain(def.reserved.iter()); - - self.referenced_versions.extend( - versions - .map(|vqr| &vqr.from) - // "Removed from 0.0.0" is an alias for "never"; it's never directly checked - .filter(|v| *v != &Version::new(0, 0, 0)) - .cloned(), - ); - } - } - - fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) { - self.set_current_context(parser.context()); - let trivia_scanners = { - use codegen_grammar::Visitable; - - #[derive(Default)] - struct CollectTriviaScanners { - scanner_names: BTreeSet<&'static str>, - } - impl codegen_grammar::GrammarVisitor for CollectTriviaScanners { - fn scanner_definition_enter(&mut self, node: &ScannerDefinitionRef) { - self.scanner_names.insert(node.name()); - } - } - - let mut visitor = CollectTriviaScanners::default(); - parser.node().accept_visitor(&mut visitor); - visitor.scanner_names - }; - self.trivia_scanner_names.extend(trivia_scanners); - - self.trivia_parser_functions - .insert(parser.name(), parser.to_parser_code().to_string()); - } - - fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) { - // Have to set this regardless so that we can collect referenced scanners - 
self.set_current_context(parser.context()); - if !parser.is_inline() { - self.rule_kinds.insert(parser.name()); - let code = parser.to_parser_code(); - self.parser_functions.insert( - parser.name(), - { - let rule_kind = format_ident!("{}", parser.name()); - quote! { #code.with_kind(RuleKind::#rule_kind) } - } - .to_string(), - ); - } - } - - fn precedence_parser_definition_enter(&mut self, parser: &PrecedenceParserDefinitionRef) { - self.set_current_context(parser.context()); - self.rule_kinds.insert(parser.name()); - for (_, name, _) in &parser.node().operators { - self.rule_kinds.insert(name); - } - - // While it's not common to parse a precedence expression as a standalone rule, - // we generate a function for completeness. - for (name, code) in parser.to_precedence_expression_parser_code() { - self.parser_functions.insert(name, code.to_string()); - } - - self.parser_functions.insert( - parser.name(), - { - let code = parser.to_parser_code(); - let rule_kind = format_ident!("{}", parser.name()); - quote! 
{ #code.with_kind(RuleKind::#rule_kind) } - } - .to_string(), - ); - } - - fn scanner_definition_node_enter(&mut self, node: &ScannerDefinitionNode) { - if let ScannerDefinitionNode::Versioned(_, version_quality_ranges) = node { - for vqr in version_quality_ranges { - self.referenced_versions.insert(vqr.from.clone()); - } - } - } - - fn parser_definition_node_enter(&mut self, node: &ParserDefinitionNode) { - match node { - ParserDefinitionNode::Versioned(_, version_quality_ranges) => { - for vqr in version_quality_ranges { - self.referenced_versions.insert(vqr.from.clone()); - } - } - ParserDefinitionNode::ScannerDefinition(scanner) => { - self.top_level_scanner_names.insert(scanner.name()); - self.token_kinds.insert(scanner.name()); - - self.current_context() - .scanner_definitions - .insert(scanner.name()); - } - ParserDefinitionNode::KeywordScannerDefinition(scanner) => { - self.token_kinds.insert(scanner.name()); - - self.current_context() - .keyword_scanner_defs - .insert(scanner.name(), Rc::clone(scanner)); - } - - // Collect labels: - ParserDefinitionNode::Choice(choice) => { - self.labels.insert(choice.label.clone()); - } - ParserDefinitionNode::Sequence(sequence) => { - for node in sequence { - self.labels.insert(node.label.clone()); - } - } - ParserDefinitionNode::SeparatedBy(item, separator) => { - self.labels.insert(item.label.clone()); - self.labels.insert(separator.label.clone()); - } - ParserDefinitionNode::TerminatedBy(_, terminator) => { - self.labels.insert(terminator.label.clone()); - } - - // Collect delimiters for each context - ParserDefinitionNode::DelimitedBy(open, _, close, ..) 
=> { - self.labels.insert(open.label.clone()); - self.labels.insert(close.label.clone()); - - let (open, close) = match (open.as_ref(), close.as_ref()) { - ( - ParserDefinitionNode::ScannerDefinition(open, ..), - ParserDefinitionNode::ScannerDefinition(close, ..), - ) => (open.name(), close.name()), - _ => panic!("DelimitedBy must be delimited by scanners"), - }; - - let delimiters = &mut self.current_context().delimiters; - - assert!( - delimiters.get(close).is_none(), - "Cannot use a closing delimiter as an opening one" - ); - delimiters.insert(open, close); - } - _ => {} - }; } } diff --git a/crates/codegen/runtime/generator/src/parser.rs b/crates/codegen/runtime/generator/src/parser.rs new file mode 100644 index 0000000000..b071ec2692 --- /dev/null +++ b/crates/codegen/runtime/generator/src/parser.rs @@ -0,0 +1,351 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::rc::Rc; + +use codegen_grammar::{ + Grammar, GrammarConstructorDslV2 as _, GrammarVisitor, KeywordScannerAtomic, + KeywordScannerDefinitionRef, ParserDefinitionNode, ParserDefinitionRef, + PrecedenceParserDefinitionRef, ScannerDefinitionNode, ScannerDefinitionRef, + TriviaParserDefinitionRef, +}; +use codegen_language_definition::model::Language; +use quote::{format_ident, quote}; +use semver::Version; +use serde::Serialize; + +mod keyword_scanner_definition; +mod parser_definition; +mod precedence_parser_definition; +mod scanner_definition; +mod trie; + +use keyword_scanner_definition::KeywordScannerDefinitionExtensions as _; +use parser_definition::ParserDefinitionExtensions as _; +use precedence_parser_definition::PrecedenceParserDefinitionExtensions as _; +use scanner_definition::ScannerDefinitionExtensions as _; +use trie::Trie; + +#[derive(Default, Serialize)] +pub struct ParserModel { + /// NOTE(review): duplicates `RuntimeModel::all_versions`, which is what defines `Language::SUPPORTED_VERSIONS`; nothing visible in this file populates this copy - likely a leftover from the move, confirm. + all_versions: BTreeSet<Version>, + /// Constructs the inner `Language` state to evaluate the version-dependent branches. 
+ referenced_versions: BTreeSet<Version>, + + /// Defines the `RuleKind` enum variants. + rule_kinds: BTreeSet<&'static str>, + /// Defines the `TokenKind` enum variants. + token_kinds: BTreeSet<&'static str>, + /// Defines the `TokenKind::is_trivia` method. + trivia_scanner_names: BTreeSet<&'static str>, + /// Defines the `NodeLabel` enum variants. + labels: BTreeSet<String>, + + /// Defines the top-level scanner functions in `Language`. + scanner_functions: BTreeMap<&'static str, String>, // (name of scanner, code) + /// Defines the `LexicalContext(Type)` enum and type-level variants. + scanner_contexts: BTreeMap<&'static str, ScannerContext>, + /// Defines the top-level compound scanners used when lexing in `Language`. + keyword_compound_scanners: BTreeMap<&'static str, String>, // (name of the KW scanner, code) + + /// Defines the top-level parser functions in `Language`. + parser_functions: BTreeMap<&'static str, String>, // (name of parser, code) + /// Defines the top-level trivia parser functions in `Language`. + trivia_parser_functions: BTreeMap<&'static str, String>, // (name of parser, code) + + // Internal state: + /// Makes sure to codegen the scanner functions that are referenced by other scanners. + #[serde(skip)] + top_level_scanner_names: BTreeSet<&'static str>, + /// Lookup table for all scanners; used to generate trie scanners. + #[serde(skip)] + all_scanners: BTreeMap<&'static str, ScannerDefinitionRef>, + /// The current context of a parent scanner/parser being processed. + #[serde(skip)] + current_context_name: &'static str, +} + +#[derive(Default, Serialize)] +struct ScannerContext { + /// Rust code for the trie scanner that matches literals. + literal_scanner: String, + /// Names of the compound scanners that are keywords. + // The values (Rust code) are only used to generate the top-level `keyword_compound_scanners`. 
+ keyword_compound_scanners: BTreeMap<&'static str, String>, + /// Rust code for the trie scanner that matches keywords. + keyword_trie_scanner: String, + /// Names of the scanners for identifiers that can be promoted to keywords. + promotable_identifier_scanners: BTreeSet<&'static str>, + /// Names of the scanners that are compound (do not consist of only literals). + compound_scanner_names: Vec<&'static str>, + /// Set of delimiter pairs for this context that are used in delimited error recovery. + delimiters: BTreeMap<&'static str, &'static str>, + // Internal state: + #[serde(skip)] + scanner_definitions: BTreeSet<&'static str>, + #[serde(skip)] + keyword_scanner_defs: BTreeMap<&'static str, KeywordScannerDefinitionRef>, +} + +impl ParserModel { + pub fn from_language(language: &Rc<Language>) -> Self { + let grammar = Grammar::from_dsl_v2(language); + + let mut model = Self::default(); + grammar.accept_visitor(&mut model); + + model + } + + fn set_current_context(&mut self, name: &'static str) { + self.current_context_name = name; + self.scanner_contexts.entry(name).or_default(); + } + + fn current_context(&mut self) -> &mut ScannerContext { + self.scanner_contexts + .get_mut(&self.current_context_name) + .expect("context must be set with `set_current_context`") + } +} + +impl GrammarVisitor for ParserModel { + fn grammar_leave(&mut self, _grammar: &Grammar) { + // Expose the scanner functions that... 
+ self.scanner_functions = self + .all_scanners + .iter() + .filter(|(name, scanner)| { + // are compound (do not consist of only literals) + scanner.literals().is_empty() || + // but make sure to also include a scanner that is referenced by other scanners, even if not compound + !self.top_level_scanner_names.contains(*name) + }) + .map(|(name, scanner)| (*name, scanner.to_scanner_code().to_string())) + .collect(); + + for context in self.scanner_contexts.values_mut() { + let mut literal_trie = Trie::new(); + + for scanner_name in &context.scanner_definitions { + let scanner = &self.all_scanners[*scanner_name]; + + let literals = scanner.literals(); + if literals.is_empty() { + context.compound_scanner_names.push(scanner_name); + } else { + for literal in literals { + literal_trie.insert(&literal, Rc::clone(scanner)); + } + } + } + + context.literal_scanner = literal_trie.to_scanner_code().to_string(); + + context.promotable_identifier_scanners = context + .keyword_scanner_defs + .values() + .map(|def| def.identifier_scanner()) + .collect(); + + let mut keyword_trie = Trie::new(); + for (name, def) in &context.keyword_scanner_defs { + match KeywordScannerAtomic::try_from_def(def) { + Some(atomic) => keyword_trie.insert(atomic.value(), atomic.clone()), + None => { + context + .keyword_compound_scanners + .insert(name, def.to_scanner_code().to_string()); + } + } + } + + context.keyword_trie_scanner = keyword_trie.to_scanner_code().to_string(); + } + + // Collect all of the keyword scanners into a single list to be defined at top-level + self.keyword_compound_scanners = self + .scanner_contexts + .values() + .flat_map(|context| { + context + .keyword_compound_scanners + .iter() + .map(|(name, code)| (*name, code.clone())) + }) + .collect(); + + // Make sure empty strings are not there + self.labels.remove(""); + // These are built-in and already pre-defined + // _SLANG_INTERNAL_RESERVED_NODE_LABELS_ (keep in sync) + self.labels.remove("item"); + 
self.labels.remove("variant"); + self.labels.remove("separator"); + self.labels.remove("operand"); + self.labels.remove("left_operand"); + self.labels.remove("right_operand"); + self.labels.remove("leading_trivia"); + self.labels.remove("trailing_trivia"); + + // Just being anal about tidying up :) + self.all_scanners.clear(); + self.current_context_name = ""; + } + + fn scanner_definition_enter(&mut self, scanner: &ScannerDefinitionRef) { + self.all_scanners.insert(scanner.name(), Rc::clone(scanner)); + } + + fn keyword_scanner_definition_enter(&mut self, scanner: &KeywordScannerDefinitionRef) { + for def in scanner.definitions() { + let versions = def.enabled.iter().chain(def.reserved.iter()); + + self.referenced_versions.extend( + versions + .map(|vqr| &vqr.from) + // "Removed from 0.0.0" is an alias for "never"; it's never directly checked + .filter(|v| *v != &Version::new(0, 0, 0)) + .cloned(), + ); + } + } + + fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) { + self.set_current_context(parser.context()); + let trivia_scanners = { + use codegen_grammar::Visitable; + + #[derive(Default)] + struct CollectTriviaScanners { + scanner_names: BTreeSet<&'static str>, + } + impl codegen_grammar::GrammarVisitor for CollectTriviaScanners { + fn scanner_definition_enter(&mut self, node: &ScannerDefinitionRef) { + self.scanner_names.insert(node.name()); + } + } + + let mut visitor = CollectTriviaScanners::default(); + parser.node().accept_visitor(&mut visitor); + visitor.scanner_names + }; + self.trivia_scanner_names.extend(trivia_scanners); + + self.trivia_parser_functions + .insert(parser.name(), parser.to_parser_code().to_string()); + } + + fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) { + // Have to set this regardless so that we can collect referenced scanners + self.set_current_context(parser.context()); + if !parser.is_inline() { + self.rule_kinds.insert(parser.name()); + let code = parser.to_parser_code(); + 
self.parser_functions.insert( + parser.name(), + { + let rule_kind = format_ident!("{}", parser.name()); + quote! { #code.with_kind(RuleKind::#rule_kind) } + } + .to_string(), + ); + } + } + + fn precedence_parser_definition_enter(&mut self, parser: &PrecedenceParserDefinitionRef) { + self.set_current_context(parser.context()); + self.rule_kinds.insert(parser.name()); + for (_, name, _) in &parser.node().operators { + self.rule_kinds.insert(name); + } + + // While it's not common to parse a precedence expression as a standalone rule, + // we generate a function for completeness. + for (name, code) in parser.to_precedence_expression_parser_code() { + self.parser_functions.insert(name, code.to_string()); + } + + self.parser_functions.insert( + parser.name(), + { + let code = parser.to_parser_code(); + let rule_kind = format_ident!("{}", parser.name()); + quote! { #code.with_kind(RuleKind::#rule_kind) } + } + .to_string(), + ); + } + + fn scanner_definition_node_enter(&mut self, node: &ScannerDefinitionNode) { + if let ScannerDefinitionNode::Versioned(_, version_quality_ranges) = node { + for vqr in version_quality_ranges { + self.referenced_versions.insert(vqr.from.clone()); + } + } + } + + fn parser_definition_node_enter(&mut self, node: &ParserDefinitionNode) { + match node { + ParserDefinitionNode::Versioned(_, version_quality_ranges) => { + for vqr in version_quality_ranges { + self.referenced_versions.insert(vqr.from.clone()); + } + } + ParserDefinitionNode::ScannerDefinition(scanner) => { + self.top_level_scanner_names.insert(scanner.name()); + self.token_kinds.insert(scanner.name()); + + self.current_context() + .scanner_definitions + .insert(scanner.name()); + } + ParserDefinitionNode::KeywordScannerDefinition(scanner) => { + self.token_kinds.insert(scanner.name()); + + self.current_context() + .keyword_scanner_defs + .insert(scanner.name(), Rc::clone(scanner)); + } + + // Collect labels: + ParserDefinitionNode::Choice(choice) => { + 
self.labels.insert(choice.label.clone()); + } + ParserDefinitionNode::Sequence(sequence) => { + for node in sequence { + self.labels.insert(node.label.clone()); + } + } + ParserDefinitionNode::SeparatedBy(item, separator) => { + self.labels.insert(item.label.clone()); + self.labels.insert(separator.label.clone()); + } + ParserDefinitionNode::TerminatedBy(_, terminator) => { + self.labels.insert(terminator.label.clone()); + } + + // Collect delimiters for each context + ParserDefinitionNode::DelimitedBy(open, _, close, ..) => { + self.labels.insert(open.label.clone()); + self.labels.insert(close.label.clone()); + + let (open, close) = match (open.as_ref(), close.as_ref()) { + ( + ParserDefinitionNode::ScannerDefinition(open, ..), + ParserDefinitionNode::ScannerDefinition(close, ..), + ) => (open.name(), close.name()), + _ => panic!("DelimitedBy must be delimited by scanners"), + }; + + let delimiters = &mut self.current_context().delimiters; + + assert!( + delimiters.get(close).is_none(), + "Cannot use a closing delimiter as an opening one" + ); + delimiters.insert(open, close); + } + _ => {} + }; + } +} diff --git a/crates/codegen/runtime/generator/src/keyword_scanner_definition.rs b/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs similarity index 95% rename from crates/codegen/runtime/generator/src/keyword_scanner_definition.rs rename to crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs index 2a7af0e1a7..4e365519a1 100644 --- a/crates/codegen/runtime/generator/src/keyword_scanner_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs @@ -4,8 +4,8 @@ use codegen_grammar::{ use proc_macro2::TokenStream; use quote::{format_ident, quote}; -use crate::parser_definition::VersionQualityRangeVecExtensions; -use crate::scanner_definition::ScannerDefinitionNodeExtensions; +use crate::parser::parser_definition::VersionQualityRangeVecExtensions; +use 
crate::parser::scanner_definition::ScannerDefinitionNodeExtensions; pub trait KeywordScannerDefinitionExtensions { fn to_scanner_code(&self) -> TokenStream; diff --git a/crates/codegen/runtime/generator/src/parser_definition.rs b/crates/codegen/runtime/generator/src/parser/parser_definition.rs similarity index 100% rename from crates/codegen/runtime/generator/src/parser_definition.rs rename to crates/codegen/runtime/generator/src/parser/parser_definition.rs diff --git a/crates/codegen/runtime/generator/src/precedence_parser_definition.rs b/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs similarity index 99% rename from crates/codegen/runtime/generator/src/precedence_parser_definition.rs rename to crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs index 91d0c01673..06d873e7af 100644 --- a/crates/codegen/runtime/generator/src/precedence_parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs @@ -5,7 +5,9 @@ use inflector::Inflector; use proc_macro2::{Ident, TokenStream}; use quote::{format_ident, quote}; -use crate::parser_definition::{make_choice, make_sequence, ParserDefinitionNodeExtensions}; +use crate::parser::parser_definition::{ + make_choice, make_sequence, ParserDefinitionNodeExtensions, +}; pub trait PrecedenceParserDefinitionExtensions { fn to_parser_code(&self) -> TokenStream; diff --git a/crates/codegen/runtime/generator/src/scanner_definition.rs b/crates/codegen/runtime/generator/src/parser/scanner_definition.rs similarity index 98% rename from crates/codegen/runtime/generator/src/scanner_definition.rs rename to crates/codegen/runtime/generator/src/parser/scanner_definition.rs index 034d456a5c..c23ba551f7 100644 --- a/crates/codegen/runtime/generator/src/scanner_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/scanner_definition.rs @@ -5,7 +5,7 @@ use inflector::Inflector; use proc_macro2::TokenStream; use quote::{format_ident, quote}; -use 
crate::parser_definition::VersionQualityRangeVecExtensions; +use crate::parser::parser_definition::VersionQualityRangeVecExtensions; pub trait ScannerDefinitionExtensions { fn to_scanner_code(&self) -> TokenStream; diff --git a/crates/codegen/runtime/generator/src/trie.rs b/crates/codegen/runtime/generator/src/parser/trie.rs similarity index 98% rename from crates/codegen/runtime/generator/src/trie.rs rename to crates/codegen/runtime/generator/src/parser/trie.rs index 391b569d6c..d7ad28beed 100644 --- a/crates/codegen/runtime/generator/src/trie.rs +++ b/crates/codegen/runtime/generator/src/parser/trie.rs @@ -8,7 +8,7 @@ use codegen_grammar::{ use proc_macro2::TokenStream; use quote::{format_ident, quote}; -use crate::parser_definition::VersionQualityRangeVecExtensions; +use crate::parser::parser_definition::VersionQualityRangeVecExtensions; #[derive(Clone, Debug, Default)] pub struct Trie { From db33c232944a97d5a6d8de08150f99ecd3bf0601 Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Tue, 21 May 2024 20:37:12 +0200 Subject: [PATCH 3/6] refactor: Move codegen_grammar into parser generator directly --- Cargo.lock | 12 +----------- Cargo.toml | 2 -- crates/codegen/grammar/Cargo.toml | 16 ---------------- crates/codegen/grammar/src/lib.rs | 15 --------------- crates/codegen/runtime/generator/Cargo.toml | 2 +- crates/codegen/runtime/generator/src/parser.rs | 17 +++++++++-------- .../generator/src/parser}/grammar.rs | 16 +++++++++++++--- .../src/parser/grammar}/constructor.rs | 2 +- .../src/parser/grammar}/parser_definition.rs | 4 ++-- .../grammar}/precedence_parser_definition.rs | 2 +- .../src/parser/grammar}/scanner_definition.rs | 2 +- .../src/parser/grammar}/version_quality.rs | 0 .../generator/src/parser/grammar}/visitor.rs | 2 +- .../src/parser/keyword_scanner_definition.rs | 6 +++--- .../generator/src/parser/parser_definition.rs | 9 +++++---- .../src/parser/precedence_parser_definition.rs | 6 +++--- .../generator/src/parser/scanner_definition.rs | 2 +- 
.../runtime/generator/src/parser/trie.rs | 8 ++++---- 18 files changed, 46 insertions(+), 77 deletions(-) delete mode 100644 crates/codegen/grammar/Cargo.toml delete mode 100644 crates/codegen/grammar/src/lib.rs rename crates/codegen/{grammar/src => runtime/generator/src/parser}/grammar.rs (89%) rename crates/codegen/{grammar/src => runtime/generator/src/parser/grammar}/constructor.rs (99%) rename crates/codegen/{grammar/src => runtime/generator/src/parser/grammar}/parser_definition.rs (98%) rename crates/codegen/{grammar/src => runtime/generator/src/parser/grammar}/precedence_parser_definition.rs (93%) rename crates/codegen/{grammar/src => runtime/generator/src/parser/grammar}/scanner_definition.rs (98%) rename crates/codegen/{grammar/src => runtime/generator/src/parser/grammar}/version_quality.rs (100%) rename crates/codegen/{grammar/src => runtime/generator/src/parser/grammar}/visitor.rs (97%) diff --git a/Cargo.lock b/Cargo.lock index 7f8aa6a2eb..5c2ad3b528 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -320,16 +320,6 @@ dependencies = [ "indexmap", ] -[[package]] -name = "codegen_grammar" -version = "0.14.2" -dependencies = [ - "codegen_language_definition", - "indexmap", - "semver", - "strum_macros", -] - [[package]] name = "codegen_language_definition" version = "0.14.2" @@ -401,7 +391,6 @@ version = "0.14.2" dependencies = [ "Inflector", "anyhow", - "codegen_grammar", "codegen_language_definition", "indexmap", "infra_utils", @@ -410,6 +399,7 @@ dependencies = [ "quote", "semver", "serde", + "strum_macros", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 7193ad9636..b37e9969dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,6 @@ publish = false resolver = "2" members = [ "crates/codegen/ebnf", - "crates/codegen/grammar", "crates/codegen/language/definition", "crates/codegen/language/internal_macros", "crates/codegen/language/macros", @@ -47,7 +46,6 @@ members = [ # Internal # codegen_ebnf = { path = "crates/codegen/ebnf" } -codegen_grammar = { path 
= "crates/codegen/grammar" } codegen_language_definition = { path = "crates/codegen/language/definition" } codegen_language_internal_macros = { path = "crates/codegen/language/internal_macros" } codegen_language_macros = { path = "crates/codegen/language/macros" } diff --git a/crates/codegen/grammar/Cargo.toml b/crates/codegen/grammar/Cargo.toml deleted file mode 100644 index f99b62bbb6..0000000000 --- a/crates/codegen/grammar/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "codegen_grammar" -version.workspace = true -rust-version.workspace = true -edition.workspace = true -publish = false - -[dependencies] -codegen_language_definition = { workspace = true } -indexmap = { workspace = true } -semver = { workspace = true } -strum_macros = { workspace = true } - -[lints] -workspace = true - diff --git a/crates/codegen/grammar/src/lib.rs b/crates/codegen/grammar/src/lib.rs deleted file mode 100644 index df8f06b587..0000000000 --- a/crates/codegen/grammar/src/lib.rs +++ /dev/null @@ -1,15 +0,0 @@ -mod constructor; -mod grammar; -mod parser_definition; -mod precedence_parser_definition; -mod scanner_definition; -mod version_quality; -mod visitor; - -pub use constructor::GrammarConstructorDslV2; -pub use grammar::*; -pub use parser_definition::*; -pub use precedence_parser_definition::*; -pub use scanner_definition::*; -pub use version_quality::*; -pub use visitor::*; diff --git a/crates/codegen/runtime/generator/Cargo.toml b/crates/codegen/runtime/generator/Cargo.toml index 72ea0e22da..d911a27d15 100644 --- a/crates/codegen/runtime/generator/Cargo.toml +++ b/crates/codegen/runtime/generator/Cargo.toml @@ -7,7 +7,6 @@ publish = false [dependencies] anyhow = { workspace = true } -codegen_grammar = { workspace = true } codegen_language_definition = { workspace = true } indexmap = { workspace = true } Inflector = { workspace = true } @@ -17,6 +16,7 @@ proc-macro2 = { workspace = true } quote = { workspace = true } semver = { workspace = true } serde = { 
workspace = true } +strum_macros = { workspace = true } [lints] workspace = true diff --git a/crates/codegen/runtime/generator/src/parser.rs b/crates/codegen/runtime/generator/src/parser.rs index b071ec2692..d66d0dfcf0 100644 --- a/crates/codegen/runtime/generator/src/parser.rs +++ b/crates/codegen/runtime/generator/src/parser.rs @@ -1,23 +1,24 @@ use std::collections::{BTreeMap, BTreeSet}; use std::rc::Rc; -use codegen_grammar::{ - Grammar, GrammarConstructorDslV2 as _, GrammarVisitor, KeywordScannerAtomic, - KeywordScannerDefinitionRef, ParserDefinitionNode, ParserDefinitionRef, - PrecedenceParserDefinitionRef, ScannerDefinitionNode, ScannerDefinitionRef, - TriviaParserDefinitionRef, -}; use codegen_language_definition::model::Language; use quote::{format_ident, quote}; use semver::Version; use serde::Serialize; +mod grammar; mod keyword_scanner_definition; mod parser_definition; mod precedence_parser_definition; mod scanner_definition; mod trie; +use grammar::{ + Grammar, GrammarConstructorDslV2 as _, GrammarVisitor, KeywordScannerAtomic, + KeywordScannerDefinitionRef, ParserDefinitionNode, ParserDefinitionRef, + PrecedenceParserDefinitionRef, ScannerDefinitionNode, ScannerDefinitionRef, + TriviaParserDefinitionRef, +}; use keyword_scanner_definition::KeywordScannerDefinitionExtensions as _; use parser_definition::ParserDefinitionExtensions as _; use precedence_parser_definition::PrecedenceParserDefinitionExtensions as _; @@ -213,13 +214,13 @@ impl GrammarVisitor for ParserModel { fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) { self.set_current_context(parser.context()); let trivia_scanners = { - use codegen_grammar::Visitable; + use crate::parser::grammar::visitor::Visitable; #[derive(Default)] struct CollectTriviaScanners { scanner_names: BTreeSet<&'static str>, } - impl codegen_grammar::GrammarVisitor for CollectTriviaScanners { + impl crate::parser::grammar::visitor::GrammarVisitor for CollectTriviaScanners { fn 
scanner_definition_enter(&mut self, node: &ScannerDefinitionRef) { self.scanner_names.insert(node.name()); } diff --git a/crates/codegen/grammar/src/grammar.rs b/crates/codegen/runtime/generator/src/parser/grammar.rs similarity index 89% rename from crates/codegen/grammar/src/grammar.rs rename to crates/codegen/runtime/generator/src/parser/grammar.rs index da3021ee85..3e9bb1554d 100644 --- a/crates/codegen/grammar/src/grammar.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar.rs @@ -2,9 +2,19 @@ use std::collections::{BTreeSet, HashMap}; use semver::Version; -use crate::parser_definition::{ParserDefinitionRef, TriviaParserDefinitionRef}; -use crate::visitor::{GrammarVisitor, Visitable}; -use crate::{KeywordScannerDefinitionRef, PrecedenceParserDefinitionRef, ScannerDefinitionRef}; +pub mod constructor; +pub mod parser_definition; +pub mod precedence_parser_definition; +pub mod scanner_definition; +pub mod version_quality; +pub mod visitor; + +pub use constructor::GrammarConstructorDslV2; +pub use parser_definition::*; +pub use precedence_parser_definition::*; +pub use scanner_definition::*; +pub use version_quality::*; +pub use visitor::*; pub struct Grammar { pub name: String, diff --git a/crates/codegen/grammar/src/constructor.rs b/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs similarity index 99% rename from crates/codegen/grammar/src/constructor.rs rename to crates/codegen/runtime/generator/src/parser/grammar/constructor.rs index d525a3961b..21d99273f9 100644 --- a/crates/codegen/grammar/src/constructor.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs @@ -9,7 +9,7 @@ use std::rc::Rc; use codegen_language_definition::model::{self, FieldsErrorRecovery, Identifier, Item}; use indexmap::IndexMap; -use crate::{ +use crate::parser::grammar::{ DelimitedRecoveryTokenThreshold, Grammar, GrammarElement, KeywordScannerDefinition, KeywordScannerDefinitionNode, KeywordScannerDefinitionVersionedNode, Labeled, 
ParserDefinition, ParserDefinitionNode, PrecedenceOperatorModel, PrecedenceParserDefinition, diff --git a/crates/codegen/grammar/src/parser_definition.rs b/crates/codegen/runtime/generator/src/parser/grammar/parser_definition.rs similarity index 98% rename from crates/codegen/grammar/src/parser_definition.rs rename to crates/codegen/runtime/generator/src/parser/grammar/parser_definition.rs index 60804a4d1b..640b838fc1 100644 --- a/crates/codegen/grammar/src/parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/parser_definition.rs @@ -3,8 +3,8 @@ use std::rc::Rc; use codegen_language_definition::model; -use crate::visitor::{GrammarVisitor, Visitable}; -use crate::{ +use crate::parser::grammar::visitor::{GrammarVisitor, Visitable}; +use crate::parser::grammar::{ KeywordScannerDefinitionRef, PrecedenceParserDefinitionRef, ScannerDefinitionRef, VersionQualityRange, }; diff --git a/crates/codegen/grammar/src/precedence_parser_definition.rs b/crates/codegen/runtime/generator/src/parser/grammar/precedence_parser_definition.rs similarity index 93% rename from crates/codegen/grammar/src/precedence_parser_definition.rs rename to crates/codegen/runtime/generator/src/parser/grammar/precedence_parser_definition.rs index 8f0c105415..f26143d8a3 100644 --- a/crates/codegen/grammar/src/precedence_parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/precedence_parser_definition.rs @@ -1,7 +1,7 @@ use std::fmt::Debug; use std::rc::Rc; -use crate::{GrammarVisitor, ParserDefinitionNode, Visitable}; +use crate::parser::grammar::{GrammarVisitor, ParserDefinitionNode, Visitable}; pub trait PrecedenceParserDefinition: Debug { fn name(&self) -> &'static str; diff --git a/crates/codegen/grammar/src/scanner_definition.rs b/crates/codegen/runtime/generator/src/parser/grammar/scanner_definition.rs similarity index 98% rename from crates/codegen/grammar/src/scanner_definition.rs rename to 
crates/codegen/runtime/generator/src/parser/grammar/scanner_definition.rs index 42687919be..63a01c44a0 100644 --- a/crates/codegen/grammar/src/scanner_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/scanner_definition.rs @@ -1,7 +1,7 @@ use std::fmt::Debug; use std::rc::Rc; -use crate::{GrammarVisitor, VersionQualityRange, Visitable}; +use crate::parser::grammar::{GrammarVisitor, VersionQualityRange, Visitable}; pub trait ScannerDefinition: Debug { fn name(&self) -> &'static str; diff --git a/crates/codegen/grammar/src/version_quality.rs b/crates/codegen/runtime/generator/src/parser/grammar/version_quality.rs similarity index 100% rename from crates/codegen/grammar/src/version_quality.rs rename to crates/codegen/runtime/generator/src/parser/grammar/version_quality.rs diff --git a/crates/codegen/grammar/src/visitor.rs b/crates/codegen/runtime/generator/src/parser/grammar/visitor.rs similarity index 97% rename from crates/codegen/grammar/src/visitor.rs rename to crates/codegen/runtime/generator/src/parser/grammar/visitor.rs index 874092fb5e..5256684180 100644 --- a/crates/codegen/grammar/src/visitor.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/visitor.rs @@ -1,4 +1,4 @@ -use crate::{ +use crate::parser::grammar::{ Grammar, KeywordScannerDefinitionRef, ParserDefinitionNode, ParserDefinitionRef, PrecedenceParserDefinitionNode, PrecedenceParserDefinitionRef, ScannerDefinitionNode, ScannerDefinitionRef, TriviaParserDefinitionRef, diff --git a/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs b/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs index 4e365519a1..ebd9ac53e8 100644 --- a/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/keyword_scanner_definition.rs @@ -1,9 +1,9 @@ -use codegen_grammar::{ - KeywordScannerDefinitionNode, KeywordScannerDefinitionRef, ScannerDefinitionNode, -}; use 
proc_macro2::TokenStream; use quote::{format_ident, quote}; +use crate::parser::grammar::{ + KeywordScannerDefinitionNode, KeywordScannerDefinitionRef, ScannerDefinitionNode, +}; use crate::parser::parser_definition::VersionQualityRangeVecExtensions; use crate::parser::scanner_definition::ScannerDefinitionNodeExtensions; diff --git a/crates/codegen/runtime/generator/src/parser/parser_definition.rs b/crates/codegen/runtime/generator/src/parser/parser_definition.rs index 486c583bc5..3ade34e746 100644 --- a/crates/codegen/runtime/generator/src/parser/parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/parser_definition.rs @@ -1,12 +1,13 @@ -use codegen_grammar::{ - Labeled, ParserDefinitionNode, ParserDefinitionRef, TriviaParserDefinitionRef, VersionQuality, - VersionQualityRange, -}; use inflector::Inflector; use proc_macro2::TokenStream; use quote::{format_ident, quote}; use semver::Version; +use crate::parser::grammar::{ + Labeled, ParserDefinitionNode, ParserDefinitionRef, TriviaParserDefinitionRef, VersionQuality, + VersionQualityRange, +}; + pub trait ParserDefinitionExtensions { fn to_parser_code(&self) -> TokenStream; } diff --git a/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs b/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs index 06d873e7af..e4ef3780ce 100644 --- a/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/precedence_parser_definition.rs @@ -1,10 +1,10 @@ -use codegen_grammar::{ - PrecedenceOperatorModel, PrecedenceParserDefinitionNode, PrecedenceParserDefinitionRef, -}; use inflector::Inflector; use proc_macro2::{Ident, TokenStream}; use quote::{format_ident, quote}; +use crate::parser::grammar::{ + PrecedenceOperatorModel, PrecedenceParserDefinitionNode, PrecedenceParserDefinitionRef, +}; use crate::parser::parser_definition::{ make_choice, make_sequence, ParserDefinitionNodeExtensions, }; diff 
--git a/crates/codegen/runtime/generator/src/parser/scanner_definition.rs b/crates/codegen/runtime/generator/src/parser/scanner_definition.rs index c23ba551f7..f02f90b6eb 100644 --- a/crates/codegen/runtime/generator/src/parser/scanner_definition.rs +++ b/crates/codegen/runtime/generator/src/parser/scanner_definition.rs @@ -1,10 +1,10 @@ use std::collections::BTreeSet; -use codegen_grammar::{ScannerDefinitionNode, ScannerDefinitionRef}; use inflector::Inflector; use proc_macro2::TokenStream; use quote::{format_ident, quote}; +use crate::parser::grammar::{ScannerDefinitionNode, ScannerDefinitionRef}; use crate::parser::parser_definition::VersionQualityRangeVecExtensions; pub trait ScannerDefinitionExtensions { diff --git a/crates/codegen/runtime/generator/src/parser/trie.rs b/crates/codegen/runtime/generator/src/parser/trie.rs index d7ad28beed..44ddad5954 100644 --- a/crates/codegen/runtime/generator/src/parser/trie.rs +++ b/crates/codegen/runtime/generator/src/parser/trie.rs @@ -1,13 +1,13 @@ use std::collections::BTreeMap; use std::fmt::Debug; -use codegen_grammar::{ - KeywordScannerAtomic, KeywordScannerDefinitionVersionedNode, ScannerDefinitionNode, - ScannerDefinitionRef, VersionQualityRange, -}; use proc_macro2::TokenStream; use quote::{format_ident, quote}; +use crate::parser::grammar::{ + KeywordScannerAtomic, KeywordScannerDefinitionVersionedNode, ScannerDefinitionNode, + ScannerDefinitionRef, VersionQualityRange, +}; use crate::parser::parser_definition::VersionQualityRangeVecExtensions; #[derive(Clone, Debug, Default)] From 1df622e9257f9690f2d8e777a47fa6115e0732bd Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Tue, 21 May 2024 20:42:21 +0200 Subject: [PATCH 4/6] refactor: Remove leftover warnings after the move --- .../runtime/generator/src/parser/grammar.rs | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/crates/codegen/runtime/generator/src/parser/grammar.rs 
b/crates/codegen/runtime/generator/src/parser/grammar.rs index 3e9bb1554d..055ec1164d 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar.rs @@ -25,10 +25,6 @@ pub struct Grammar { } impl Grammar { - pub fn elements(&self) -> &HashMap<&'static str, GrammarElement> { - &self.elements - } - pub fn accept_visitor(&self, visitor: &mut V) { visitor.grammar_enter(self); for element in self.elements.values() { @@ -36,13 +32,9 @@ impl Grammar { } visitor.grammar_leave(self); } - - pub fn register>(&mut self, instance: E) { - let element: GrammarElement = instance.into(); - self.elements.insert(element.name(), element); - } } +#[allow(clippy::enum_variant_names)] // this will be removed soon #[derive(Clone)] pub enum GrammarElement { ScannerDefinition(ScannerDefinitionRef), @@ -52,18 +44,6 @@ pub enum GrammarElement { PrecedenceParserDefinition(PrecedenceParserDefinitionRef), } -impl GrammarElement { - pub fn name(&self) -> &'static str { - match self { - Self::ScannerDefinition(scanner) => scanner.name(), - Self::KeywordScannerDefinition(scanner) => scanner.name(), - Self::TriviaParserDefinition(trivia_parser) => trivia_parser.name(), - Self::ParserDefinition(parser) => parser.name(), - Self::PrecedenceParserDefinition(precedence_parser) => precedence_parser.name(), - } - } -} - impl From for GrammarElement { fn from(def: ScannerDefinitionRef) -> Self { GrammarElement::ScannerDefinition(def) From 154c04f87fda4e2a890ece9064417f03c0de0e0f Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Tue, 21 May 2024 21:00:07 +0200 Subject: [PATCH 5/6] refactor: Remove the now unnecessary GrammarConstructorDslV2 trait --- crates/codegen/runtime/generator/src/parser.rs | 7 +++---- .../codegen/runtime/generator/src/parser/grammar.rs | 1 - .../generator/src/parser/grammar/constructor.rs | 13 ++++--------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/crates/codegen/runtime/generator/src/parser.rs 
b/crates/codegen/runtime/generator/src/parser.rs index d66d0dfcf0..2d9719b41b 100644 --- a/crates/codegen/runtime/generator/src/parser.rs +++ b/crates/codegen/runtime/generator/src/parser.rs @@ -14,10 +14,9 @@ mod scanner_definition; mod trie; use grammar::{ - Grammar, GrammarConstructorDslV2 as _, GrammarVisitor, KeywordScannerAtomic, - KeywordScannerDefinitionRef, ParserDefinitionNode, ParserDefinitionRef, - PrecedenceParserDefinitionRef, ScannerDefinitionNode, ScannerDefinitionRef, - TriviaParserDefinitionRef, + Grammar, GrammarVisitor, KeywordScannerAtomic, KeywordScannerDefinitionRef, + ParserDefinitionNode, ParserDefinitionRef, PrecedenceParserDefinitionRef, + ScannerDefinitionNode, ScannerDefinitionRef, TriviaParserDefinitionRef, }; use keyword_scanner_definition::KeywordScannerDefinitionExtensions as _; use parser_definition::ParserDefinitionExtensions as _; diff --git a/crates/codegen/runtime/generator/src/parser/grammar.rs b/crates/codegen/runtime/generator/src/parser/grammar.rs index 055ec1164d..5ceecd70c9 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar.rs @@ -9,7 +9,6 @@ pub mod scanner_definition; pub mod version_quality; pub mod visitor; -pub use constructor::GrammarConstructorDslV2; pub use parser_definition::*; pub use precedence_parser_definition::*; pub use scanner_definition::*; diff --git a/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs b/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs index 21d99273f9..007cd2d6b9 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar/constructor.rs @@ -1,5 +1,4 @@ -//! Defines [`GrammarConstructorDslV2`], which allows turning the DSL v2 model into [`Grammar`] -//! (used for generating the parser and the CST). +//! 
Defines a translation of DSL v2 model into [`Grammar`], which is used for generating the parser and the CST. use std::cell::OnceCell; use std::collections::{BTreeMap, BTreeSet, HashMap}; @@ -17,13 +16,9 @@ use crate::parser::grammar::{ TriviaParserDefinition, VersionQuality, VersionQualityRange, }; -/// Materializes the DSL v2 model ([`model::Language`]) into [`Grammar`]. -pub trait GrammarConstructorDslV2 { - fn from_dsl_v2(lang: &model::Language) -> Grammar; -} - -impl GrammarConstructorDslV2 for Grammar { - fn from_dsl_v2(lang: &model::Language) -> Grammar { +impl Grammar { + /// Materializes the DSL v2 model ([`model::Language`]) into [`Grammar`]. + pub fn from_dsl_v2(lang: &model::Language) -> Grammar { // Collect language items into a lookup table to speed up resolution let items: HashMap<_, _> = lang .topics() From 3776df5d52f8c20160cd4697d005bdd460f236f3 Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Tue, 21 May 2024 21:00:39 +0200 Subject: [PATCH 6/6] Add some module-level docs --- crates/codegen/runtime/generator/src/parser.rs | 2 ++ crates/codegen/runtime/generator/src/parser/grammar.rs | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/crates/codegen/runtime/generator/src/parser.rs b/crates/codegen/runtime/generator/src/parser.rs index 2d9719b41b..e3dfb606c6 100644 --- a/crates/codegen/runtime/generator/src/parser.rs +++ b/crates/codegen/runtime/generator/src/parser.rs @@ -1,3 +1,5 @@ +//! Defines parser code generation for the language grammar. + use std::collections::{BTreeMap, BTreeSet}; use std::rc::Rc; diff --git a/crates/codegen/runtime/generator/src/parser/grammar.rs b/crates/codegen/runtime/generator/src/parser/grammar.rs index 5ceecd70c9..6fea152c4e 100644 --- a/crates/codegen/runtime/generator/src/parser/grammar.rs +++ b/crates/codegen/runtime/generator/src/parser/grammar.rs @@ -1,3 +1,9 @@ +//! Definitions of the [`GrammarElement`]s and the grammar itself ([`Grammar`]). 
+ +// TODO(#638): This is a leftover module from the original DSLv1 implementation. +// We should remove it and replace the grammar construction in the super `parser` +// module with the one from the new DSLv2 in the `constructor` module. + use std::collections::{BTreeSet, HashMap}; use semver::Version;