Skip to content

Commit

Permalink
Reduce state as much as possible when walking the grammar in PG
Browse files Browse the repository at this point in the history
  • Loading branch information
Xanewok committed Jun 5, 2024
1 parent e5e006d commit daff07c
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 59 deletions.
73 changes: 39 additions & 34 deletions crates/codegen/runtime/generator/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@ use codegen::{
};
use grammar::{
GrammarVisitor, ParserDefinitionNode, ParserDefinitionRef, PrecedenceParserDefinitionRef,
TriviaParserDefinitionRef,
};

use crate::parser::codegen::KeywordScannerAtomic;
use crate::parser::grammar::resolver::Resolution;
use crate::parser::grammar::ResolveCtx;
use crate::parser::grammar::{GrammarElement, ResolveCtx, TriviaParserDefinitionRef};

/// Newtype for the already generated Rust code, not to be confused with regular strings.
#[derive(Serialize, Default, Clone)]
Expand Down Expand Up @@ -63,11 +62,6 @@ struct ParserAccumulatorState {
// Defines the `Lexer::next_terminal` method.
scanner_contexts: BTreeMap<Identifier, ScannerContextAccumulatorState>,

/// Defines the top-level parser functions in `Language`.
parser_functions: BTreeMap<Identifier, RustCode>, // (name of parser, code)
/// Defines the top-level trivia parser functions in `Language`.
trivia_parser_functions: BTreeMap<Identifier, RustCode>, // (name of parser, code)

/// Makes sure to codegen the scanner functions that are referenced by other scanners.
top_level_scanner_names: BTreeSet<Identifier>,
/// The current context of a parent scanner/parser being processed.
Expand Down Expand Up @@ -107,6 +101,7 @@ impl ParserAccumulatorState {
.expect("context must be set with `set_current_context`")
}

#[allow(clippy::too_many_lines)] // FIXME
fn into_model(mut self, resolved: &Resolution) -> ParserModel {
// Lookup table for all scanners; used to generate trie scanners.
let all_scanners: BTreeMap<_, _> = resolved
Expand Down Expand Up @@ -205,9 +200,44 @@ impl ParserAccumulatorState {
})
.collect();

// FIXME: Move this to a separate collector for clarity
// and to limit mutable state needed for the lexer model
let mut trivia_parser_functions = BTreeMap::default();
let mut parser_functions = BTreeMap::default();
for (_, item) in resolved.items() {
match item {
GrammarElement::TriviaParserDefinition(parser) => {
trivia_parser_functions.insert(
parser.name().clone(),
RustCode(parser.to_parser_code().to_string()),
);
}
GrammarElement::ParserDefinition(parser) if !parser.is_inline() => {
parser_functions.insert(
parser.name().clone(),
RustCode(parser.to_parser_code().to_string()),
);
}

GrammarElement::PrecedenceParserDefinition(parser) => {
// While it's not common to parse a precedence expression as a standalone nonterminal,
// we generate a function for completeness.
for (name, code) in parser.to_precedence_expression_parser_code() {
parser_functions.insert(name.clone(), RustCode(code.to_string()));
}
parser_functions.insert(
parser.name().clone(),
RustCode(parser.to_parser_code().to_string()),
);
}
_ => {}
}
}

ParserModel {
parser_functions: self.parser_functions,
trivia_parser_functions: self.trivia_parser_functions,
// These are directly collected from the flat resolved items
parser_functions,
trivia_parser_functions,
// These are derived from the accumulated state
scanner_contexts: contexts,
scanner_functions,
Expand All @@ -219,39 +249,14 @@ impl ParserAccumulatorState {
impl GrammarVisitor for ParserAccumulatorState {
/// `GrammarVisitor` callback for a trivia parser definition.
///
/// Makes the parser's context the current one and records the generated
/// parser code under the parser's name in `trivia_parser_functions`.
///
/// NOTE(review): this listing is a diff rendered without +/- markers. The
/// `into_model` hunk performs the same insertion from `resolved.items()`,
/// so the `insert` below is presumably on the removed side of the diff —
/// confirm against the raw diff.
fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) {
// Select the context that later visitor callbacks will accumulate into.
self.set_current_context(parser.context().clone());

// Key: parser name; value: the parser code rendered to a string and
// wrapped in the `RustCode` newtype.
self.trivia_parser_functions.insert(
parser.name().clone(),
RustCode(parser.to_parser_code().to_string()),
);
}

/// `GrammarVisitor` callback for a (non-precedence) parser definition.
///
/// Always makes the parser's context the current one; additionally records
/// the generated parser code in `parser_functions`, but only for parsers
/// that are not inline.
///
/// NOTE(review): this listing is a diff rendered without +/- markers. The
/// `into_model` hunk performs the same non-inline insertion from
/// `resolved.items()`, so the `if` block below is presumably on the removed
/// side of the diff — confirm against the raw diff.
fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) {
// Have to set this regardless so that we can collect referenced scanners
self.set_current_context(parser.context().clone());

// Inline parsers get no entry in `parser_functions`.
if !parser.is_inline() {
self.parser_functions.insert(
parser.name().clone(),
RustCode(parser.to_parser_code().to_string()),
);
}
}

/// `GrammarVisitor` callback for a precedence parser definition.
///
/// Makes the parser's context the current one, then records in
/// `parser_functions` one entry per `(name, code)` pair returned by
/// `to_precedence_expression_parser_code()`, followed by an entry for the
/// precedence parser itself under its own name.
///
/// NOTE(review): this listing is a diff rendered without +/- markers. The
/// `into_model` hunk performs the same insertions from `resolved.items()`,
/// so everything after `set_current_context` is presumably on the removed
/// side of the diff — confirm against the raw diff.
fn precedence_parser_definition_enter(&mut self, parser: &PrecedenceParserDefinitionRef) {
// Select the context that later visitor callbacks will accumulate into.
self.set_current_context(parser.context().clone());

// While it's not common to parse a precedence expression as a standalone nonterminal,
// we generate a function for completeness.
for (name, code) in parser.to_precedence_expression_parser_code() {
self.parser_functions
.insert(name.clone(), RustCode(code.to_string()));
}

// Inserted after the per-expression entries, so on a name clash this
// entry is the one that remains in the map.
self.parser_functions.insert(
parser.name().clone(),
RustCode(parser.to_parser_code().to_string()),
);
}

fn parser_definition_node_enter(&mut self, node: &ParserDefinitionNode) {
Expand Down
47 changes: 22 additions & 25 deletions crates/codegen/runtime/generator/src/parser/grammar/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,6 @@ pub struct Resolution {
/// Original items as defined by the DSL v2.
items: HashMap<Identifier, (Identifier, Item)>,
resolved: HashMap<Identifier, GrammarElement>,
// Trivia are defined separately from the main grammar
leading_trivia: Rc<dyn TriviaParserDefinition>,
trailing_trivia: Rc<dyn TriviaParserDefinition>,
}

impl ResolveCtx {
Expand All @@ -140,15 +137,25 @@ impl ResolveCtx {
resolve_grammar_element(item.name(), &mut ctx);
}

// Trivia is defined separately from the main grammar
let leading_trivia = Rc::new(NamedTriviaParser {
name: Identifier::from("LeadingTrivia"),
def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx),
});

let trailing_trivia = Rc::new(NamedTriviaParser {
name: Identifier::from("TrailingTrivia"),
def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx),
});

for trivia in [leading_trivia, trailing_trivia] {
ctx.resolved.insert(
trivia.name().clone(),
GrammarElement::TriviaParserDefinition(trivia),
);
}

Resolution {
leading_trivia: Rc::new(NamedTriviaParser {
name: Identifier::from("LeadingTrivia"),
def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx),
}) as Rc<dyn TriviaParserDefinition>,
trailing_trivia: Rc::new(NamedTriviaParser {
name: Identifier::from("TrailingTrivia"),
def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx),
}) as Rc<dyn TriviaParserDefinition>,
items: ctx.items,
resolved: ctx.resolved,
}
Expand All @@ -167,21 +174,11 @@ impl Resolution {

/// Collects the already resolved item into a [`Grammar`].
pub fn to_grammar(&self) -> Grammar {
let resolved_items = self
.resolved
.iter()
.map(|(name, elem)| (name.clone(), elem.clone()));

let leading_trivia = Rc::clone(&self.leading_trivia);
let trailing_trivia = Rc::clone(&self.trailing_trivia);

Grammar {
elements: resolved_items
.chain(
[leading_trivia, trailing_trivia]
.into_iter()
.map(|elem| (elem.name().clone(), elem.into())),
)
elements: self
.resolved
.iter()
.map(|(name, elem)| (name.clone(), elem.clone()))
.collect(),
}
}
Expand Down

0 comments on commit daff07c

Please sign in to comment.