Attempt no. 2512 at simplifying grammar resolution
Xanewok committed Jun 4, 2024
1 parent 0e8fb3d commit b8352fe
Showing 3 changed files with 98 additions and 80 deletions.
9 changes: 6 additions & 3 deletions crates/codegen/runtime/generator/src/parser.rs
@@ -15,11 +15,12 @@ use codegen::{
PrecedenceParserDefinitionCodegen as _, Trie,
};
use grammar::{
Grammar, GrammarVisitor, ParserDefinitionNode, ParserDefinitionRef,
PrecedenceParserDefinitionRef, ScannerDefinitionRef, TriviaParserDefinitionRef,
GrammarVisitor, ParserDefinitionNode, ParserDefinitionRef, PrecedenceParserDefinitionRef,
ScannerDefinitionRef, TriviaParserDefinitionRef,
};

use crate::parser::codegen::KeywordScannerAtomic;
use crate::parser::grammar::ResolveCtx;

/// Newtype for the already generated Rust code, not to be confused with regular strings.
#[derive(Serialize, Default, Clone)]
@@ -89,7 +90,8 @@ struct ScannerContextAccumulatorState {
impl ParserModel {
pub fn from_language(language: &Rc<Language>) -> Self {
// First, we construct the DSLv1 model from the DSLv2 definition...
let grammar = Grammar::from_dsl_v2(language);
let resolved = ResolveCtx::resolve(language);
let grammar = resolved.to_grammar();
// ...which we then transform into the parser model
let mut acc = ParserAccumulatorState::default();
grammar.accept_visitor(&mut acc);
@@ -219,6 +221,7 @@ impl GrammarVisitor for ParserAccumulatorState {
fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) {
// Have to set this regardless so that we can collect referenced scanners
self.set_current_context(parser.context().clone());

if !parser.is_inline() {
self.parser_functions.insert(
parser.name().clone(),
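For context, the generator entry point above now goes through the resolver in two steps instead of a single `Grammar::from_dsl_v2` call. A minimal sketch of the resulting call path inside `ParserModel::from_language`, using only identifiers that appear in the diff (surrounding items elided):

```rust
use crate::parser::grammar::ResolveCtx;

// Resolve the DSLv2 definition into grammar elements (plus trivia parsers)...
let resolved = ResolveCtx::resolve(language);
// ...then materialize them into the DSLv1-style `Grammar`...
let grammar = resolved.to_grammar();
// ...which is walked by the accumulator visitor exactly as before.
let mut acc = ParserAccumulatorState::default();
grammar.accept_visitor(&mut acc);
```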
3 changes: 2 additions & 1 deletion crates/codegen/runtime/generator/src/parser/grammar.rs
@@ -9,14 +9,15 @@ use std::rc::Rc;

use codegen_language_definition::model::{self, Identifier};

pub mod constructor;
pub mod parser_definition;
pub mod precedence_parser_definition;
pub mod resolver;
pub mod scanner_definition;
pub mod visitor;

pub use parser_definition::*;
pub use precedence_parser_definition::*;
pub use resolver::ResolveCtx;
pub use scanner_definition::*;
pub use visitor::*;


@@ -17,80 +17,6 @@ use crate::parser::grammar::{

static DEFAULT_LEX_CTXT: Lazy<Identifier> = Lazy::new(|| Identifier::from("Default"));

impl Grammar {
/// Materializes the DSL v2 model ([`model::Language`]) into [`Grammar`].
pub fn from_dsl_v2(lang: &model::Language) -> Grammar {
let mut ctx = ResolveCtx::new(lang);

let leading_trivia = Rc::new(NamedTriviaParser {
name: Identifier::from("LeadingTrivia"),
def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx),
}) as Rc<dyn TriviaParserDefinition>;

let trailing_trivia = Rc::new(NamedTriviaParser {
name: Identifier::from("TrailingTrivia"),
def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx),
}) as Rc<dyn TriviaParserDefinition>;

for item in lang.items() {
resolve_grammar_element(item.name(), &mut ctx);
}

// TODO(#638): To make sure the unused (not referred to) keywords are included in the scanner literal trie,
// we replicate the DSL v1 behaviour of introducing a synthetic parser that is only meant to group
// keywords by their lexical context.
let mut keywords_per_ctxt = HashMap::new();
for (ident, (lex_ctx, item)) in &ctx.items {
if let Item::Keyword { .. } = item {
keywords_per_ctxt
.entry(lex_ctx.clone())
.or_insert_with(BTreeSet::new)
.insert(ident.clone());
}
}
for (lex_ctx, keywords) in keywords_per_ctxt {
let parser_name = Identifier::from(format!("{lex_ctx}AllKeywords"));
let all_keywords = model::EnumItem {
name: parser_name.clone(),
enabled: None,
variants: keywords
.iter()
.map(|ident| model::EnumVariant {
reference: ident.clone(),
enabled: None,
})
.collect(),
};

let def = resolve_choice(all_keywords, &mut ctx);
ctx.resolved.insert(
parser_name.clone(),
GrammarElement::ParserDefinition(Rc::new(NamedParserThunk {
name: parser_name,
context: lex_ctx.clone(),
is_inline: true,
def: OnceCell::from(def),
})),
);
}

let resolved_items = ctx
.resolved
.iter()
.map(|(name, elem)| (name.clone(), elem.clone()));

Grammar {
elements: resolved_items
.chain(
[leading_trivia, trailing_trivia]
.into_iter()
.map(|elem| (elem.name().clone(), elem.into())),
)
.collect(),
}
}
}

#[derive(Debug)]
struct NamedTriviaParser {
name: Identifier,
@@ -177,13 +103,20 @@ impl ParserThunk {
}
}

struct ResolveCtx {
pub struct ResolveCtx {
items: HashMap<Identifier, (Identifier, Item)>,
resolved: HashMap<Identifier, GrammarElement>,
}

pub struct Resolution {
resolved: HashMap<Identifier, GrammarElement>,
// Trivia are defined separately from the main grammar
leading_trivia: Rc<dyn TriviaParserDefinition>,
trailing_trivia: Rc<dyn TriviaParserDefinition>,
}

impl ResolveCtx {
pub fn new(language: &Language) -> Self {
fn new(language: &Language) -> Self {
// Collect language items into a lookup table to speed up resolution
let items: HashMap<_, _> = language
.topics()
@@ -201,6 +134,87 @@ impl ResolveCtx {
resolved: HashMap::new(),
}
}

pub fn resolve(lang: &Language) -> Resolution {
let mut ctx = ResolveCtx::new(lang);

for item in lang.items() {
resolve_grammar_element(item.name(), &mut ctx);
}

// TODO(#638): To make sure the unused (not referred to) keywords are included in the scanner literal trie,
// we replicate the DSL v1 behaviour of introducing a synthetic parser that is only meant to group
// keywords by their lexical context.
let mut keywords_per_ctxt = HashMap::new();
for (ident, (lex_ctx, item)) in &ctx.items {
if let Item::Keyword { .. } = item {
keywords_per_ctxt
.entry(lex_ctx.clone())
.or_insert_with(BTreeSet::new)
.insert(ident.clone());
}
}
for (lex_ctx, keywords) in keywords_per_ctxt {
let parser_name = Identifier::from(format!("{lex_ctx}AllKeywords"));
let all_keywords = model::EnumItem {
name: parser_name.clone(),
enabled: None,
variants: keywords
.iter()
.map(|ident| model::EnumVariant {
reference: ident.clone(),
enabled: None,
})
.collect(),
};

let def = resolve_choice(all_keywords, &mut ctx);
ctx.resolved.insert(
parser_name.clone(),
GrammarElement::ParserDefinition(Rc::new(NamedParserThunk {
name: parser_name,
context: lex_ctx.clone(),
is_inline: true,
def: OnceCell::from(def),
})),
);
}

Resolution {
leading_trivia: Rc::new(NamedTriviaParser {
name: Identifier::from("LeadingTrivia"),
def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx),
}) as Rc<dyn TriviaParserDefinition>,
trailing_trivia: Rc::new(NamedTriviaParser {
name: Identifier::from("TrailingTrivia"),
def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx),
}) as Rc<dyn TriviaParserDefinition>,
resolved: ctx.resolved,
}
}
}

impl Resolution {
/// Collects the already resolved item into a [`Grammar`].
pub fn to_grammar(&self) -> Grammar {
let resolved_items = self
.resolved
.iter()
.map(|(name, elem)| (name.clone(), elem.clone()));

let leading_trivia = Rc::clone(&self.leading_trivia);
let trailing_trivia = Rc::clone(&self.trailing_trivia);

Grammar {
elements: resolved_items
.chain(
[leading_trivia, trailing_trivia]
.into_iter()
.map(|elem| (elem.name().clone(), elem.into())),
)
.collect(),
}
}
}

#[allow(clippy::too_many_lines)] // FIXME(#638): Simplify me when we simplify the v2-to-v1 interface
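A note on the keyword-grouping step in `resolve` above (the TODO(#638) block): for each lexical context it synthesizes an inline `{Context}AllKeywords` parser whose definition is a choice over every keyword in that context, so that even unreferenced keywords end up in the scanner literal trie. A hypothetical illustration for a context named `Default` with two made-up keyword identifiers, mirroring the construction in the diff:

```rust
use codegen_language_definition::model::{self, Identifier};

// Hypothetical input: the "Default" lexical context contains two keywords.
// The keyword names below are invented for illustration only.
let all_keywords = model::EnumItem {
    name: Identifier::from("DefaultAllKeywords"),
    enabled: None,
    variants: vec![
        model::EnumVariant {
            reference: Identifier::from("AbstractKeyword"),
            enabled: None,
        },
        model::EnumVariant {
            reference: Identifier::from("ContractKeyword"),
            enabled: None,
        },
    ],
};
// `resolve_choice(all_keywords, &mut ctx)` then yields the definition that is
// registered as an inline "DefaultAllKeywords" parser in that lexical context.
```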
