refactor: Do not emit synthetic parser to account for keywords
Xanewok committed Jun 20, 2024
1 parent 64b8566 commit 6fc41a4
Showing 2 changed files with 28 additions and 48 deletions.
59 changes: 12 additions & 47 deletions crates/codegen/runtime/generator/src/parser/grammar/resolver.rs
@@ -1,7 +1,7 @@
 //! Defines a translation of DSL v2 model into [`Grammar`], which is used for generating the parser and the CST.
 use std::cell::OnceCell;
-use std::collections::{BTreeMap, BTreeSet, HashMap};
+use std::collections::{BTreeMap, HashMap};
 use std::ops::Deref;
 use std::rc::Rc;
 
@@ -111,16 +111,18 @@ pub struct ResolveCtx {
 }
 
 pub struct Resolution {
+    /// Original items as defined by the DSL v2.
+    items: HashMap<Identifier, (Identifier, Item)>,
     resolved: HashMap<Identifier, GrammarElement>,
     // Trivia are defined separately from the main grammar
     leading_trivia: Rc<dyn TriviaParserDefinition>,
     trailing_trivia: Rc<dyn TriviaParserDefinition>,
 }
 
 impl ResolveCtx {
-    fn new(language: &Language) -> Self {
+    pub fn resolve(lang: &Language) -> Resolution {
         // Collect language items into a lookup table to speed up resolution
-        let items: HashMap<_, _> = language
+        let items: HashMap<_, _> = lang
             .topics()
             .flat_map(|topic| {
                 topic.items.iter().map(|item| {
@@ -131,57 +133,15 @@ impl ResolveCtx {
             })
             .collect();
 
-        ResolveCtx {
+        let mut ctx = ResolveCtx {
             items,
             resolved: HashMap::new(),
-        }
-    }
-
-    pub fn resolve(lang: &Language) -> Resolution {
-        let mut ctx = ResolveCtx::new(lang);
+        };
 
         for item in lang.items() {
             resolve_grammar_element(item.name(), &mut ctx);
         }
 
-        // TODO(#638): To make sure the unused (not referred to) keywords are included in the scanner literal trie,
-        // we replicate the DSL v1 behaviour of introducing a synthetic parser that is only meant to group
-        // keywords by their lexical context.
-        let mut keywords_per_ctxt = HashMap::new();
-        for (ident, (lex_ctx, item)) in &ctx.items {
-            if let Item::Keyword { .. } = item {
-                keywords_per_ctxt
-                    .entry(lex_ctx.clone())
-                    .or_insert_with(BTreeSet::new)
-                    .insert(ident.clone());
-            }
-        }
-        for (lex_ctx, keywords) in keywords_per_ctxt {
-            let parser_name = Identifier::from(format!("{lex_ctx}AllKeywords"));
-            let all_keywords = model::EnumItem {
-                name: parser_name.clone(),
-                enabled: None,
-                variants: keywords
-                    .iter()
-                    .map(|ident| model::EnumVariant {
-                        reference: ident.clone(),
-                        enabled: None,
-                    })
-                    .collect(),
-            };
-
-            let def = resolve_choice(all_keywords, &mut ctx);
-            ctx.resolved.insert(
-                parser_name.clone(),
-                GrammarElement::ParserDefinition(Rc::new(NamedParserThunk {
-                    name: parser_name,
-                    context: lex_ctx.clone(),
-                    is_inline: true,
-                    def: OnceCell::from(def),
-                })),
-            );
-        }
-
         Resolution {
             leading_trivia: Rc::new(NamedTriviaParser {
                 name: Identifier::from("LeadingTrivia"),
@@ -191,12 +151,17 @@ impl ResolveCtx {
                 name: Identifier::from("TrailingTrivia"),
                 def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx),
             }) as Rc<dyn TriviaParserDefinition>,
+            items: ctx.items,
             resolved: ctx.resolved,
         }
     }
 }
 
 impl Resolution {
+    pub fn original(&self, name: &Identifier) -> &(Identifier, Item) {
+        &self.items[name]
+    }
+
     /// Returns the resolved items.
     pub fn items(&self) -> impl Iterator<Item = (&Identifier, &GrammarElement)> {
         self.resolved.iter()
17 changes: 16 additions & 1 deletion crates/codegen/runtime/generator/src/parser/mod.rs
@@ -107,14 +107,27 @@ impl ParserAccumulatorState {
             .expect("context must be set with `set_current_context`")
     }
 
-    fn into_model(self, resolved: &Resolution) -> ParserModel {
+    fn into_model(mut self, resolved: &Resolution) -> ParserModel {
         // Lookup table for all scanners; used to generate trie scanners.
         let all_scanners: BTreeMap<_, _> = resolved
            .items()
            .filter_map(|(_, item)| item.try_as_scanner_definition_ref())
            .map(|scanner| (scanner.name().clone(), Rc::clone(scanner)))
            .collect();
 
+        for kw_scanner_def in resolved
+            .items()
+            .filter_map(|(_, item)| item.try_as_keyword_scanner_definition_ref())
+        {
+            let (lex_ctxt, _) = resolved.original(&kw_scanner_def.name);
+
+            self.scanner_contexts
+                .entry(lex_ctxt.clone())
+                .or_default()
+                .keyword_scanner_defs
+                .insert(kw_scanner_def.name.clone(), Rc::clone(kw_scanner_def));
+        }
+
         let contexts = self
             .scanner_contexts
             .into_iter()
@@ -251,6 +264,8 @@ impl GrammarVisitor for ParserAccumulatorState {
                     .insert(scanner.name().clone());
             }
             ParserDefinitionNode::KeywordScannerDefinition(scanner) => {
+                // In addition to the context a keyword is defined in, we also
+                // need to include reachable ones for the current lexical context
                 self.current_context()
                     .keyword_scanner_defs
                     .insert(scanner.name.clone(), Rc::clone(scanner));
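For context, the change boils down to this: instead of emitting a synthetic `{Context}AllKeywords` choice parser during grammar resolution, the resolver keeps the original DSL items around (exposed via the new `Resolution::original`), and `into_model` groups keyword scanner definitions by the lexical context they were defined in. The following is a minimal, self-contained sketch of that grouping step; the types `Identifier`, `KeywordScannerDef`, `ScannerContext`, and `Resolution` here are simplified stand-ins for illustration only, not the actual generator types.

// Sketch of the keyword-grouping step, with hypothetical stand-in types.
use std::collections::{BTreeMap, HashMap};
use std::rc::Rc;

type Identifier = String;

struct KeywordScannerDef {
    name: Identifier,
}

#[derive(Default)]
struct ScannerContext {
    keyword_scanner_defs: BTreeMap<Identifier, Rc<KeywordScannerDef>>,
}

// Stands in for `Resolution`: keeps the original items around so that the
// lexical context an item was defined in can be looked up by name.
struct Resolution {
    items: HashMap<Identifier, (Identifier, Rc<KeywordScannerDef>)>,
}

impl Resolution {
    fn original(&self, name: &Identifier) -> &(Identifier, Rc<KeywordScannerDef>) {
        &self.items[name]
    }

    fn keyword_scanners(&self) -> impl Iterator<Item = &Rc<KeywordScannerDef>> {
        self.items.values().map(|(_, def)| def)
    }
}

// Groups keyword scanners by the lexical context they were defined in,
// replacing the old synthetic `{Context}AllKeywords` choice parser.
fn group_keywords_by_context(resolved: &Resolution) -> HashMap<Identifier, ScannerContext> {
    let mut scanner_contexts: HashMap<Identifier, ScannerContext> = HashMap::new();

    for kw_scanner_def in resolved.keyword_scanners() {
        let (lex_ctxt, _) = resolved.original(&kw_scanner_def.name);

        scanner_contexts
            .entry(lex_ctxt.clone())
            .or_default()
            .keyword_scanner_defs
            .insert(kw_scanner_def.name.clone(), Rc::clone(kw_scanner_def));
    }

    scanner_contexts
}

fn main() {
    let abstract_kw = Rc::new(KeywordScannerDef { name: "AbstractKeyword".into() });
    let pragma_kw = Rc::new(KeywordScannerDef { name: "ExperimentalKeyword".into() });

    let resolution = Resolution {
        items: HashMap::from([
            ("AbstractKeyword".to_string(), ("Default".to_string(), abstract_kw)),
            ("ExperimentalKeyword".to_string(), ("Pragma".to_string(), pragma_kw)),
        ]),
    };

    let contexts = group_keywords_by_context(&resolution);
    // Each keyword ends up under its own lexical context rather than under a
    // shared synthetic parser.
    assert_eq!(contexts["Default"].keyword_scanner_defs.len(), 1);
    assert_eq!(contexts["Pragma"].keyword_scanner_defs.len(), 1);
}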
