Skip to content

Commit

Permalink
refactor: Use model::Scanner in place of ScannerDefinitionNode
Browse files Browse the repository at this point in the history
  • Loading branch information
Xanewok committed Jun 5, 2024
1 parent a318014 commit 583d04e
Show file tree
Hide file tree
Showing 13 changed files with 285 additions and 398 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use codegen_language_internal_macros::{derive_spanned_type, ParseInputTokens, Wr
use itertools::Itertools;
use serde::{Deserialize, Serialize};

use crate::model::{Identifier, VersionSpecifier};
use crate::model::{Identifier, Scanner, VersionSpecifier};

#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[derive_spanned_type(Clone, Debug, ParseInputTokens, WriteOutputTokens)]
Expand Down Expand Up @@ -32,6 +32,23 @@ pub enum KeywordValue {
Atom { atom: String },
}

impl From<KeywordValue> for Scanner {
fn from(value: KeywordValue) -> Scanner {
match value {
KeywordValue::Optional { value } => Scanner::Optional {
scanner: Box::new((*value).into()),
},
KeywordValue::Sequence { values } => Scanner::Sequence {
scanners: values.into_iter().map(Into::into).collect(),
},
KeywordValue::Atom { atom } => Scanner::Atom { atom },
KeywordValue::Choice { values } => Scanner::Choice {
scanners: values.into_iter().map(Into::into).collect(),
},
}
}
}

impl KeywordValue {
/// Collects all possible variations generated by this value.
pub fn collect_variations(&self) -> Vec<String> {
Expand Down
21 changes: 11 additions & 10 deletions crates/codegen/runtime/generator/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,23 @@
use std::collections::{BTreeMap, BTreeSet};
use std::rc::Rc;

use codegen_language_definition::model::{Identifier, Language};
use codegen_language_definition::model::{self, Identifier, Language};
use serde::Serialize;

mod codegen;
mod grammar;

use codegen::{
KeywordScannerDefinitionCodegen as _, ParserDefinitionCodegen as _,
PrecedenceParserDefinitionCodegen as _, ScannerDefinitionCodegen as _, Trie,
PrecedenceParserDefinitionCodegen as _, Trie,
};
use grammar::{
Grammar, GrammarVisitor, KeywordScannerAtomic, KeywordScannerDefinitionRef,
ParserDefinitionNode, ParserDefinitionRef, PrecedenceParserDefinitionRef, ScannerDefinitionRef,
TriviaParserDefinitionRef,
Grammar, GrammarVisitor, ParserDefinitionNode, ParserDefinitionRef,
PrecedenceParserDefinitionRef, ScannerDefinitionRef, TriviaParserDefinitionRef,
};

use crate::parser::codegen::KeywordScannerAtomic;

/// Newtype around a `String` that holds already generated Rust code, so that it is
/// not confused with regular strings.
#[derive(Serialize, Default, Clone)]
struct RustCode(String);
Expand Down Expand Up @@ -78,7 +79,7 @@ struct ScannerContextAccumulatorState {
/// Set of delimiter pairs for this context that are used in delimited error recovery.
delimiters: BTreeMap<Identifier, Identifier>,
scanner_definitions: BTreeSet<Identifier>,
keyword_scanner_defs: BTreeMap<Identifier, KeywordScannerDefinitionRef>,
keyword_scanner_defs: BTreeMap<Identifier, Rc<model::KeywordItem>>,
}

impl ParserModel {
Expand Down Expand Up @@ -121,7 +122,7 @@ impl ParserAccumulatorState {
for scanner_name in &context.scanner_definitions {
let scanner = &self.all_scanners[scanner_name];

let literals = scanner.literals();
let literals = scanner.literals().unwrap_or_default();
if literals.is_empty() {
acc.compound_scanner_names.push(scanner_name.clone());
} else {
Expand All @@ -135,7 +136,7 @@ impl ParserAccumulatorState {
acc.promotable_identifier_scanners = context
.keyword_scanner_defs
.values()
.map(|def| def.identifier_scanner().clone())
.map(|def| def.identifier.clone())
.collect();

let mut keyword_trie = Trie::new();
Expand All @@ -161,7 +162,7 @@ impl ParserAccumulatorState {
.iter()
.filter(|(name, scanner)| {
// are compound (do not consist of only literals)
scanner.literals().is_empty() ||
scanner.literals().is_none() ||
// but make sure to also include a scanner that is referenced by other scanners, even if not compound
!self.top_level_scanner_names.contains(*name)
})
Expand Down Expand Up @@ -249,7 +250,7 @@ impl GrammarVisitor for ParserAccumulatorState {
ParserDefinitionNode::KeywordScannerDefinition(scanner) => {
self.current_context()
.keyword_scanner_defs
.insert(scanner.name().clone(), Rc::clone(scanner));
.insert(scanner.name.clone(), Rc::clone(scanner));
}

// Collect delimiters for each context
Expand Down
3 changes: 1 addition & 2 deletions crates/codegen/runtime/generator/src/parser/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ mod scanner_definition;
mod trie;
mod versioned;

pub use keyword_scanner_definition::KeywordScannerDefinitionCodegen;
pub use keyword_scanner_definition::{KeywordScannerAtomic, KeywordScannerDefinitionCodegen};
pub use parser_definition::ParserDefinitionCodegen;
pub use precedence_parser_definition::PrecedenceParserDefinitionCodegen;
pub use scanner_definition::ScannerDefinitionCodegen;
pub use trie::Trie;
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
use std::rc::Rc;

use codegen_language_definition::model;
use proc_macro2::TokenStream;
use quote::{format_ident, quote};

use crate::parser::codegen::scanner_definition::ScannerDefinitionNodeCodegen as _;
use crate::parser::codegen::scanner_definition::ScannerExt as _;
use crate::parser::codegen::versioned::VersionedQuote;
use crate::parser::grammar::{KeywordScannerDefinitionRef, ScannerDefinitionNode};

/// Code generation for keyword scanner definitions.
pub trait KeywordScannerDefinitionCodegen {
/// Returns the scanner code for `self` as a [`TokenStream`].
fn to_scanner_code(&self) -> TokenStream;
}

impl KeywordScannerDefinitionCodegen for KeywordScannerDefinitionRef {
impl KeywordScannerDefinitionCodegen for model::KeywordItem {
fn to_scanner_code(&self) -> TokenStream {
let name_ident = format_ident!("{}", self.name());
let name_ident = format_ident!("{}", self.name);
let terminal_kind = quote! { TerminalKind::#name_ident };

let kw_scanners: Vec<_> = self
.definitions()
.definitions
.iter()
.map(|versioned_kw| {
let scanner = versioned_kw.value.to_scanner_code();
Expand Down Expand Up @@ -82,6 +83,54 @@ impl KeywordScannerDefinitionCodegen for KeywordScannerDefinitionRef {
impl KeywordScannerDefinitionCodegen for model::KeywordValue {
fn to_scanner_code(&self) -> TokenStream {
// This is a subset; let's reuse that
ScannerDefinitionNode::from(self.clone()).to_scanner_code()
model::Scanner::from(self.clone()).to_scanner_code()
}
}

/// A newtype wrapper around [`model::KeywordItem`] that only has a single atom value.
///
/// The main use of this type is to construct a keyword trie: the trie only works
/// with single atom values, and keyword promotion additionally needs to account for
/// keyword reservation rather than just literal presence.
#[derive(Clone)]
pub struct KeywordScannerAtomic(Rc<model::KeywordItem>);

impl KeywordScannerAtomic {
    /// Wraps `def` when it consists of exactly one definition whose value is an atom.
    ///
    /// Returns `None` for every other shape (no definitions, several definitions,
    /// or a single non-atom value). On success the `Rc` is cloned, not the item.
    pub fn try_from_def(def: &Rc<model::KeywordItem>) -> Option<Self> {
        let is_single_atom = matches!(
            def.definitions[..],
            [model::KeywordDefinition {
                value: model::KeywordValue::Atom { .. },
                ..
            }]
        );

        is_single_atom.then(|| Self(Rc::clone(def)))
    }
}

impl std::ops::Deref for KeywordScannerAtomic {
type Target = Rc<model::KeywordItem>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl KeywordScannerAtomic {
    /// Returns the first definition of the wrapped keyword item.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped item has no definitions.
    pub fn definition(&self) -> &model::KeywordDefinition {
        match self.0.definitions.first() {
            Some(definition) => definition,
            None => panic!("KeywordScannerAtomic should have exactly one definition"),
        }
    }

    /// Returns the atom text of the definition returned by [`Self::definition`].
    ///
    /// # Panics
    ///
    /// Panics if that definition's value is not an atom.
    pub fn value(&self) -> &str {
        match &self.definition().value {
            model::KeywordValue::Atom { atom } => atom,
            _ => unreachable!("KeywordScannerAtomic should have a single atom value"),
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ impl ParserDefinitionNodeCodegen for ParserDefinitionNode {

// Keyword scanner uses the promotion inside the parse_terminal
Self::KeywordScannerDefinition(scanner_definition) => {
let kind = format_ident!("{name}", name = scanner_definition.name());
let kind = format_ident!("{name}", name = scanner_definition.name);

let parse_terminal = if is_trivia {
format_ident!("parse_terminal")
Expand Down
Loading

0 comments on commit 583d04e

Please sign in to comment.