WIP: Add some more
Xanewok committed Dec 23, 2023
1 parent 08b4396 commit 0746345
Showing 32 changed files with 235 additions and 300 deletions.
14 changes: 6 additions & 8 deletions crates/codegen/parser/runtime/src/lexer.rs
@@ -14,10 +14,8 @@ pub enum KeywordScan {
 pub trait Lexer {
     // Generated by the templating engine
     #[doc(hidden)]
-    fn next_token<LexCtx: IsLexicalContext>(
-        &self,
-        input: &mut ParserContext<'_>,
-    ) -> Option<TokenKind>;
+    fn next_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext<'_>)
+        -> Vec<TokenKind>;
     // NOTE: These are context-insensitive
     #[doc(hidden)]
     fn leading_trivia(&self, input: &mut ParserContext<'_>) -> ParserResult;
@@ -35,7 +33,7 @@ pub trait Lexer {
         let start = input.position();
         let token = self.next_token::<LexCtx>(input);
         input.set_position(start);
-        token
+        token.first().copied()
     }

     /// Peeks the next significant (i.e. non-trivia) token. Does not advance the input.
@@ -49,7 +47,7 @@ pub trait Lexer {
         let token = self.next_token::<LexCtx>(input);

         input.set_position(start);
-        token
+        token.first().copied()
     }

     /// Attempts to consume the next expected token. Advances the input only if the token matches.
@@ -59,7 +57,7 @@ pub trait Lexer {
         kind: TokenKind,
     ) -> ParserResult {
         let start = input.position();
-        if self.next_token::<LexCtx>(input) != Some(kind) {
+        if !self.next_token::<LexCtx>(input).contains(&kind) {
             input.set_position(start);
             return ParserResult::no_match(vec![kind]);
         }
@@ -91,7 +89,7 @@ pub trait Lexer {
         }

         let start = input.position();
-        if self.next_token::<LexCtx>(input) != Some(kind) {
+        if !self.next_token::<LexCtx>(input).contains(&kind) {
            input.set_position(restore);
            return ParserResult::no_match(vec![kind]);
        }
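
The hunks above widen `next_token` from a single `Option<TokenKind>` to a `Vec<TokenKind>` holding every kind that ties for the longest match: peeking keeps its old shape by taking the first candidate, while `parse_token` turns an equality check into a membership test. A minimal sketch of that calling convention, with stand-in types rather than the generated ones:

    // Stand-in kinds; the real code uses the generated TokenKind enum.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum Kind {
        Identifier,
        FromKeyword,
    }

    // Hypothetical scan of an input such as `from`, which could match both
    // an identifier scanner and a keyword scanner at the same length.
    fn next_token() -> Vec<Kind> {
        vec![Kind::Identifier, Kind::FromKeyword]
    }

    fn main() {
        let candidates = next_token();
        // Peeking collapses the set to its first element, as in the diff:
        assert_eq!(candidates.first().copied(), Some(Kind::Identifier));
        // Expecting a specific kind becomes a containment check:
        assert!(candidates.contains(&Kind::FromKeyword));
    }
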
2 changes: 1 addition & 1 deletion crates/codegen/parser/runtime/src/support/recovery.rs
@@ -129,7 +129,7 @@ pub fn skip_until_with_nested_delims<L: Lexer, LexCtx: IsLexicalContext>(
     let mut local_delims = vec![];
     loop {
         let save = input.position();
-        match lexer.next_token::<LexCtx>(input) {
+        match lexer.next_token::<LexCtx>(input).first().copied() {
             // If we're not skipping past a local delimited group (delimiter stack is empty),
             // we can unwind on a token that's expected by us or by our ancestor.
             Some(token)
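
Error recovery still decides whether to unwind based on a single lookahead, so the candidate set is collapsed back to an `Option` before matching. A simplified model of the skipping loop (stand-in token type, no nested-delimiter tracking):

    // Walk a stream of ambiguous lexes; only the first candidate at each
    // step is consulted when deciding whether to stop skipping.
    fn skip_until(stream: &[Vec<u8>], expected: &[u8]) -> Option<usize> {
        for (i, candidates) in stream.iter().enumerate() {
            match candidates.first().copied() {
                Some(token) if expected.contains(&token) => return Some(i),
                _ => {} // no token, or an unexpected one: keep skipping
            }
        }
        None
    }

    fn main() {
        let stream = vec![vec![1], vec![2, 3], vec![4]];
        assert_eq!(skip_until(&stream, &[2]), Some(1));
        // A kind that only appears as a secondary candidate is never seen:
        assert_eq!(skip_until(&stream, &[3]), None);
    }
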
65 changes: 47 additions & 18 deletions crates/codegen/parser/runtime/src/templates/language.rs.jinja2
@@ -97,7 +97,7 @@ impl Language {
     fn {{ keyword_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> KeywordScan { {{ code }} }
     {%- endfor %}

-    pub fn scan(&self, lexical_context: LexicalContext, input: &str) -> Option<TokenKind> {
+    pub fn scan(&self, lexical_context: LexicalContext, input: &str) -> Vec<TokenKind> {
         let mut input = ParserContext::new(input);
         match lexical_context {
             {%- for context_name, context in code.scanner_contexts -%}
@@ -137,20 +137,23 @@ impl Lexer for Language {
         }
     }

-    fn next_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext<'_>) -> Option<TokenKind> {
+    fn next_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext<'_>) -> Vec<TokenKind> {
         let save = input.position();
         let mut furthest_position = input.position();
-        let mut longest_token = None;
-        // Whether we've seen a keyword
+        {# let mut longest_token = None; #}
+        {# let mut ambiguous = None; #}
+        // Whether we've seen a keyword
+        {# let mut results = [TokenKind::SKIPPED; 2]; #}
+        let mut longest_tokens = vec![];

         macro_rules! longest_match {
             ($( { $kind:ident = $function:ident } )*) => {
                 $(
                     if self.$function(input) && input.position() > furthest_position {
                         furthest_position = input.position();
-                        longest_token = Some(TokenKind::$kind);
+
+                        longest_tokens = vec![TokenKind::$kind];
+                        {# longest_token = Some(TokenKind::$kind); #}
                     }
                     input.set_position(save);
                 )*
@@ -160,18 +163,24 @@
             ($( { $kind:ident = $function:ident } )*) => {
                 $(
                     match self.$function(input) {
-                        KeywordScan::Absent => {/* To do - rollback */},
+                        KeywordScan::Absent => {},
                         KeywordScan::Present => {
-                            // Only bump if we're strictly longer?
                             if input.position() > furthest_position {
                                 furthest_position = input.position();
+                                longest_tokens = vec![TokenKind::$kind];
+                            } else if input.position() == furthest_position {
+                                longest_tokens.push(TokenKind::$kind);
                             }
                         }
-                        KeywordScan::Reserved if input.position() >= furthest_position => {
-                            furthest_position = input.position();
-                            longest_token = Some(TokenKind::$kind);
-                            // We're running after the identifier and we're checking if it's a reserved keyword
-                            input.set_position(furthest_position);
-                            return longest_token;
+                        KeywordScan::Reserved => {
+                            // If we're reserved, we can't be ambiguous, so always overwrite, even if len is equal
+                            if input.position() >= furthest_position {
+                                furthest_position = input.position();
+                                longest_tokens = vec![TokenKind::$kind];
+                                // We're running after the identifier and we're checking if it's a reserved keyword
+                                input.set_position(furthest_position);
+                            }
+                        }
                         _ => {}
                     }
                     input.set_position(save);
                 )*
@@ -184,7 +193,8 @@
             // TODO: Handle keywords using a separate keword scanner promotion mechanism
             if let Some(kind) = {{ context.literal_scanner }} {
                 furthest_position = input.position();
-                longest_token = Some(kind);
+                longest_tokens = vec![kind];
+                {# longest_token = Some(kind); #}
             }
             input.set_position(save);

@@ -203,7 +213,26 @@
             {%- endfor %}
         }

-        match longest_token {
+        match longest_tokens.as_slice() {
+            {# &[token, ..] => { #}
+            {#     input.set_position(furthest_position); #}
+            {#     vec![token] #}
+            {# } #}
+            // TODO: Handle returning ambiguous tokens
+            &[_, ..] => {
+                input.set_position(furthest_position);
+                longest_tokens
+                {# vec![token] #}
+            },
+            // Skip a character if possible and if we didn't recognize a token
+            &[] if input.peek().is_some() => {
+                let _ = input.next();
+                vec![TokenKind::SKIPPED]
+            },
+            &[] => vec![],
+        }
+
+        {# match longest_token {
             Some(..) => {
                 input.set_position(furthest_position);
                 longest_token
@@ -215,7 +244,7 @@
             },
             // EOF
             None => None,
-        }
+        } #}
     }
 }
@@ -247,7 +276,7 @@ impl Language {
     }

     #[napi(js_name = "scan", ts_return_type = "kinds.TokenKind | null", catch_unwind)]
-    pub fn scan_napi(&self, lexical_context: LexicalContext, input: String) -> Option<TokenKind> {
+    pub fn scan_napi(&self, lexical_context: LexicalContext, input: String) -> Vec<TokenKind> {
         self.scan(lexical_context, input.as_str())
     }
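
The template's `next_token` is now a longest-match loop that accumulates every kind tied at the furthest position in `longest_tokens`, lets a reserved keyword overwrite the whole set (reservation rules out ambiguity), and emits a single `SKIPPED` token when nothing matched but input remains. The accumulation strategy in isolation, as a rough sketch; `scan_all` and the length-based scanners are illustrative, not part of the commit:

    // Each scanner reports how many bytes of the input it can match,
    // with zero meaning no match at all.
    type Scanner = fn(&str) -> usize;

    fn ident(s: &str) -> usize {
        s.bytes().take_while(|b| b.is_ascii_alphabetic()).count()
    }

    fn kw_from(s: &str) -> usize {
        if s.starts_with("from") { 4 } else { 0 }
    }

    fn scan_all(input: &str, scanners: &[(&'static str, Scanner)]) -> Vec<&'static str> {
        let mut furthest = 0;
        let mut longest: Vec<&'static str> = vec![];
        for &(kind, scan) in scanners {
            let len = scan(input);
            if len > 0 && len > furthest {
                // Strictly longer match: it becomes the sole candidate.
                furthest = len;
                longest = vec![kind];
            } else if len > 0 && len == furthest {
                // Tie at the longest length: the scan is ambiguous.
                longest.push(kind);
            }
        }
        longest
    }

    fn main() {
        let scanners: &[(&'static str, Scanner)] =
            &[("Identifier", ident), ("FromKeyword", kw_from)];
        // `from` ties both scanners at length 4, so both kinds come back,
        // mirroring the `&[_, ..]` arm that returns all of `longest_tokens`.
        assert_eq!(scan_all("from", scanners), vec!["Identifier", "FromKeyword"]);
    }
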
58 changes: 34 additions & 24 deletions crates/solidity/outputs/cargo/crate/src/generated/language.rs

Some generated files are not rendered by default.

14 changes: 6 additions & 8 deletions crates/solidity/outputs/cargo/crate/src/generated/lexer.rs

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion crates/solidity/outputs/cargo/tests/src/scanner/mod.rs
@@ -26,6 +26,6 @@ fn test_next_token() {
         ("0ZZ", SKIPPED),
         ("0xabZZ", SKIPPED),
     ] {
-        assert_eq!(language.scan(LexicalContext::Default, s), Some(*k));
+        assert_eq!(language.scan(LexicalContext::Default, s), vec![*k]);
     }
 }