WIP: Add some more
Xanewok committed Dec 23, 2023
1 parent 08b4396 commit 0746345
Showing 32 changed files with 235 additions and 300 deletions.
14 changes: 6 additions & 8 deletions crates/codegen/parser/runtime/src/lexer.rs
@@ -14,10 +14,8 @@ pub enum KeywordScan {
 pub trait Lexer {
     // Generated by the templating engine
     #[doc(hidden)]
-    fn next_token<LexCtx: IsLexicalContext>(
-        &self,
-        input: &mut ParserContext<'_>,
-    ) -> Option<TokenKind>;
+    fn next_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext<'_>)
+        -> Vec<TokenKind>;
     // NOTE: These are context-insensitive
     #[doc(hidden)]
     fn leading_trivia(&self, input: &mut ParserContext<'_>) -> ParserResult;
@@ -35,7 +33,7 @@ pub trait Lexer {
         let start = input.position();
         let token = self.next_token::<LexCtx>(input);
         input.set_position(start);
-        token
+        token.first().copied()
     }

     /// Peeks the next significant (i.e. non-trivia) token. Does not advance the input.
@@ -49,7 +47,7 @@ pub trait Lexer {
         let token = self.next_token::<LexCtx>(input);

         input.set_position(start);
-        token
+        token.first().copied()
     }

     /// Attempts to consume the next expected token. Advances the input only if the token matches.
@@ -59,7 +57,7 @@ pub trait Lexer {
         kind: TokenKind,
     ) -> ParserResult {
         let start = input.position();
-        if self.next_token::<LexCtx>(input) != Some(kind) {
+        if !self.next_token::<LexCtx>(input).contains(&kind) {
             input.set_position(start);
             return ParserResult::no_match(vec![kind]);
         }
@@ -91,7 +89,7 @@ pub trait Lexer {
         }

         let start = input.position();
-        if self.next_token::<LexCtx>(input) != Some(kind) {
+        if !self.next_token::<LexCtx>(input).contains(&kind) {
            input.set_position(restore);
            return ParserResult::no_match(vec![kind]);
        }
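
The hunks above widen `next_token` from a single `Option<TokenKind>` to a `Vec<TokenKind>` holding every kind that ties for the longest match: peeking keeps its old shape by taking the first candidate, while `parse_token` turns an equality check into a membership test. A minimal sketch of that calling convention, with stand-in types rather than the generated ones:

    // Stand-in kinds; the real code uses the generated TokenKind enum.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum Kind {
        Identifier,
        FromKeyword,
    }

    // Hypothetical scan of an input such as `from`, which could match both
    // an identifier scanner and a keyword scanner at the same length.
    fn next_token() -> Vec<Kind> {
        vec![Kind::Identifier, Kind::FromKeyword]
    }

    fn main() {
        let candidates = next_token();
        // Peeking collapses the set to its first element, as in the diff:
        assert_eq!(candidates.first().copied(), Some(Kind::Identifier));
        // Expecting a specific kind becomes a containment check:
        assert!(candidates.contains(&Kind::FromKeyword));
    }
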
2 changes: 1 addition & 1 deletion crates/codegen/parser/runtime/src/support/recovery.rs
@@ -129,7 +129,7 @@ pub fn skip_until_with_nested_delims<L: Lexer, LexCtx: IsLexicalContext>(
     let mut local_delims = vec![];
     loop {
         let save = input.position();
-        match lexer.next_token::<LexCtx>(input) {
+        match lexer.next_token::<LexCtx>(input).first().copied() {
             // If we're not skipping past a local delimited group (delimiter stack is empty),
             // we can unwind on a token that's expected by us or by our ancestor.
             Some(token)
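
Error recovery still decides whether to unwind based on a single lookahead, so the candidate set is collapsed back to an `Option` before matching. A simplified model of the skipping loop (stand-in token type, no nested-delimiter tracking):

    // Walk a stream of ambiguous lexes; only the first candidate at each
    // step is consulted when deciding whether to stop skipping.
    fn skip_until(stream: &[Vec<u8>], expected: &[u8]) -> Option<usize> {
        for (i, candidates) in stream.iter().enumerate() {
            match candidates.first().copied() {
                Some(token) if expected.contains(&token) => return Some(i),
                _ => {} // no token, or an unexpected one: keep skipping
            }
        }
        None
    }

    fn main() {
        let stream = vec![vec![1], vec![2, 3], vec![4]];
        assert_eq!(skip_until(&stream, &[2]), Some(1));
        // A kind that only appears as a secondary candidate is never seen:
        assert_eq!(skip_until(&stream, &[3]), None);
    }
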
65 changes: 47 additions & 18 deletions crates/codegen/parser/runtime/src/templates/language.rs.jinja2
@@ -97,7 +97,7 @@ impl Language {
     fn {{ keyword_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> KeywordScan { {{ code }} }
     {%- endfor %}

-    pub fn scan(&self, lexical_context: LexicalContext, input: &str) -> Option<TokenKind> {
+    pub fn scan(&self, lexical_context: LexicalContext, input: &str) -> Vec<TokenKind> {
         let mut input = ParserContext::new(input);
         match lexical_context {
             {%- for context_name, context in code.scanner_contexts -%}
@@ -137,20 +137,23 @@ impl Lexer for Language {
         }
     }

-    fn next_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext<'_>) -> Option<TokenKind> {
+    fn next_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext<'_>) -> Vec<TokenKind> {
         let save = input.position();
         let mut furthest_position = input.position();
-        let mut longest_token = None;
-        // Whether we've seen a keyword
+        {# let mut longest_token = None; #}
+        {# let mut ambiguous = None; #}
+        // Whether we've seen a keyword
+        {# let mut results = [TokenKind::SKIPPED; 2]; #}
+        let mut longest_tokens = vec![];

         macro_rules! longest_match {
             ($( { $kind:ident = $function:ident } )*) => {
                 $(
                     if self.$function(input) && input.position() > furthest_position {
                         furthest_position = input.position();
-                        longest_token = Some(TokenKind::$kind);
+
+                        longest_tokens = vec![TokenKind::$kind];
+                        {# longest_token = Some(TokenKind::$kind); #}
                     }
                     input.set_position(save);
                 )*
@@ -160,18 +163,24 @@
             ($( { $kind:ident = $function:ident } )*) => {
                 $(
                     match self.$function(input) {
-                        KeywordScan::Absent => {/* To do - rollback */},
+                        KeywordScan::Absent => {},
                         KeywordScan::Present => {
-                            // Only bump if we're strictly longer?
                             if input.position() > furthest_position {
                                 furthest_position = input.position();
+                                longest_tokens = vec![TokenKind::$kind];
+                            } else if input.position() == furthest_position {
+                                longest_tokens.push(TokenKind::$kind);
                             }
                         }
-                        KeywordScan::Reserved if input.position() >= furthest_position => {
-                            furthest_position = input.position();
-                            longest_token = Some(TokenKind::$kind);
-                            // We're running after the identifier and we're checking if it's a reserved keyword
-                            input.set_position(furthest_position);
-                            return longest_token;
+                        KeywordScan::Reserved => {
+                            // If we're reserved, we can't be ambiguous, so always overwrite, even if len is equal
+                            if input.position() >= furthest_position {
+                                furthest_position = input.position();
+                                longest_tokens = vec![TokenKind::$kind];
+                                // We're running after the identifier and we're checking if it's a reserved keyword
+                                input.set_position(furthest_position);
+                            }
+                        }
                         _ => {}
                     }
                     input.set_position(save);
                 )*
@@ -184,7 +193,8 @@
             // TODO: Handle keywords using a separate keword scanner promotion mechanism
             if let Some(kind) = {{ context.literal_scanner }} {
                 furthest_position = input.position();
-                longest_token = Some(kind);
+                longest_tokens = vec![kind];
+                {# longest_token = Some(kind); #}
             }
             input.set_position(save);

@@ -203,7 +213,26 @@
             {%- endfor %}
         }

-        match longest_token {
+        match longest_tokens.as_slice() {
+            {# &[token, ..] => { #}
+            {#     input.set_position(furthest_position); #}
+            {#     vec![token] #}
+            {# } #}
+            // TODO: Handle returning ambiguous tokens
+            &[_, ..] => {
+                input.set_position(furthest_position);
+                longest_tokens
+                {# vec![token] #}
+            },
+            // Skip a character if possible and if we didn't recognize a token
+            &[] if input.peek().is_some() => {
+                let _ = input.next();
+                vec![TokenKind::SKIPPED]
+            },
+            &[] => vec![],
+        }
+
+        {# match longest_token {
             Some(..) => {
                 input.set_position(furthest_position);
                 longest_token
@@ -215,7 +244,7 @@
             },
             // EOF
             None => None,
-        }
+        } #}
     }
 }
@@ -247,7 +276,7 @@ impl Language {
     }

     #[napi(js_name = "scan", ts_return_type = "kinds.TokenKind | null", catch_unwind)]
-    pub fn scan_napi(&self, lexical_context: LexicalContext, input: String) -> Option<TokenKind> {
+    pub fn scan_napi(&self, lexical_context: LexicalContext, input: String) -> Vec<TokenKind> {
         self.scan(lexical_context, input.as_str())
     }
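
The template's `next_token` is now a longest-match loop that accumulates every kind tied at the furthest position in `longest_tokens`, lets a reserved keyword overwrite the whole set (reservation rules out ambiguity), and emits a single `SKIPPED` token when nothing matched but input remains. The accumulation strategy in isolation, as a rough sketch; `scan_all` and the length-based scanners are illustrative, not part of the commit:

    // Each scanner reports how many bytes of the input it can match,
    // with zero meaning no match at all.
    type Scanner = fn(&str) -> usize;

    fn ident(s: &str) -> usize {
        s.bytes().take_while(|b| b.is_ascii_alphabetic()).count()
    }

    fn kw_from(s: &str) -> usize {
        if s.starts_with("from") { 4 } else { 0 }
    }

    fn scan_all(input: &str, scanners: &[(&'static str, Scanner)]) -> Vec<&'static str> {
        let mut furthest = 0;
        let mut longest: Vec<&'static str> = vec![];
        for &(kind, scan) in scanners {
            let len = scan(input);
            if len > 0 && len > furthest {
                // Strictly longer match: it becomes the sole candidate.
                furthest = len;
                longest = vec![kind];
            } else if len > 0 && len == furthest {
                // Tie at the longest length: the scan is ambiguous.
                longest.push(kind);
            }
        }
        longest
    }

    fn main() {
        let scanners: &[(&'static str, Scanner)] =
            &[("Identifier", ident), ("FromKeyword", kw_from)];
        // `from` ties both scanners at length 4, so both kinds come back,
        // mirroring the `&[_, ..]` arm that returns all of `longest_tokens`.
        assert_eq!(scan_all("from", scanners), vec!["Identifier", "FromKeyword"]);
    }
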
58 changes: 34 additions & 24 deletions crates/solidity/outputs/cargo/crate/src/generated/language.rs

Some generated files are not rendered by default.

14 changes: 6 additions & 8 deletions crates/solidity/outputs/cargo/crate/src/generated/lexer.rs

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion crates/solidity/outputs/cargo/tests/src/scanner/mod.rs
@@ -26,6 +26,6 @@ fn test_next_token() {
         ("0ZZ", SKIPPED),
         ("0xabZZ", SKIPPED),
     ] {
-        assert_eq!(language.scan(LexicalContext::Default, s), Some(*k));
+        assert_eq!(language.scan(LexicalContext::Default, s), vec![*k]);
     }
 }