-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement (contextual) keywords and use their versioning from v2 (#723)
Closes #568. There is still one outstanding issue where we return a `Vec<TokenKind>` from `next_token`; I'd like to return a more specialized type and ideally pass it on the stack (2x2 bytes), rather than on the heap (extra 3x8 bytes for the Vec handle + indirection). We should name it better and properly show that we can return at most 2 token kinds (single token kind or identifier + kw combo). To do: - [x] Return tokens from `next_token` via the stack Apart from that, I think this is a more correct approach than #598, especially accounting for the new keyword definition format in DSL v2. The main change is that we check the keyword trie, and additionally the (newly introduced) compound keyword scanners, only after the token has been lexed as an identifier. For each context, we collect Identifier scanners used by the keywords and attempt promotion there. The existing lexing performance is not impacted from what I've seen when running the sanctuary tests and I can verify (incl. CST tests) that we now properly parse source that uses contextual keywords (e.g. `from`) and that the compound keywords (e.g. `ufixedMxN`) are properly versioned. This adapts the existing `codegen_grammar` interface that's a leftover from DSLv1; I did that to work on finishing #638; once this is merged and we now properly parse contextual keywords, I'll move to clean it up and reduce the parser codegen indirection (right now we go from v2 -> v1 model -> code generator -> Tera templates; I'd like to at least cut out the v1 model and/or simplify visiting v2 from the existing `CodeGenerator`). Please excuse the WIP comments in the middle; the first and the last ones should make sense when reviewing. I can simplify this a bit for review, if needed.
- Loading branch information
Showing
93 changed files
with
9,493 additions
and
5,616 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"@nomicfoundation/slang": minor | ||
--- | ||
|
||
Properly parse unreserved keywords in an identifier position, e.g. `from`, `emit`, `global`, etc. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 88 additions & 0 deletions
88
crates/codegen/parser/generator/src/keyword_scanner_definition.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
use codegen_grammar::{ | ||
KeywordScannerDefinitionNode, KeywordScannerDefinitionRef, ScannerDefinitionNode, | ||
}; | ||
use proc_macro2::TokenStream; | ||
use quote::{format_ident, quote}; | ||
|
||
use crate::parser_definition::VersionQualityRangeVecExtensions; | ||
use crate::scanner_definition::ScannerDefinitionNodeExtensions; | ||
|
||
pub trait KeywordScannerDefinitionExtensions { | ||
fn to_scanner_code(&self) -> TokenStream; | ||
} | ||
|
||
impl KeywordScannerDefinitionExtensions for KeywordScannerDefinitionRef { | ||
fn to_scanner_code(&self) -> TokenStream { | ||
let name_ident = format_ident!("{}", self.name()); | ||
let token_kind = quote! { TokenKind::#name_ident }; | ||
|
||
let kw_scanners: Vec<_> = self | ||
.definitions() | ||
.iter() | ||
.map(|versioned_kw| { | ||
let scanner = versioned_kw.value.to_scanner_code(); | ||
let enabled_cond = versioned_kw.enabled.as_bool_expr(); | ||
let reserved_cond = versioned_kw.reserved.as_bool_expr(); | ||
|
||
// Simplify the emitted code if we trivially know that reserved or enabled is true | ||
match (&*reserved_cond.to_string(), &*enabled_cond.to_string()) { | ||
("true", _) => quote! { | ||
if #scanner { | ||
KeywordScan::Reserved(#token_kind) | ||
} else { | ||
KeywordScan::Absent | ||
} | ||
}, | ||
("false", _) => quote! { | ||
if #enabled_cond && #scanner { | ||
KeywordScan::Present(#token_kind) | ||
} else { | ||
KeywordScan::Absent | ||
} | ||
}, | ||
(_, "true") => quote! { | ||
if #scanner { | ||
if #reserved_cond { | ||
KeywordScan::Reserved(#token_kind) | ||
} else { | ||
KeywordScan::Present(#token_kind) | ||
} | ||
} else { | ||
KeywordScan::Absent | ||
} | ||
}, | ||
(_, "false") => quote! { | ||
if #reserved_cond && #scanner { | ||
KeywordScan::Reserved(#token_kind) | ||
} else { | ||
KeywordScan::Absent | ||
} | ||
}, | ||
_ => quote! { | ||
if (#reserved_cond || #enabled_cond) && #scanner { | ||
if #reserved_cond { | ||
KeywordScan::Reserved(#token_kind) | ||
} else { | ||
KeywordScan::Present(#token_kind) | ||
} | ||
} else { | ||
KeywordScan::Absent | ||
} | ||
}, | ||
} | ||
}) | ||
.collect(); | ||
|
||
match &kw_scanners[..] { | ||
[] => quote! { KeywordScan::Absent }, | ||
multiple => quote! { scan_keyword_choice!(input, ident, #(#multiple),*) }, | ||
} | ||
} | ||
} | ||
|
||
impl KeywordScannerDefinitionExtensions for KeywordScannerDefinitionNode { | ||
fn to_scanner_code(&self) -> TokenStream { | ||
// This is a subset; let's reuse that | ||
ScannerDefinitionNode::from(self.clone()).to_scanner_code() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.