Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove EndOfFileTrivia and always collect trivia after top-level parsing #733

Merged
merged 8 commits into from
Jan 12, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,7 @@ impl Selector {
RustNamedNode {
name: _,
node: RustNode::Rule(rule),
} if matches!(
rule.kind,
RuleKind::LeadingTrivia | RuleKind::TrailingTrivia | RuleKind::EndOfFileTrivia
) => {
} if rule.kind.is_trivia() => {
// skip trivia, since it's not part of the AST
self.index += 1;
continue;
Expand Down
40 changes: 35 additions & 5 deletions crates/codegen/parser/runtime/src/support/parser_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::rc::Rc;

use crate::cst::{self, NamedNode};
use crate::kinds::TokenKind;
use crate::lexer::Lexer;
use crate::parse_error::ParseError;
use crate::parse_output::ParseOutput;
use crate::support::context::ParserContext;
Expand All @@ -12,16 +13,43 @@ pub trait ParserFunction<L>
where
Self: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
{
fn parse(&self, language: &L, input: &str) -> ParseOutput;
fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput;
}

impl<L, F> ParserFunction<L> for F
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
where
L: Lexer,
F: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
{
fn parse(&self, language: &L, input: &str) -> ParseOutput {
fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput {
let mut stream = ParserContext::new(input);
let result = self(language, &mut stream);
let mut result = self(language, &mut stream);

// For a succesful/recovered parse, collect any remaining trivia as part of the parse result
// TODO(#737): Remove this once we unconditionally collect trivia
if collect_trivia {
if let ParserResult::Match(r#match) = &mut result {
let [topmost] = r#match.nodes.as_mut_slice() else {
unreachable!(
"Match at the top level of a parse does not have exactly one Rule node"
)
};

let eof_trivia = match Lexer::leading_trivia(language, &mut stream) {
ParserResult::Match(eof_trivia) if !eof_trivia.nodes.is_empty() => {
Some(eof_trivia.nodes)
}
_ => None,
};

if let (cst::Node::Rule(rule), Some(eof_trivia)) = (&mut topmost.node, eof_trivia) {
let mut new_children = rule.children.clone();
new_children.extend(eof_trivia);

topmost.node = cst::Node::rule(rule.kind, new_children);
}
}
}

let is_incomplete = matches!(result, ParserResult::IncompleteMatch(_));
let is_recovering = matches!(result, ParserResult::SkippedUntil(_));
Expand Down Expand Up @@ -67,7 +95,6 @@ where

let start = stream.position();

let errors = stream.into_errors();
// Mark the rest of the unconsumed stream as skipped and report an error
// NOTE: IncompleteMatch internally consumes the stream when picked via choice,
// so needs a separate check here.
Expand All @@ -81,7 +108,8 @@ where
cst::Node::token(TokenKind::SKIPPED, input[start.utf8..].to_string());
let mut new_children = topmost_rule.children.clone();
new_children.push(NamedNode::anonymous(skipped_node));
let mut errors = errors;

let mut errors = stream.into_errors();
errors.push(ParseError::new_covering_range(
start..input.into(),
expected_tokens,
Expand All @@ -93,6 +121,8 @@ where
}
} else {
let parse_tree = cst::Node::Rule(topmost_rule);
let errors = stream.into_errors();

// Sanity check: Make sure that succesful parse is equivalent to not having any SKIPPED nodes
debug_assert_eq!(
errors.is_empty(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,12 @@ impl Language {
pub fn parse(&self, kind: RuleKind, input: &str) -> ParseOutput {
match kind {
{%- for parser_name, _code in code.parser_functions -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input),
{# TODO(#737): Remove the special case once we stop generating RuleKind for trivia #}
{%- if parser_name is ending_with("Trivia") -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, false),
{%- else -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, true),
{%- endif -%}
{%- endfor -%}
}
}
Expand Down
52 changes: 2 additions & 50 deletions crates/solidity/inputs/language/src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@ pub trait GrammarConstructorDslV2 {
}

impl GrammarConstructorDslV2 for Grammar {
#[allow(clippy::too_many_lines)] // TODO: Remove me once the hack below is removed
fn from_dsl_v2(lang: &model::Language) -> Grammar {
// Collect language items into a lookup table to speed up resolution
let mut items: HashMap<_, _> = lang
let items: HashMap<_, _> = lang
.items_with_section()
.map(|(_, topic, item)| {
(
Expand All @@ -35,39 +34,6 @@ impl GrammarConstructorDslV2 for Grammar {
})
.collect();

// TODO(#638): To minimize regression in the parser migration, we keep the existing DSL v1 model
// of SourceUnit being followed by `EndOfFileTrivia`.
items.insert(
Identifier::from("SourceUnit"),
(
None,
model::Item::Struct {
item: Rc::new(model::StructItem {
name: Identifier::from("SourceUnit"),
enabled: None,
error_recovery: None,
fields: IndexMap::from_iter([
(
Identifier::from("members"),
model::Field::Optional {
reference: Identifier::from("SourceUnitMembers"),

enabled: None,
},
),
(
Identifier::from("eof_trivia"),
model::Field::Optional {
reference: Identifier::from("EndOfFileTrivia"),
enabled: None,
},
),
]),
}),
},
),
);

let mut resolved = HashMap::new();
let mut ctx = ResolveCtx {
items: &items,
Expand All @@ -84,16 +50,6 @@ impl GrammarConstructorDslV2 for Grammar {
def: resolve_trivia(lang.trailing_trivia.clone(), &mut ctx),
}) as Rc<dyn TriviaParserDefinition>;

let eof_trivia = Rc::new(NamedTriviaParser {
name: "EndOfFileTrivia",
def: resolve_trivia(lang.leading_trivia.clone(), &mut ctx),
}) as Rc<dyn TriviaParserDefinition>;

ctx.resolved.insert(
Identifier::from("EndOfFileTrivia"),
eof_trivia.clone().into(),
);

for (_lex_ctx, item) in items.values() {
resolve_grammar_element(item.name(), &mut ctx);
}
Expand Down Expand Up @@ -151,7 +107,7 @@ impl GrammarConstructorDslV2 for Grammar {
trailing_trivia_parser: trailing_trivia.clone(),
elements: resolved_items
.chain(
[leading_trivia, trailing_trivia, eof_trivia]
[leading_trivia, trailing_trivia]
.into_iter()
.map(|elem| (elem.name(), elem.into())),
)
Expand Down Expand Up @@ -321,10 +277,6 @@ struct ResolveCtx<'a> {

#[allow(clippy::too_many_lines)] // FIXME: Simplify me when we simplify the v2-to-v1 interface
fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> GrammarElement {
if ident.as_str() == "EndOfFileTrivia" {
return ctx.resolved.get(ident).unwrap().clone();
}

let (lex_ctx, elem) = ctx.items.get(ident).expect("Missing item");

// FIXME: Don't leak
Expand Down
3 changes: 0 additions & 3 deletions crates/solidity/outputs/cargo/crate/src/generated/kinds.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading