From 71f3d1b7ffffdd9e45bf179ba34f5b3a4b5c4b01 Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Mon, 4 Mar 2024 15:55:16 +0100 Subject: [PATCH] feat: Allow specifying token match threshold for delimited recovery This is required to resolve syntax ambiguities, since we can't always safely attempt recovering from lookahead of 2 and more, by default. --- crates/codegen/grammar/src/constructor.rs | 6 +- .../codegen/grammar/src/parser_definition.rs | 28 ++--- .../parse_input_tokens/external_types.rs | 9 ++ .../write_output_tokens/external_types.rs | 10 ++ .../src/model/non_terminals/field.rs | 9 +- .../internal_macros/src/derive/spanned.rs | 2 +- .../parser/generator/src/parser_definition.rs | 13 +-- .../parser/runtime/src/parser_support/mod.rs | 2 +- .../src/parser_support/parser_result.rs | 29 ++++- .../runtime/src/parser_support/recovery.rs | 33 +++--- .../runtime/src/templates/language.rs.jinja2 | 2 +- .../inputs/language/src/definition.rs | 8 +- .../slang_solidity/src/generated/language.rs | 104 +++++++++--------- .../src/generated/parser_support/mod.rs | 2 +- .../generated/parser_support/parser_result.rs | 29 ++++- .../src/generated/parser_support/recovery.rs | 33 +++--- .../try_catch/generated/0.6.2-failure.yml | 51 --------- .../slang_testlang/src/generated/language.rs | 6 +- .../src/generated/parser_support/mod.rs | 2 +- .../generated/parser_support/parser_result.rs | 29 ++++- .../src/generated/parser_support/recovery.rs | 33 +++--- 21 files changed, 238 insertions(+), 202 deletions(-) delete mode 100644 crates/solidity/testing/snapshots/cst_output/TryStatement/try_catch/generated/0.6.2-failure.yml diff --git a/crates/codegen/grammar/src/constructor.rs b/crates/codegen/grammar/src/constructor.rs index d785433784..4f17b8d464 100644 --- a/crates/codegen/grammar/src/constructor.rs +++ b/crates/codegen/grammar/src/constructor.rs @@ -10,7 +10,7 @@ use codegen_language_definition::model::{self, FieldsErrorRecovery, Identifier, use indexmap::IndexMap; use crate::{ - DelimitedRecoveryOpts, Grammar, GrammarElement, KeywordScannerDefinition, + DelimitedRecoveryTokenThreshold, Grammar, GrammarElement, KeywordScannerDefinition, KeywordScannerDefinitionNode, KeywordScannerDefinitionVersionedNode, Labeled, ParserDefinition, ParserDefinitionNode, PrecedenceOperatorModel, PrecedenceParserDefinition, PrecedenceParserDefinitionNode, ScannerDefinition, ScannerDefinitionNode, @@ -602,8 +602,8 @@ fn resolve_sequence_like( let open = delims.next().unwrap(); let close = delims.next().unwrap(); - let opts = DelimitedRecoveryOpts::from(delimiters); - ParserDefinitionNode::DelimitedBy(open, Box::new(delimited_body), close, opts) + let threshold = DelimitedRecoveryTokenThreshold::from(delimiters); + ParserDefinitionNode::DelimitedBy(open, Box::new(delimited_body), close, threshold) }; // Replace with a new delimited node fields.insert( diff --git a/crates/codegen/grammar/src/parser_definition.rs b/crates/codegen/grammar/src/parser_definition.rs index 4fb90388ce..8f1bf3c0ab 100644 --- a/crates/codegen/grammar/src/parser_definition.rs +++ b/crates/codegen/grammar/src/parser_definition.rs @@ -55,22 +55,22 @@ impl Visitable for TriviaParserDefinitionRef { } } +/// How many tokens have to be matched to trigger the error recovery. +/// For ambiguous syntaxes this needs to be set to at least N, where N +/// is the token lookahead required to disambiguate the syntax. +/// +// By default, we assume no lookahead is required to recover from +// unrecognized body between delimiters. #[derive(Clone, Debug, Default)] -pub struct DelimitedRecoveryOpts { - /// Whether completely unmatched body between the delimiters should - /// prevent the the error recovery from being applied. - /// This is generally safe but sometimes needs to be disabled if the - /// recovery would lead to a misparse in case of ambiguous input. - pub disallow_unmatched_body: bool, -} +pub struct DelimitedRecoveryTokenThreshold(pub u8); -impl From for DelimitedRecoveryOpts { +impl From for DelimitedRecoveryTokenThreshold { fn from(delimiters: model::FieldDelimiters) -> Self { - Self { - disallow_unmatched_body: delimiters - .disallow_unmatched_body - .unwrap_or(DelimitedRecoveryOpts::default().disallow_unmatched_body), - } + Self( + delimiters + .tokens_matched_acceptance_threshold + .unwrap_or(DelimitedRecoveryTokenThreshold::default().0), + ) } } @@ -91,7 +91,7 @@ pub enum ParserDefinitionNode { Labeled>, Box, Labeled>, - DelimitedRecoveryOpts, + DelimitedRecoveryTokenThreshold, ), SeparatedBy(Labeled>, Labeled>), TerminatedBy(Box, Labeled>), diff --git a/crates/codegen/language/definition/src/internals/parse_input_tokens/external_types.rs b/crates/codegen/language/definition/src/internals/parse_input_tokens/external_types.rs index b674402754..6f9a17d844 100644 --- a/crates/codegen/language/definition/src/internals/parse_input_tokens/external_types.rs +++ b/crates/codegen/language/definition/src/internals/parse_input_tokens/external_types.rs @@ -140,6 +140,15 @@ impl ParseInputTokens for usize { } } +impl ParseInputTokens for u8 { + fn parse_value(input: ParseStream<'_>, _: &mut ErrorsCollection) -> Result { + let literal = ParseHelpers::syn::(input)?; + let value = literal.base10_parse::()?; + + Ok(value) + } +} + impl ParseInputTokens for Vec { fn parse_value(input: ParseStream<'_>, errors: &mut ErrorsCollection) -> Result { Ok(ParseHelpers::sequence(input, errors)) diff --git a/crates/codegen/language/definition/src/internals/write_output_tokens/external_types.rs b/crates/codegen/language/definition/src/internals/write_output_tokens/external_types.rs index fc477476bb..ac4fa8f48e 100644 --- a/crates/codegen/language/definition/src/internals/write_output_tokens/external_types.rs +++ b/crates/codegen/language/definition/src/internals/write_output_tokens/external_types.rs @@ -119,6 +119,16 @@ impl WriteOutputTokens for usize { } } +impl WriteOutputTokens for u8 { + fn write_output_tokens(&self) -> TokenStream { + let value = Literal::u8_suffixed(*self); + + quote! { + #value.into() + } + } +} + impl WriteOutputTokens for Vec { fn write_output_tokens(&self) -> TokenStream { let items = self.iter().map(T::write_output_tokens); diff --git a/crates/codegen/language/definition/src/model/non_terminals/field.rs b/crates/codegen/language/definition/src/model/non_terminals/field.rs index a53e1dfdbb..5a31c1ed56 100644 --- a/crates/codegen/language/definition/src/model/non_terminals/field.rs +++ b/crates/codegen/language/definition/src/model/non_terminals/field.rs @@ -15,12 +15,11 @@ pub struct FieldsErrorRecovery { pub struct FieldDelimiters { pub open: Identifier, pub close: Identifier, - /// Whether completely unmatched body between the delimiters should - /// prevent the the error recovery from being applied. - /// - /// This is generally safe but sometimes needs to be disabled if the - /// recovery would lead to a misparse in case of ambiguous input. pub disallow_unmatched_body: Option, + /// How many tokens have to be matched to trigger the error recovery. + /// For ambiguous syntaxes this needs to be set to at least N, where N + /// is the token lookahead required to disambiguate the syntax. + pub tokens_matched_acceptance_threshold: Option, } #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] diff --git a/crates/codegen/language/internal_macros/src/derive/spanned.rs b/crates/codegen/language/internal_macros/src/derive/spanned.rs index f83fa812a3..5358569296 100644 --- a/crates/codegen/language/internal_macros/src/derive/spanned.rs +++ b/crates/codegen/language/internal_macros/src/derive/spanned.rs @@ -159,7 +159,7 @@ fn get_spanned_type(input: Type) -> Type { } // External types should also be wrapped in 'Spanned': - "bool" | "char" | "PathBuf" | "String" | "Version" => { + "bool" | "u8" | "char" | "PathBuf" | "String" | "Version" => { parse_quote! { crate::internals::Spanned<#input> } diff --git a/crates/codegen/parser/generator/src/parser_definition.rs b/crates/codegen/parser/generator/src/parser_definition.rs index 07f0bc90e2..486c583bc5 100644 --- a/crates/codegen/parser/generator/src/parser_definition.rs +++ b/crates/codegen/parser/generator/src/parser_definition.rs @@ -191,7 +191,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { quote! { self.#function_name(input) } } - Self::DelimitedBy(open, body, close, opts) => { + Self::DelimitedBy(open, body, close, threshold) => { let open_label = format_ident!("{}", open.label.to_pascal_case()); let close_label = format_ident!("{}", close.label.to_pascal_case()); let [open_delim, close_delim] = match (open.as_ref(), close.as_ref()) { @@ -201,11 +201,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { ) => [open, close].map(|scanner| format_ident!("{}", scanner.name())), _ => unreachable!("Only tokens are permitted as delimiters"), }; - let recover = if opts.disallow_unmatched_body { - quote! { RecoverFromNoMatch::No } - } else { - quote! { RecoverFromNoMatch::Yes } - }; + let threshold = threshold.0; let parser = body.to_parser_code(context_name, is_trivia); let body_parser = body.applicable_version_quality_ranges().wrap_code( @@ -214,7 +210,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { .recover_until_with_nested_delims::<_, #lex_ctx>(input, self, TokenKind::#close_delim, - #recover, + TokenAcceptanceThreshold(#threshold), ) )?; }, @@ -280,7 +276,8 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode { .recover_until_with_nested_delims::<_, #lex_ctx>(input, self, TokenKind::#terminator, - RecoverFromNoMatch::No, + // Requires at least a partial match not to risk misparsing + TokenAcceptanceThreshold(1u8), ) )?; }, diff --git a/crates/codegen/parser/runtime/src/parser_support/mod.rs b/crates/codegen/parser/runtime/src/parser_support/mod.rs index 8c3d83af20..2673e34ebf 100644 --- a/crates/codegen/parser/runtime/src/parser_support/mod.rs +++ b/crates/codegen/parser/runtime/src/parser_support/mod.rs @@ -26,7 +26,7 @@ pub(crate) use parser_result::ParserResult; #[allow(unused_imports)] pub(crate) use precedence_helper::PrecedenceHelper; #[allow(unused_imports)] -pub(crate) use recovery::RecoverFromNoMatch; +pub(crate) use recovery::TokenAcceptanceThreshold; #[allow(unused_imports)] pub(crate) use repetition_helper::{OneOrMoreHelper, ZeroOrMoreHelper}; #[allow(unused_imports)] diff --git a/crates/codegen/parser/runtime/src/parser_support/parser_result.rs b/crates/codegen/parser/runtime/src/parser_support/parser_result.rs index e74efbea5b..37213ed76a 100644 --- a/crates/codegen/parser/runtime/src/parser_support/parser_result.rs +++ b/crates/codegen/parser/runtime/src/parser_support/parser_result.rs @@ -1,6 +1,6 @@ use std::ops::ControlFlow; -use crate::cst::{self, LabeledNode}; +use crate::cst::{self, LabeledNode, Node}; use crate::kinds::{NodeLabel, RuleKind, TokenKind}; use crate::text_index::TextIndex; @@ -201,6 +201,33 @@ impl IncompleteMatch { expected_tokens, } } + + /// Whether this prefix-matched at least `n` (non-skipped) tokens. + pub fn matches_at_least_n_tokens(&self, n: u8) -> bool { + let result = self + .nodes + .iter() + .flat_map(|node| node.cursor_with_offset(TextIndex::ZERO)) + .try_fold(0u8, |mut acc, node| { + match node { + Node::Token(tok) if tok.kind != TokenKind::SKIPPED => { + acc += 1; + } + _ => {} + } + + // Short-circuit not to walk the whole tree if we've already matched enough + if acc >= n { + ControlFlow::Break(acc) + } else { + ControlFlow::Continue(acc) + } + }); + + match result { + ControlFlow::Continue(value) | ControlFlow::Break(value) => value >= n, + } + } } #[derive(PartialEq, Eq, Clone, Debug)] diff --git a/crates/codegen/parser/runtime/src/parser_support/recovery.rs b/crates/codegen/parser/runtime/src/parser_support/recovery.rs index 94b52036c9..2e62815e55 100644 --- a/crates/codegen/parser/runtime/src/parser_support/recovery.rs +++ b/crates/codegen/parser/runtime/src/parser_support/recovery.rs @@ -7,18 +7,11 @@ use crate::parser_support::parser_result::SkippedUntil; use crate::parser_support::ParserResult; use crate::text_index::{TextRange, TextRangeExtensions as _}; -/// An explicit parameter for the [`ParserResult::recover_until_with_nested_delims`] method. +/// How many tokens have to be matched to trigger the error recovery. +/// For ambiguous syntaxes this needs to be set to at least N, where N +/// is the token lookahead required to disambiguate the syntax. #[derive(Clone, Copy)] -pub(crate) enum RecoverFromNoMatch { - Yes, - No, -} - -impl RecoverFromNoMatch { - pub fn as_bool(self) -> bool { - matches!(self, RecoverFromNoMatch::Yes) - } -} +pub(crate) struct TokenAcceptanceThreshold(pub(crate) u8); fn opt_parse( input: &mut ParserContext<'_>, @@ -46,7 +39,7 @@ impl ParserResult { input: &mut ParserContext<'_>, lexer: &L, expected: TokenKind, - recover_from_no_match: RecoverFromNoMatch, + acceptance_threshold: TokenAcceptanceThreshold, ) -> ParserResult { enum ParseResultKind { Match, @@ -57,11 +50,15 @@ impl ParserResult { let before_recovery = input.position(); let (mut nodes, mut expected_tokens, result_kind) = match self { - ParserResult::IncompleteMatch(result) => ( - result.nodes, - result.expected_tokens, - ParseResultKind::Incomplete, - ), + ParserResult::IncompleteMatch(result) + if result.matches_at_least_n_tokens(acceptance_threshold.0) => + { + ( + result.nodes, + result.expected_tokens, + ParseResultKind::Incomplete, + ) + } ParserResult::Match(result) if lexer .peek_token_with_trivia::(input) @@ -70,7 +67,7 @@ impl ParserResult { { (result.nodes, result.expected_tokens, ParseResultKind::Match) } - ParserResult::NoMatch(result) if recover_from_no_match.as_bool() => { + ParserResult::NoMatch(result) if acceptance_threshold.0 == 0 => { (vec![], result.expected_tokens, ParseResultKind::NoMatch) } // No need to recover, so just return as-is. diff --git a/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 b/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 index f597691060..8343d5e29b 100644 --- a/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 +++ b/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 @@ -22,7 +22,7 @@ use crate::napi_interface::parse_output::ParseOutput as NAPIParseOutput; use crate::parse_output::ParseOutput; use crate::parser_support::{ ChoiceHelper, OneOrMoreHelper, OptionalHelper, ParserContext, ParserFunction, ParserResult, - PrecedenceHelper, RecoverFromNoMatch, SeparatedHelper, SequenceHelper, ZeroOrMoreHelper, + PrecedenceHelper, SeparatedHelper, SequenceHelper, TokenAcceptanceThreshold, ZeroOrMoreHelper, }; #[derive(Debug)] diff --git a/crates/solidity/inputs/language/src/definition.rs b/crates/solidity/inputs/language/src/definition.rs index c10ba93709..f5540387eb 100644 --- a/crates/solidity/inputs/language/src/definition.rs +++ b/crates/solidity/inputs/language/src/definition.rs @@ -3349,10 +3349,10 @@ codegen_language_macros::compile!(Language( open = open_brace, close = close_brace, // NOTE: Despite `NamedArguments` requiring at least one element, - // we need to disable attempting to recover from empty elements, - // because this postfix is ambiguous with `try {} catch {}` - // and could lead to incorrect parsing if we recover past valid syntax. - disallow_unmatched_body = true + // we can only recover if we found at least two tokens (`ident:`) + // in the body, as this may be otherwise ambiguous with + // `try { func() } catch {}`. + tokens_matched_acceptance_threshold = 2 ) ), fields = ( diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/language.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/language.rs index da14499d52..e7b2ce370b 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/language.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/language.rs @@ -24,7 +24,7 @@ use crate::napi_interface::parse_output::ParseOutput as NAPIParseOutput; use crate::parse_output::ParseOutput; use crate::parser_support::{ ChoiceHelper, OneOrMoreHelper, OptionalHelper, ParserContext, ParserFunction, ParserResult, - PrecedenceHelper, RecoverFromNoMatch, SeparatedHelper, SequenceHelper, ZeroOrMoreHelper, + PrecedenceHelper, SeparatedHelper, SequenceHelper, TokenAcceptanceThreshold, ZeroOrMoreHelper, }; #[derive(Debug)] @@ -327,7 +327,7 @@ impl Language { input, self, TokenKind::CloseBracket, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -408,7 +408,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -558,7 +558,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -586,7 +586,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -763,7 +763,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -876,7 +876,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -940,7 +940,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -1050,7 +1050,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -1102,7 +1102,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -1120,7 +1120,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -1225,7 +1225,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -1277,7 +1277,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -1365,7 +1365,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -1441,7 +1441,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -1497,7 +1497,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -1570,7 +1570,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -2104,7 +2104,7 @@ impl Language { TokenKind::OpenBrace, ), )?; - seq . elem (self . named_arguments (input) . with_label (NodeLabel :: Arguments) . recover_until_with_nested_delims :: < _ , LexicalContextType :: Default > (input , self , TokenKind :: CloseBrace , RecoverFromNoMatch :: No ,)) ? ; + seq . elem (self . named_arguments (input) . with_label (NodeLabel :: Arguments) . recover_until_with_nested_delims :: < _ , LexicalContextType :: Default > (input , self , TokenKind :: CloseBrace , TokenAcceptanceThreshold (2u8) ,)) ? ; seq.elem_labeled( NodeLabel::CloseBrace, self.parse_token_with_trivia::( @@ -2167,7 +2167,7 @@ impl Language { input, self, TokenKind::CloseBracket, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -2320,7 +2320,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -2459,7 +2459,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -2846,7 +2846,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -2925,7 +2925,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3003,7 +3003,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -3139,7 +3139,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3232,7 +3232,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3327,7 +3327,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3541,7 +3541,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3587,7 +3587,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3768,7 +3768,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3855,7 +3855,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3914,7 +3914,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -3986,7 +3986,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -4116,7 +4116,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -4171,7 +4171,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -4352,7 +4352,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -4566,7 +4566,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -4602,7 +4602,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -4639,7 +4639,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -4756,7 +4756,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -4782,7 +4782,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -4816,7 +4816,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -4891,7 +4891,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -4934,7 +4934,7 @@ impl Language { input, self, TokenKind::CloseBracket, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -5208,7 +5208,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -5281,7 +5281,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -5375,7 +5375,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -5518,7 +5518,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -5841,7 +5841,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -5917,7 +5917,7 @@ impl Language { input, self, TokenKind::CloseBrace, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -6417,7 +6417,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( @@ -6694,7 +6694,7 @@ impl Language { input, self, TokenKind::CloseParen, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/mod.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/mod.rs index cf8ccbaad9..2d005d87d3 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/mod.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/mod.rs @@ -28,7 +28,7 @@ pub(crate) use parser_result::ParserResult; #[allow(unused_imports)] pub(crate) use precedence_helper::PrecedenceHelper; #[allow(unused_imports)] -pub(crate) use recovery::RecoverFromNoMatch; +pub(crate) use recovery::TokenAcceptanceThreshold; #[allow(unused_imports)] pub(crate) use repetition_helper::{OneOrMoreHelper, ZeroOrMoreHelper}; #[allow(unused_imports)] diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/parser_result.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/parser_result.rs index 138ba829a5..50146f58c7 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/parser_result.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/parser_result.rs @@ -2,7 +2,7 @@ use std::ops::ControlFlow; -use crate::cst::{self, LabeledNode}; +use crate::cst::{self, LabeledNode, Node}; use crate::kinds::{NodeLabel, RuleKind, TokenKind}; use crate::text_index::TextIndex; @@ -203,6 +203,33 @@ impl IncompleteMatch { expected_tokens, } } + + /// Whether this prefix-matched at least `n` (non-skipped) tokens. + pub fn matches_at_least_n_tokens(&self, n: u8) -> bool { + let result = self + .nodes + .iter() + .flat_map(|node| node.cursor_with_offset(TextIndex::ZERO)) + .try_fold(0u8, |mut acc, node| { + match node { + Node::Token(tok) if tok.kind != TokenKind::SKIPPED => { + acc += 1; + } + _ => {} + } + + // Short-circuit not to walk the whole tree if we've already matched enough + if acc >= n { + ControlFlow::Break(acc) + } else { + ControlFlow::Continue(acc) + } + }); + + match result { + ControlFlow::Continue(value) | ControlFlow::Break(value) => value >= n, + } + } } #[derive(PartialEq, Eq, Clone, Debug)] diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/recovery.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/recovery.rs index 563d3a60bf..2a53c35945 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/recovery.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/parser_support/recovery.rs @@ -9,18 +9,11 @@ use crate::parser_support::parser_result::SkippedUntil; use crate::parser_support::ParserResult; use crate::text_index::{TextRange, TextRangeExtensions as _}; -/// An explicit parameter for the [`ParserResult::recover_until_with_nested_delims`] method. +/// How many tokens have to be matched to trigger the error recovery. +/// For ambiguous syntaxes this needs to be set to at least N, where N +/// is the token lookahead required to disambiguate the syntax. #[derive(Clone, Copy)] -pub(crate) enum RecoverFromNoMatch { - Yes, - No, -} - -impl RecoverFromNoMatch { - pub fn as_bool(self) -> bool { - matches!(self, RecoverFromNoMatch::Yes) - } -} +pub(crate) struct TokenAcceptanceThreshold(pub(crate) u8); fn opt_parse( input: &mut ParserContext<'_>, @@ -48,7 +41,7 @@ impl ParserResult { input: &mut ParserContext<'_>, lexer: &L, expected: TokenKind, - recover_from_no_match: RecoverFromNoMatch, + acceptance_threshold: TokenAcceptanceThreshold, ) -> ParserResult { enum ParseResultKind { Match, @@ -59,11 +52,15 @@ impl ParserResult { let before_recovery = input.position(); let (mut nodes, mut expected_tokens, result_kind) = match self { - ParserResult::IncompleteMatch(result) => ( - result.nodes, - result.expected_tokens, - ParseResultKind::Incomplete, - ), + ParserResult::IncompleteMatch(result) + if result.matches_at_least_n_tokens(acceptance_threshold.0) => + { + ( + result.nodes, + result.expected_tokens, + ParseResultKind::Incomplete, + ) + } ParserResult::Match(result) if lexer .peek_token_with_trivia::(input) @@ -72,7 +69,7 @@ impl ParserResult { { (result.nodes, result.expected_tokens, ParseResultKind::Match) } - ParserResult::NoMatch(result) if recover_from_no_match.as_bool() => { + ParserResult::NoMatch(result) if acceptance_threshold.0 == 0 => { (vec![], result.expected_tokens, ParseResultKind::NoMatch) } // No need to recover, so just return as-is. diff --git a/crates/solidity/testing/snapshots/cst_output/TryStatement/try_catch/generated/0.6.2-failure.yml b/crates/solidity/testing/snapshots/cst_output/TryStatement/try_catch/generated/0.6.2-failure.yml deleted file mode 100644 index 5ce42e09f4..0000000000 --- a/crates/solidity/testing/snapshots/cst_output/TryStatement/try_catch/generated/0.6.2-failure.yml +++ /dev/null @@ -1,51 +0,0 @@ -# This file is generated automatically by infrastructure scripts. Please don't edit by hand. - -Source: > - 1 │ // Make sure that error recovery won't lead to misparsing │ 0..57 - 2 │ // ambiguous function call options with the block following the try expression │ 58..136 - 3 │ try a.b() {ident();} catch {} │ 137..166 - -Errors: # 2 total - - > - Error: Expected Colon. - ╭─[crates/solidity/testing/snapshots/cst_output/TryStatement/try_catch/input.sol:3:17] - │ - 3 │ try a.b() {ident();} catch {} - │ ─┬─ - │ ╰─── Error occurred here. - ───╯ - - > - Error: Expected OpenBrace or ReturnsKeyword. - ╭─[crates/solidity/testing/snapshots/cst_output/TryStatement/try_catch/input.sol:3:21] - │ - 3 │ try a.b() {ident();} catch {} - │ ─────┬──── - │ ╰────── Error occurred here. - ───╯ - -Tree: - - (TryStatement): # "// Make sure that error recovery won't lead to mis..." (0..167) - - (LeadingTrivia): # "// Make sure that error recovery won't lead to mis..." (0..137) - - (SingleLineComment): "// Make sure that error recovery won't lead to mis..." # (0..57) - - (EndOfLine): "\n" # (57..58) - - (SingleLineComment): "// ambiguous function call options with the block ..." # (58..136) - - (EndOfLine): "\n" # (136..137) - - (try_keyword꞉ TryKeyword): "try" # (137..140) - - (expression꞉ Expression) ► (variant꞉ CallOptionsExpression): # " a.b() {ident();}" (140..157) - - (operand꞉ Expression) ► (variant꞉ FunctionCallExpression): # " a.b()" (140..146) - - (operand꞉ Expression) ► (variant꞉ MemberAccessExpression): # " a.b" (140..144) - - (operand꞉ Expression): # " a" (140..142) - - (LeadingTrivia) ► (Whitespace): " " # (140..141) - - (variant꞉ Identifier): "a" # (141..142) - - (period꞉ Period): "." # (142..143) - - (member꞉ MemberAccess) ► (variant꞉ Identifier): "b" # (143..144) - - (arguments꞉ ArgumentsDeclaration) ► (variant꞉ PositionalArgumentsDeclaration): # "()" (144..146) - - (open_paren꞉ OpenParen): "(" # (144..145) - - (close_paren꞉ CloseParen): ")" # (145..146) - - (LeadingTrivia) ► (Whitespace): " " # (146..147) - - (open_brace꞉ OpenBrace): "{" # (147..148) - - (arguments꞉ NamedArguments): # "ident" (148..153) - - (item꞉ NamedArgument) ► (name꞉ Identifier): "ident" # (148..153) - - (SKIPPED): "();" # (153..156) - - (close_brace꞉ CloseBrace): "}" # (156..157) - - (SKIPPED): " catch {}\n" # (157..167) diff --git a/crates/testlang/outputs/cargo/slang_testlang/src/generated/language.rs b/crates/testlang/outputs/cargo/slang_testlang/src/generated/language.rs index 8229c2ce22..e16c46f0ce 100644 --- a/crates/testlang/outputs/cargo/slang_testlang/src/generated/language.rs +++ b/crates/testlang/outputs/cargo/slang_testlang/src/generated/language.rs @@ -24,7 +24,7 @@ use crate::napi_interface::parse_output::ParseOutput as NAPIParseOutput; use crate::parse_output::ParseOutput; use crate::parser_support::{ ChoiceHelper, OneOrMoreHelper, OptionalHelper, ParserContext, ParserFunction, ParserResult, - PrecedenceHelper, RecoverFromNoMatch, SeparatedHelper, SequenceHelper, ZeroOrMoreHelper, + PrecedenceHelper, SeparatedHelper, SequenceHelper, TokenAcceptanceThreshold, ZeroOrMoreHelper, }; #[derive(Debug)] @@ -392,7 +392,7 @@ impl Language { input, self, TokenKind::Semicolon, - RecoverFromNoMatch::No, + TokenAcceptanceThreshold(1u8), ), )?; seq.elem_labeled( @@ -426,7 +426,7 @@ impl Language { input, self, TokenKind::CloseBracket, - RecoverFromNoMatch::Yes, + TokenAcceptanceThreshold(0u8), ), )?; seq.elem_labeled( diff --git a/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/mod.rs b/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/mod.rs index cf8ccbaad9..2d005d87d3 100644 --- a/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/mod.rs +++ b/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/mod.rs @@ -28,7 +28,7 @@ pub(crate) use parser_result::ParserResult; #[allow(unused_imports)] pub(crate) use precedence_helper::PrecedenceHelper; #[allow(unused_imports)] -pub(crate) use recovery::RecoverFromNoMatch; +pub(crate) use recovery::TokenAcceptanceThreshold; #[allow(unused_imports)] pub(crate) use repetition_helper::{OneOrMoreHelper, ZeroOrMoreHelper}; #[allow(unused_imports)] diff --git a/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/parser_result.rs b/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/parser_result.rs index 138ba829a5..50146f58c7 100644 --- a/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/parser_result.rs +++ b/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/parser_result.rs @@ -2,7 +2,7 @@ use std::ops::ControlFlow; -use crate::cst::{self, LabeledNode}; +use crate::cst::{self, LabeledNode, Node}; use crate::kinds::{NodeLabel, RuleKind, TokenKind}; use crate::text_index::TextIndex; @@ -203,6 +203,33 @@ impl IncompleteMatch { expected_tokens, } } + + /// Whether this prefix-matched at least `n` (non-skipped) tokens. + pub fn matches_at_least_n_tokens(&self, n: u8) -> bool { + let result = self + .nodes + .iter() + .flat_map(|node| node.cursor_with_offset(TextIndex::ZERO)) + .try_fold(0u8, |mut acc, node| { + match node { + Node::Token(tok) if tok.kind != TokenKind::SKIPPED => { + acc += 1; + } + _ => {} + } + + // Short-circuit not to walk the whole tree if we've already matched enough + if acc >= n { + ControlFlow::Break(acc) + } else { + ControlFlow::Continue(acc) + } + }); + + match result { + ControlFlow::Continue(value) | ControlFlow::Break(value) => value >= n, + } + } } #[derive(PartialEq, Eq, Clone, Debug)] diff --git a/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/recovery.rs b/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/recovery.rs index 563d3a60bf..2a53c35945 100644 --- a/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/recovery.rs +++ b/crates/testlang/outputs/cargo/slang_testlang/src/generated/parser_support/recovery.rs @@ -9,18 +9,11 @@ use crate::parser_support::parser_result::SkippedUntil; use crate::parser_support::ParserResult; use crate::text_index::{TextRange, TextRangeExtensions as _}; -/// An explicit parameter for the [`ParserResult::recover_until_with_nested_delims`] method. +/// How many tokens have to be matched to trigger the error recovery. +/// For ambiguous syntaxes this needs to be set to at least N, where N +/// is the token lookahead required to disambiguate the syntax. #[derive(Clone, Copy)] -pub(crate) enum RecoverFromNoMatch { - Yes, - No, -} - -impl RecoverFromNoMatch { - pub fn as_bool(self) -> bool { - matches!(self, RecoverFromNoMatch::Yes) - } -} +pub(crate) struct TokenAcceptanceThreshold(pub(crate) u8); fn opt_parse( input: &mut ParserContext<'_>, @@ -48,7 +41,7 @@ impl ParserResult { input: &mut ParserContext<'_>, lexer: &L, expected: TokenKind, - recover_from_no_match: RecoverFromNoMatch, + acceptance_threshold: TokenAcceptanceThreshold, ) -> ParserResult { enum ParseResultKind { Match, @@ -59,11 +52,15 @@ impl ParserResult { let before_recovery = input.position(); let (mut nodes, mut expected_tokens, result_kind) = match self { - ParserResult::IncompleteMatch(result) => ( - result.nodes, - result.expected_tokens, - ParseResultKind::Incomplete, - ), + ParserResult::IncompleteMatch(result) + if result.matches_at_least_n_tokens(acceptance_threshold.0) => + { + ( + result.nodes, + result.expected_tokens, + ParseResultKind::Incomplete, + ) + } ParserResult::Match(result) if lexer .peek_token_with_trivia::(input) @@ -72,7 +69,7 @@ impl ParserResult { { (result.nodes, result.expected_tokens, ParseResultKind::Match) } - ParserResult::NoMatch(result) if recover_from_no_match.as_bool() => { + ParserResult::NoMatch(result) if acceptance_threshold.0 == 0 => { (vec![], result.expected_tokens, ParseResultKind::NoMatch) } // No need to recover, so just return as-is.