From a59a464c5cc6550387c4408f64abaf535c5c06f8 Mon Sep 17 00:00:00 2001 From: Omar Tawfik <15987992+OmarTawfik@users.noreply.github.com> Date: Mon, 6 Nov 2023 15:59:44 -0800 Subject: [PATCH] fix trailing IdentifierPart in grammar (#641) - Moved numeric literals to use `notFollowedBy: IdentifierStart` instead of `IdentifierPart`. - Removed `notFollowedBy: IdentifierPart` from string literals, to match the behavior of `solc`. --- .../05-expressions/04-numbers/productions.yml | 8 +- .../05-expressions/05-strings/productions.yml | 30 +-- .../06-yul/03-yul-expressions/productions.yml | 4 +- .../inputs/language/src/definition.rs | 222 ++++++++++-------- crates/solidity/inputs/language/src/dsl.rs | 15 +- .../cargo/crate/src/generated/language.rs | 30 +-- .../outputs/cargo/tests/src/scanner/mod.rs | 6 +- .../npm/crate/src/generated/language.rs | 30 +-- 8 files changed, 161 insertions(+), 184 deletions(-) diff --git a/crates/solidity/inputs/language/definition/05-expressions/04-numbers/productions.yml b/crates/solidity/inputs/language/definition/05-expressions/04-numbers/productions.yml index c0c72b5ff2..5bf8586110 100644 --- a/crates/solidity/inputs/language/definition/05-expressions/04-numbers/productions.yml +++ b/crates/solidity/inputs/language/definition/05-expressions/04-numbers/productions.yml @@ -37,7 +37,7 @@ - oneOrMore: reference: "HexCharacter" notFollowedBy: - reference: "IdentifierPart" + reference: "IdentifierStart" 0.5.0: # removed uppercase "0X" trailingContext: @@ -52,7 +52,7 @@ - oneOrMore: reference: "HexCharacter" notFollowedBy: - reference: "IdentifierPart" + reference: "IdentifierStart" - name: "DecimalLiteral" kind: "Scanner" @@ -75,7 +75,7 @@ - optional: reference: "DecimalExponent" notFollowedBy: - reference: "IdentifierPart" + reference: "IdentifierStart" 0.5.0: # Second "DecimalDigits" is no longer "optional" trailingContext: @@ -94,7 +94,7 @@ - optional: reference: "DecimalExponent" notFollowedBy: - reference: "IdentifierPart" + reference: "IdentifierStart" - name: "DecimalDigits" kind: "Scanner" diff --git a/crates/solidity/inputs/language/definition/05-expressions/05-strings/productions.yml b/crates/solidity/inputs/language/definition/05-expressions/05-strings/productions.yml index 702c258ee1..6c0786bf17 100644 --- a/crates/solidity/inputs/language/definition/05-expressions/05-strings/productions.yml +++ b/crates/solidity/inputs/language/definition/05-expressions/05-strings/productions.yml @@ -24,13 +24,9 @@ - name: "HexStringLiteral" kind: "Scanner" unversioned: - trailingContext: - scanner: - choice: - - reference: "SingleQuotedHexStringLiteral" - - reference: "DoubleQuotedHexStringLiteral" - notFollowedBy: - reference: "IdentifierPart" + choice: + - reference: "SingleQuotedHexStringLiteral" + - reference: "DoubleQuotedHexStringLiteral" - name: "SingleQuotedHexStringLiteral" kind: "Scanner" @@ -92,13 +88,9 @@ - name: "AsciiStringLiteral" kind: "Scanner" unversioned: - trailingContext: - scanner: - choice: - - reference: "SingleQuotedAsciiStringLiteral" - - reference: "DoubleQuotedAsciiStringLiteral" - notFollowedBy: - reference: "IdentifierPart" + choice: + - reference: "SingleQuotedAsciiStringLiteral" + - reference: "DoubleQuotedAsciiStringLiteral" - name: "SingleQuotedAsciiStringLiteral" kind: "Scanner" @@ -151,13 +143,9 @@ kind: "Scanner" versioned: 0.7.0: - trailingContext: - scanner: - choice: - - reference: "SingleQuotedUnicodeStringLiteral" - - reference: "DoubleQuotedUnicodeStringLiteral" - notFollowedBy: - reference: "IdentifierPart" + choice: + - reference: "SingleQuotedUnicodeStringLiteral" + - reference: "DoubleQuotedUnicodeStringLiteral" - name: "SingleQuotedUnicodeStringLiteral" kind: "Scanner" diff --git a/crates/solidity/inputs/language/definition/06-yul/03-yul-expressions/productions.yml b/crates/solidity/inputs/language/definition/06-yul/03-yul-expressions/productions.yml index f68b587d04..80231c5ec9 100644 --- a/crates/solidity/inputs/language/definition/06-yul/03-yul-expressions/productions.yml +++ b/crates/solidity/inputs/language/definition/06-yul/03-yul-expressions/productions.yml @@ -135,7 +135,7 @@ - oneOrMore: reference: "HexCharacter" notFollowedBy: - reference: "IdentifierPart" + reference: "IdentifierStart" - name: "YulDecimalLiteral" kind: "Scanner" @@ -153,4 +153,4 @@ from: "0" to: "9" notFollowedBy: - reference: "IdentifierPart" + reference: "IdentifierStart" diff --git a/crates/solidity/inputs/language/src/definition.rs b/crates/solidity/inputs/language/src/definition.rs index c19736b0a0..d730b4295a 100644 --- a/crates/solidity/inputs/language/src/definition.rs +++ b/crates/solidity/inputs/language/src/definition.rs @@ -122,9 +122,16 @@ codegen_language_macros::compile!(Language( name = ExperimentalPragma, fields = ( experimental_keyword = Required(Terminal([ExperimentalKeyword])), - feature = Required(Terminal([AsciiStringLiteral, Identifier])) + feature = Required(NonTerminal(ExperimentalFeature)) ) ), + Enum( + name = ExperimentalFeature, + variants = [ + EnumVariant(name = Identifier, reference = Identifier), + EnumVariant(name = String, reference = AsciiStringLiteral) + ] + ), Struct( name = VersionPragma, fields = ( @@ -216,7 +223,7 @@ codegen_language_macros::compile!(Language( Struct( name = PathImportSymbol, fields = ( - path = Required(Terminal([AsciiStringLiteral])), + path = Required(NonTerminal(AsciiStringLiteral)), alias = Optional(kind = NonTerminal(ImportAlias)) ) ), @@ -226,7 +233,7 @@ codegen_language_macros::compile!(Language( asterisk = Required(Terminal([Asterisk])), alias = Required(NonTerminal(ImportAlias)), from_keyword = Required(Terminal([FromKeyword])), - path = Required(Terminal([AsciiStringLiteral])) + path = Required(NonTerminal(AsciiStringLiteral)) ) ), Struct( @@ -240,7 +247,7 @@ codegen_language_macros::compile!(Language( fields = Required(NonTerminal(ImportDeconstructionFields)), close_brace = Required(Terminal([CloseBrace])), from_keyword = Required(Terminal([FromKeyword])), - path = Required(Terminal([AsciiStringLiteral])) + path = Required(NonTerminal(AsciiStringLiteral)) ) ), Separated( @@ -3641,7 +3648,7 @@ codegen_language_macros::compile!(Language( OneOrMore(Fragment(HexCharacter)) ])) ]), - not_followed_by = Fragment(IdentifierPart) + not_followed_by = Fragment(IdentifierStart) ) ), // Uppercase "0X" only enabled before "0.5.0": @@ -3656,7 +3663,7 @@ codegen_language_macros::compile!(Language( OneOrMore(Fragment(HexCharacter)) ])) ]), - not_followed_by = Fragment(IdentifierPart) + not_followed_by = Fragment(IdentifierStart) ) ) ] @@ -3671,7 +3678,7 @@ codegen_language_macros::compile!(Language( Fragment(DecimalDigits), Optional(Fragment(DecimalExponent)) ]), - not_followed_by = Fragment(IdentifierPart) + not_followed_by = Fragment(IdentifierStart) ) ), // An integer and a dot (without a fraction) is disabled in "0.5.0" @@ -3683,7 +3690,7 @@ codegen_language_macros::compile!(Language( Atom("."), Optional(Fragment(DecimalExponent)) ]), - not_followed_by = Fragment(IdentifierPart) + not_followed_by = Fragment(IdentifierStart) ) ), // A dot and a fraction (without an integer) is enabled in all versions: @@ -3694,7 +3701,7 @@ codegen_language_macros::compile!(Language( Fragment(DecimalDigits), Optional(Fragment(DecimalExponent)) ]), - not_followed_by = Fragment(IdentifierPart) + not_followed_by = Fragment(IdentifierStart) ) ), // An integer, a dot, and a fraction is enabled in all versions: @@ -3706,7 +3713,7 @@ codegen_language_macros::compile!(Language( Fragment(DecimalDigits), Optional(Fragment(DecimalExponent)) ]), - not_followed_by = Fragment(IdentifierPart) + not_followed_by = Fragment(IdentifierStart) ) ) ] @@ -3784,33 +3791,38 @@ codegen_language_macros::compile!(Language( ] ), Repeated(name = HexStringLiterals, repeated = HexStringLiteral), - Token( + Enum( name = HexStringLiteral, - definitions = [TokenDefinition( - scanner = TrailingContext( - scanner = Choice([ - Fragment(SingleQuotedHexString), - Fragment(DoubleQuotedHexString) - ]), - not_followed_by = Fragment(IdentifierPart) + variants = [ + EnumVariant( + name = SingleQuoted, + reference = SingleQuotedHexStringLiteral + ), + EnumVariant( + name = DoubleQuoted, + reference = DoubleQuotedHexStringLiteral ) - )] + ] ), - Fragment( - name = SingleQuotedHexString, - scanner = Sequence([ - Atom("hex'"), - Optional(Fragment(HexStringContents)), - Atom("'") - ]) + Token( + name = SingleQuotedHexStringLiteral, + definitions = [TokenDefinition( + scanner = Sequence([ + Atom("hex'"), + Optional(Fragment(HexStringContents)), + Atom("'") + ]) + )] ), - Fragment( - name = DoubleQuotedHexString, - scanner = Sequence([ - Atom("hex\""), - Optional(Fragment(HexStringContents)), - Atom("\"") - ]) + Token( + name = DoubleQuotedHexStringLiteral, + definitions = [TokenDefinition( + scanner = Sequence([ + Atom("hex\""), + Optional(Fragment(HexStringContents)), + Atom("\"") + ]) + )] ), Fragment( name = HexStringContents, @@ -3833,85 +3845,95 @@ codegen_language_macros::compile!(Language( ]) ), Repeated(name = AsciiStringLiterals, repeated = AsciiStringLiteral), - Token( + Enum( name = AsciiStringLiteral, - definitions = [TokenDefinition( - scanner = TrailingContext( - scanner = Choice([ - Fragment(SingleQuotedAsciiString), - Fragment(DoubleQuotedAsciiString) - ]), - not_followed_by = Fragment(IdentifierPart) + variants = [ + EnumVariant( + name = SingleQuoted, + reference = SingleQuotedAsciiStringLiteral + ), + EnumVariant( + name = DoubleQuoted, + reference = DoubleQuotedAsciiStringLiteral ) - )] + ] ), - Fragment( - name = SingleQuotedAsciiString, - scanner = Sequence([ - Atom("'"), - ZeroOrMore(Choice([ - Fragment(EscapeSequence), - Range(inclusive_start = ' ', inclusive_end = '&'), - Range(inclusive_start = '(', inclusive_end = '['), - Range(inclusive_start = ']', inclusive_end = '~') - ])), - Atom("'") - ]) + Token( + name = SingleQuotedAsciiStringLiteral, + definitions = [TokenDefinition( + scanner = Sequence([ + Atom("'"), + ZeroOrMore(Choice([ + Fragment(EscapeSequence), + Range(inclusive_start = ' ', inclusive_end = '&'), + Range(inclusive_start = '(', inclusive_end = '['), + Range(inclusive_start = ']', inclusive_end = '~') + ])), + Atom("'") + ]) + )] ), - Fragment( - name = DoubleQuotedAsciiString, - scanner = Sequence([ - Atom("\""), - ZeroOrMore(Choice([ - Fragment(EscapeSequence), - Range(inclusive_start = ' ', inclusive_end = '!'), - Range(inclusive_start = '#', inclusive_end = '['), - Range(inclusive_start = ']', inclusive_end = '~') - ])), - Atom("\"") - ]) + Token( + name = DoubleQuotedAsciiStringLiteral, + definitions = [TokenDefinition( + scanner = Sequence([ + Atom("\""), + ZeroOrMore(Choice([ + Fragment(EscapeSequence), + Range(inclusive_start = ' ', inclusive_end = '!'), + Range(inclusive_start = '#', inclusive_end = '['), + Range(inclusive_start = ']', inclusive_end = '~') + ])), + Atom("\"") + ]) + )] ), Repeated( name = UnicodeStringLiterals, repeated = UnicodeStringLiteral, enabled = From("0.7.0") ), - Token( + Enum( name = UnicodeStringLiteral, + enabled = From("0.7.0"), + variants = [ + EnumVariant( + name = SingleQuoted, + reference = SingleQuotedUnicodeStringLiteral + ), + EnumVariant( + name = DoubleQuoted, + reference = DoubleQuotedUnicodeStringLiteral + ) + ] + ), + Token( + name = SingleQuotedUnicodeStringLiteral, definitions = [TokenDefinition( enabled = From("0.7.0"), - scanner = TrailingContext( - scanner = Choice([ - Fragment(SingleQuotedUnicodeString), - Fragment(DoubleQuotedUnicodeString) - ]), - not_followed_by = Fragment(IdentifierPart) - ) + scanner = Sequence([ + Atom("unicode'"), + ZeroOrMore(Choice([ + Fragment(EscapeSequence), + Not(['\'', '\\', '\r', '\n']) + ])), + Atom("'") + ]) )] ), - Fragment( - name = SingleQuotedUnicodeString, - enabled = From("0.7.0"), - scanner = Sequence([ - Atom("unicode'"), - ZeroOrMore(Choice([ - Fragment(EscapeSequence), - Not(['\'', '\\', '\r', '\n']) - ])), - Atom("'") - ]) - ), - Fragment( - name = DoubleQuotedUnicodeString, - enabled = From("0.7.0"), - scanner = Sequence([ - Atom("unicode\""), - ZeroOrMore(Choice([ - Fragment(EscapeSequence), - Not(['"', '\\', '\r', '\n']) - ])), - Atom("\"") - ]) + Token( + name = DoubleQuotedUnicodeStringLiteral, + definitions = [TokenDefinition( + enabled = From("0.7.0"), + scanner = Sequence([ + Atom("unicode\""), + ZeroOrMore(Choice([ + Fragment(EscapeSequence), + Not(['"', '\\', '\r', '\n']) + ])), + Atom("\"") + ]) + )] ), Fragment( name = EscapeSequence, @@ -4007,7 +4029,7 @@ codegen_language_macros::compile!(Language( name = AssemblyStatement, fields = ( assembly_keyword = Required(Terminal([AssemblyKeyword])), - label = Optional(kind = Terminal([AsciiStringLiteral])), + label = Optional(kind = NonTerminal(AsciiStringLiteral)), flags = Optional(kind = NonTerminal(AssemblyFlagsDeclaration)), body = Required(NonTerminal(YulBlock)) ) @@ -4268,7 +4290,7 @@ codegen_language_macros::compile!(Language( )) ]) ]), - not_followed_by = Fragment(IdentifierPart) + not_followed_by = Fragment(IdentifierStart) ) )] ), @@ -4278,7 +4300,7 @@ codegen_language_macros::compile!(Language( scanner = TrailingContext( scanner = Sequence([Atom("0x"), OneOrMore(Fragment(HexCharacter))]), - not_followed_by = Fragment(IdentifierPart) + not_followed_by = Fragment(IdentifierStart) ) )] ) diff --git a/crates/solidity/inputs/language/src/dsl.rs b/crates/solidity/inputs/language/src/dsl.rs index 92a07cd413..441472a904 100644 --- a/crates/solidity/inputs/language/src/dsl.rs +++ b/crates/solidity/inputs/language/src/dsl.rs @@ -796,19 +796,13 @@ slang_grammar! { // Ascii String Literals - scanner AsciiStringLiteral = ( - (SingleQuotedAsciiStringLiteral | DoubleQuotedAsciiStringLiteral) - not followed by IdentifierStart - ) ; + scanner AsciiStringLiteral = (SingleQuotedAsciiStringLiteral | DoubleQuotedAsciiStringLiteral) ; scanner DoubleQuotedAsciiStringLiteral = ("\"" ((EscapeSequence | AsciiCharacterWithoutDoubleQuoteOrBackslash) *) "\"") ; scanner SingleQuotedAsciiStringLiteral = ("\'" ((EscapeSequence | AsciiCharacterWithoutSingleQuoteOrBackslash) *) "\'") ; // Hex String Literals - scanner HexStringLiteral = ( - (SingleQuotedHexStringLiteral | DoubleQuotedHexStringLiteral) - not followed by IdentifierStart - ) ; + scanner HexStringLiteral = (SingleQuotedHexStringLiteral | DoubleQuotedHexStringLiteral) ; scanner DoubleQuotedHexStringLiteral = ("hex\"" (HexStringContents ?) "\"") ; scanner SingleQuotedHexStringLiteral = ("hex\'" (HexStringContents ?) "\'") ; scanner HexStringContents = (HexCharacter HexCharacter ((('_' ?) HexCharacter HexCharacter) *)) ; @@ -816,10 +810,7 @@ slang_grammar! { // Unicode String Literals scanner UnicodeStringLiteral = { - introduced in "0.7.0" ( - (SingleQuotedUnicodeStringLiteral | DoubleQuotedUnicodeStringLiteral) - not followed by IdentifierStart - ) + introduced in "0.7.0" (SingleQuotedUnicodeStringLiteral | DoubleQuotedUnicodeStringLiteral) } ; scanner DoubleQuotedUnicodeStringLiteral = { introduced in "0.7.0" ("unicode\"" ((EscapeSequence | (! "\n\r\"\\")) *) "\"") } ; scanner SingleQuotedUnicodeStringLiteral = { introduced in "0.7.0" ("unicode\'" ((EscapeSequence | (! "\n\r\'\\")) *) "\'") } ; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/language.rs b/crates/solidity/outputs/cargo/crate/src/generated/language.rs index 611c7dabff..17ffb593ce 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/language.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/language.rs @@ -5056,14 +5056,10 @@ impl Language { #[allow(unused_assignments, unused_parens)] fn ascii_string_literal(&self, input: &mut ParserContext) -> bool { - scan_not_followed_by!( + scan_choice!( input, - scan_choice!( - input, - self.single_quoted_ascii_string_literal(input), - self.double_quoted_ascii_string_literal(input) - ), - self.identifier_start(input) + self.single_quoted_ascii_string_literal(input), + self.double_quoted_ascii_string_literal(input) ) } @@ -5321,14 +5317,10 @@ impl Language { #[allow(unused_assignments, unused_parens)] fn hex_string_literal(&self, input: &mut ParserContext) -> bool { - scan_not_followed_by!( + scan_choice!( input, - scan_choice!( - input, - self.single_quoted_hex_string_literal(input), - self.double_quoted_hex_string_literal(input) - ), - self.identifier_start(input) + self.single_quoted_hex_string_literal(input), + self.double_quoted_hex_string_literal(input) ) } @@ -5505,14 +5497,10 @@ impl Language { #[allow(unused_assignments, unused_parens)] fn unicode_string_literal(&self, input: &mut ParserContext) -> bool { if self.version_is_at_least_0_7_0 { - scan_not_followed_by!( + scan_choice!( input, - scan_choice!( - input, - self.single_quoted_unicode_string_literal(input), - self.double_quoted_unicode_string_literal(input) - ), - self.identifier_start(input) + self.single_quoted_unicode_string_literal(input), + self.double_quoted_unicode_string_literal(input) ) } else { false diff --git a/crates/solidity/outputs/cargo/tests/src/scanner/mod.rs b/crates/solidity/outputs/cargo/tests/src/scanner/mod.rs index 0c6d96c376..632b1977d4 100644 --- a/crates/solidity/outputs/cargo/tests/src/scanner/mod.rs +++ b/crates/solidity/outputs/cargo/tests/src/scanner/mod.rs @@ -18,15 +18,15 @@ fn test_next_token() { ("1", DecimalLiteral), ("\n", EndOfLine), ("unicode'abc'", UnicodeStringLiteral), - ("unicode'abc'ZZ", Identifier), // TODO: This needs to be further checked against solc ("hex'abcd'", HexStringLiteral), - ("hex'abcd'ZZz", HexKeyword), // TODO: This needs to be further checked against solc + ("'abc'ZZ", AsciiStringLiteral), // with an identifier afterwards + ("unicode'abc'ZZ", UnicodeStringLiteral), // with an identifier afterwards + ("hex'abcd'ZZz", HexStringLiteral), // with an identifier afterwards ("// single line\n", SingleLineComment), ("/* multi-line\n comment */ blah", MultilineComment), ("/* multi-line comment **/ blah", MultilineComment), ("0ZZ", SKIPPED), ("0xabZZ", SKIPPED), - ("'abc'ZZ", SKIPPED), ] { assert_eq!(language.scan(LexicalContext::Default, s), Some(*k)); } diff --git a/crates/solidity/outputs/npm/crate/src/generated/language.rs b/crates/solidity/outputs/npm/crate/src/generated/language.rs index 611c7dabff..17ffb593ce 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/language.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/language.rs @@ -5056,14 +5056,10 @@ impl Language { #[allow(unused_assignments, unused_parens)] fn ascii_string_literal(&self, input: &mut ParserContext) -> bool { - scan_not_followed_by!( + scan_choice!( input, - scan_choice!( - input, - self.single_quoted_ascii_string_literal(input), - self.double_quoted_ascii_string_literal(input) - ), - self.identifier_start(input) + self.single_quoted_ascii_string_literal(input), + self.double_quoted_ascii_string_literal(input) ) } @@ -5321,14 +5317,10 @@ impl Language { #[allow(unused_assignments, unused_parens)] fn hex_string_literal(&self, input: &mut ParserContext) -> bool { - scan_not_followed_by!( + scan_choice!( input, - scan_choice!( - input, - self.single_quoted_hex_string_literal(input), - self.double_quoted_hex_string_literal(input) - ), - self.identifier_start(input) + self.single_quoted_hex_string_literal(input), + self.double_quoted_hex_string_literal(input) ) } @@ -5505,14 +5497,10 @@ impl Language { #[allow(unused_assignments, unused_parens)] fn unicode_string_literal(&self, input: &mut ParserContext) -> bool { if self.version_is_at_least_0_7_0 { - scan_not_followed_by!( + scan_choice!( input, - scan_choice!( - input, - self.single_quoted_unicode_string_literal(input), - self.double_quoted_unicode_string_literal(input) - ), - self.identifier_start(input) + self.single_quoted_unicode_string_literal(input), + self.double_quoted_unicode_string_literal(input) ) } else { false