From ea4c5aae1ca450377b7a74a79c2fb6295ce536a8 Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Fri, 5 Apr 2024 23:07:19 +0200 Subject: [PATCH] feat: Support arbitrary ascii escapes until 0.4.25 --- .../inputs/language/src/definition.rs | 49 ++++++++++++-- .../slang_solidity/src/generated/language.rs | 64 ++++++++++++++++++- .../tests/src/cst_output/generated/mod.rs | 3 +- .../outputs/spec/generated/grammar.ebnf | 16 ++++- .../public/05-expressions/05-strings.md | 16 ++++- .../generated/public/supported-versions.md | 4 +- .../generated/0.4.11-success.yml | 14 ++++ ...{0.4.11-failure.yml => 0.4.25-failure.yml} | 0 8 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/0.4.11-success.yml rename crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/{0.4.11-failure.yml => 0.4.25-failure.yml} (100%) diff --git a/crates/solidity/inputs/language/src/definition.rs b/crates/solidity/inputs/language/src/definition.rs index afc260a449..457c0c5bd3 100644 --- a/crates/solidity/inputs/language/src/definition.rs +++ b/crates/solidity/inputs/language/src/definition.rs @@ -3820,9 +3820,21 @@ codegen_language_macros::compile!(Language( Token( name = SingleQuotedStringLiteral, definitions = [ - // Allows unicode characters: + // Allows unicode characters and arbitrary ASCII escape sequences: TokenDefinition( - enabled = Till("0.7.0"), + enabled = Till("0.4.25"), + scanner = Sequence([ + Atom("'"), + ZeroOrMore(Choice([ + Fragment(EscapeSequenceArbitraryAscii), + Not(['\'', '\\', '\r', '\n']) + ])), + Atom("'") + ]) + ), + // Allows unicode characters but allows only known ASCII escape sequences: + TokenDefinition( + enabled = Range(from = "0.4.25", till = "0.7.0"), scanner = Sequence([ Atom("'"), ZeroOrMore(Choice([ @@ -3850,9 +3862,21 @@ codegen_language_macros::compile!(Language( Token( name = DoubleQuotedStringLiteral, definitions = [ - // Allows unicode characters: + // Allows unicode characters and arbitrary ASCII escape sequences: TokenDefinition( - enabled = Till("0.7.0"), + enabled = Till("0.4.25"), + scanner = Sequence([ + Atom("\""), + ZeroOrMore(Choice([ + Fragment(EscapeSequenceArbitraryAscii), + Not(['"', '\\', '\r', '\n']) + ])), + Atom("\"") + ]) + ), + // Allows unicode characters but allows only known ASCII escape sequences: + TokenDefinition( + enabled = Range(from = "0.4.25", till = "0.7.0"), scanner = Sequence([ Atom("\""), ZeroOrMore(Choice([ @@ -3981,6 +4005,18 @@ codegen_language_macros::compile!(Language( ]) ]) ), + Fragment( + name = EscapeSequenceArbitraryAscii, + enabled = Till("0.4.25"), + scanner = Sequence([ + Atom("\\"), + Choice([ + Fragment(AsciiEscapeArbitrary), + Fragment(HexByteEscape), + Fragment(UnicodeEscape) + ]) + ]) + ), Fragment( name = AsciiEscape, scanner = Choice([ @@ -3995,6 +4031,11 @@ codegen_language_macros::compile!(Language( Atom("\n") ]) ), + Fragment( + name = AsciiEscapeArbitrary, + enabled = Till("0.4.25"), + scanner = Not(['x', 'u']) + ), Fragment( name = HexByteEscape, scanner = Sequence([ diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/language.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/language.rs index 911e545699..ac5c95cbf3 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/language.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/language.rs @@ -36,6 +36,7 @@ pub struct Language { pub(crate) version_is_at_least_0_4_16: bool, pub(crate) version_is_at_least_0_4_21: bool, pub(crate) version_is_at_least_0_4_22: bool, + pub(crate) version_is_at_least_0_4_25: bool, pub(crate) version_is_at_least_0_5_0: bool, pub(crate) version_is_at_least_0_5_3: bool, pub(crate) version_is_at_least_0_5_5: bool, @@ -168,6 +169,7 @@ impl Language { version_is_at_least_0_4_16: Version::new(0, 4, 16) <= version, version_is_at_least_0_4_21: Version::new(0, 4, 21) <= version, version_is_at_least_0_4_22: Version::new(0, 4, 22) <= version, + version_is_at_least_0_4_25: Version::new(0, 4, 25) <= version, version_is_at_least_0_5_0: Version::new(0, 5, 0) <= version, version_is_at_least_0_5_3: Version::new(0, 5, 3) <= version, version_is_at_least_0_5_5: Version::new(0, 5, 5) <= version, @@ -6920,6 +6922,15 @@ impl Language { ) } + #[allow(unused_assignments, unused_parens)] + fn ascii_escape_arbitrary(&self, input: &mut ParserContext<'_>) -> bool { + if !self.version_is_at_least_0_4_25 { + scan_none_of!(input, 'x', 'u') + } else { + false + } + } + #[allow(unused_assignments, unused_parens)] fn decimal_digits(&self, input: &mut ParserContext<'_>) -> bool { scan_sequence!( @@ -7030,7 +7041,23 @@ impl Language { fn double_quoted_string_literal(&self, input: &mut ParserContext<'_>) -> bool { scan_choice!( input, - if !self.version_is_at_least_0_7_0 { + if !self.version_is_at_least_0_4_25 { + scan_sequence!( + scan_chars!(input, '"'), + scan_zero_or_more!( + input, + scan_choice!( + input, + self.escape_sequence_arbitrary_ascii(input), + scan_none_of!(input, '"', '\\', '\r', '\n') + ) + ), + scan_chars!(input, '"') + ) + } else { + false + }, + if self.version_is_at_least_0_4_25 && !self.version_is_at_least_0_7_0 { scan_sequence!( scan_chars!(input, '"'), scan_zero_or_more!( @@ -7108,6 +7135,23 @@ impl Language { ) } + #[allow(unused_assignments, unused_parens)] + fn escape_sequence_arbitrary_ascii(&self, input: &mut ParserContext<'_>) -> bool { + if !self.version_is_at_least_0_4_25 { + scan_sequence!( + scan_chars!(input, '\\'), + scan_choice!( + input, + self.ascii_escape_arbitrary(input), + self.hex_byte_escape(input), + self.unicode_escape(input) + ) + ) + } else { + false + } + } + #[allow(unused_assignments, unused_parens)] fn hex_byte_escape(&self, input: &mut ParserContext<'_>) -> bool { scan_sequence!( @@ -7281,7 +7325,23 @@ impl Language { fn single_quoted_string_literal(&self, input: &mut ParserContext<'_>) -> bool { scan_choice!( input, - if !self.version_is_at_least_0_7_0 { + if !self.version_is_at_least_0_4_25 { + scan_sequence!( + scan_chars!(input, '\''), + scan_zero_or_more!( + input, + scan_choice!( + input, + self.escape_sequence_arbitrary_ascii(input), + scan_none_of!(input, '\'', '\\', '\r', '\n') + ) + ), + scan_chars!(input, '\'') + ) + } else { + false + }, + if self.version_is_at_least_0_4_25 && !self.version_is_at_least_0_7_0 { scan_sequence!( scan_chars!(input, '\''), scan_zero_or_more!( diff --git a/crates/solidity/outputs/cargo/tests/src/cst_output/generated/mod.rs b/crates/solidity/outputs/cargo/tests/src/cst_output/generated/mod.rs index 7596da4a3d..2594d5f48b 100644 --- a/crates/solidity/outputs/cargo/tests/src/cst_output/generated/mod.rs +++ b/crates/solidity/outputs/cargo/tests/src/cst_output/generated/mod.rs @@ -54,13 +54,14 @@ mod yul_label; mod yul_leave_statement; mod yul_variable_declaration_statement; -pub const VERSION_BREAKS: [Version; 30] = [ +pub const VERSION_BREAKS: [Version; 31] = [ Version::new(0, 4, 11), Version::new(0, 4, 12), Version::new(0, 4, 14), Version::new(0, 4, 16), Version::new(0, 4, 21), Version::new(0, 4, 22), + Version::new(0, 4, 25), Version::new(0, 5, 0), Version::new(0, 5, 3), Version::new(0, 5, 5), diff --git a/crates/solidity/outputs/spec/generated/grammar.ebnf b/crates/solidity/outputs/spec/generated/grammar.ebnf index cdc23d019f..1395e68c7b 100644 --- a/crates/solidity/outputs/spec/generated/grammar.ebnf +++ b/crates/solidity/outputs/spec/generated/grammar.ebnf @@ -1256,12 +1256,18 @@ StringLiterals = StringLiteral+; StringLiteral = SINGLE_QUOTED_STRING_LITERAL | DOUBLE_QUOTED_STRING_LITERAL; -(* Deprecated in 0.7.0 *) +(* Deprecated in 0.4.25 *) +SINGLE_QUOTED_STRING_LITERAL = "'" («ESCAPE_SEQUENCE_ARBITRARY_ASCII» | !("'" "\\" "\r" "\n"))* "'"; + +(* Introduced in 0.4.25 and deprecated in 0.7.0. *) SINGLE_QUOTED_STRING_LITERAL = "'" («ESCAPE_SEQUENCE» | !("'" "\\" "\r" "\n"))* "'"; SINGLE_QUOTED_STRING_LITERAL = "'" («ESCAPE_SEQUENCE» | (" "…"&") | ("("…"[") | ("]"…"~"))* "'"; -(* Deprecated in 0.7.0 *) +(* Deprecated in 0.4.25 *) +DOUBLE_QUOTED_STRING_LITERAL = '"' («ESCAPE_SEQUENCE_ARBITRARY_ASCII» | !('"' "\\" "\r" "\n"))* '"'; + +(* Introduced in 0.4.25 and deprecated in 0.7.0. *) DOUBLE_QUOTED_STRING_LITERAL = '"' («ESCAPE_SEQUENCE» | !('"' "\\" "\r" "\n"))* '"'; DOUBLE_QUOTED_STRING_LITERAL = '"' («ESCAPE_SEQUENCE» | (" "…"!") | ("#"…"[") | ("]"…"~"))* '"'; @@ -1295,8 +1301,14 @@ DOUBLE_QUOTED_UNICODE_STRING_LITERAL = 'unicode"' («ESCAPE_SEQUENCE» | !('"' " «ESCAPE_SEQUENCE» = "\\" («ASCII_ESCAPE» | «HEX_BYTE_ESCAPE» | «UNICODE_ESCAPE»); +(* Deprecated in 0.4.25 *) +«ESCAPE_SEQUENCE_ARBITRARY_ASCII» = "\\" («ASCII_ESCAPE_ARBITRARY» | «HEX_BYTE_ESCAPE» | «UNICODE_ESCAPE»); + «ASCII_ESCAPE» = "n" | "r" | "t" | "'" | '"' | "\\" | "\r\n" | "\r" | "\n"; +(* Deprecated in 0.4.25 *) +«ASCII_ESCAPE_ARBITRARY» = !("x" "u"); + «HEX_BYTE_ESCAPE» = "x" «HEX_CHARACTER» «HEX_CHARACTER»; «UNICODE_ESCAPE» = "u" «HEX_CHARACTER» «HEX_CHARACTER» «HEX_CHARACTER» «HEX_CHARACTER»; diff --git a/crates/solidity/outputs/spec/generated/public/05-expressions/05-strings.md b/crates/solidity/outputs/spec/generated/public/05-expressions/05-strings.md index 601d1ebeb3..23dfa91074 100644 --- a/crates/solidity/outputs/spec/generated/public/05-expressions/05-strings.md +++ b/crates/solidity/outputs/spec/generated/public/05-expressions/05-strings.md @@ -26,13 +26,13 @@ ``` -
(* Deprecated in 0.7.0 *)
SINGLE_QUOTED_STRING_LITERAL = "'" («ESCAPE_SEQUENCE» | !("'" "\\" "\r" "\n"))* "'";

SINGLE_QUOTED_STRING_LITERAL = "'" («ESCAPE_SEQUENCE» | (" ""&") | ("(""[") | ("]""~"))* "'";
+
(* Deprecated in 0.4.25 *)
SINGLE_QUOTED_STRING_LITERAL = "'" («ESCAPE_SEQUENCE_ARBITRARY_ASCII» | !("'" "\\" "\r" "\n"))* "'";

(* Introduced in 0.4.25 and deprecated in 0.7.0. *)
SINGLE_QUOTED_STRING_LITERAL = "'" («ESCAPE_SEQUENCE» | !("'" "\\" "\r" "\n"))* "'";

SINGLE_QUOTED_STRING_LITERAL = "'" («ESCAPE_SEQUENCE» | (" ""&") | ("(""[") | ("]""~"))* "'";
```{ .ebnf #DoubleQuotedStringLiteral } ``` -
(* Deprecated in 0.7.0 *)
DOUBLE_QUOTED_STRING_LITERAL = '"' («ESCAPE_SEQUENCE» | !('"' "\\" "\r" "\n"))* '"';

DOUBLE_QUOTED_STRING_LITERAL = '"' («ESCAPE_SEQUENCE» | (" ""!") | ("#""[") | ("]""~"))* '"';
+
(* Deprecated in 0.4.25 *)
DOUBLE_QUOTED_STRING_LITERAL = '"' («ESCAPE_SEQUENCE_ARBITRARY_ASCII» | !('"' "\\" "\r" "\n"))* '"';

(* Introduced in 0.4.25 and deprecated in 0.7.0. *)
DOUBLE_QUOTED_STRING_LITERAL = '"' («ESCAPE_SEQUENCE» | !('"' "\\" "\r" "\n"))* '"';

DOUBLE_QUOTED_STRING_LITERAL = '"' («ESCAPE_SEQUENCE» | (" ""!") | ("#""[") | ("]""~"))* '"';
```{ .ebnf #HexStringLiterals } @@ -100,12 +100,24 @@
«ESCAPE_SEQUENCE» = "\\" («ASCII_ESCAPE» | «HEX_BYTE_ESCAPE» | «UNICODE_ESCAPE»);
+```{ .ebnf #EscapeSequenceArbitraryAscii } + +``` + +
(* Deprecated in 0.4.25 *)
«ESCAPE_SEQUENCE_ARBITRARY_ASCII» = "\\" («ASCII_ESCAPE_ARBITRARY» | «HEX_BYTE_ESCAPE» | «UNICODE_ESCAPE»);
+ ```{ .ebnf #AsciiEscape } ```
«ASCII_ESCAPE» = "n" | "r" | "t" | "'" | '"' | "\\" | "\r\n" | "\r" | "\n";
+```{ .ebnf #AsciiEscapeArbitrary } + +``` + +
(* Deprecated in 0.4.25 *)
«ASCII_ESCAPE_ARBITRARY» = !("x" "u");
+ ```{ .ebnf #HexByteEscape } ``` diff --git a/crates/solidity/outputs/spec/generated/public/supported-versions.md b/crates/solidity/outputs/spec/generated/public/supported-versions.md index 4424d2a841..a81ad3a072 100644 --- a/crates/solidity/outputs/spec/generated/public/supported-versions.md +++ b/crates/solidity/outputs/spec/generated/public/supported-versions.md @@ -4,6 +4,6 @@ This specification compiles information from 77 publicly released versions of So `0.4.11` `0.4.12` `0.4.13` `0.4.14` `0.4.15` `0.4.16` `0.4.17` `0.4.18` `0.4.19` `0.4.20` `0.4.21` `0.4.22` `0.4.23` `0.4.24` `0.4.25` `0.4.26` `0.5.0` `0.5.1` `0.5.2` `0.5.3` `0.5.4` `0.5.5` `0.5.6` `0.5.7` `0.5.8` `0.5.9` `0.5.10` `0.5.11` `0.5.12` `0.5.13` `0.5.14` `0.5.15` `0.5.16` `0.5.17` `0.6.0` `0.6.1` `0.6.2` `0.6.3` `0.6.4` `0.6.5` `0.6.6` `0.6.7` `0.6.8` `0.6.9` `0.6.10` `0.6.11` `0.6.12` `0.7.0` `0.7.1` `0.7.2` `0.7.3` `0.7.4` `0.7.5` `0.7.6` `0.8.0` `0.8.1` `0.8.2` `0.8.3` `0.8.4` `0.8.5` `0.8.6` `0.8.7` `0.8.8` `0.8.9` `0.8.10` `0.8.11` `0.8.12` `0.8.13` `0.8.14` `0.8.15` `0.8.16` `0.8.17` `0.8.18` `0.8.19` `0.8.20` `0.8.21` `0.8.22` -Among which, 30 versions have breaking changes: +Among which, 31 versions have breaking changes: -`0.4.11` `0.4.12` `0.4.14` `0.4.16` `0.4.21` `0.4.22` `0.5.0` `0.5.3` `0.5.5` `0.5.8` `0.5.10` `0.5.12` `0.5.14` `0.6.0` `0.6.2` `0.6.5` `0.6.7` `0.6.8` `0.6.11` `0.7.0` `0.7.1` `0.7.4` `0.8.0` `0.8.4` `0.8.7` `0.8.8` `0.8.13` `0.8.18` `0.8.19` `0.8.22` +`0.4.11` `0.4.12` `0.4.14` `0.4.16` `0.4.21` `0.4.22` `0.4.25` `0.5.0` `0.5.3` `0.5.5` `0.5.8` `0.5.10` `0.5.12` `0.5.14` `0.6.0` `0.6.2` `0.6.5` `0.6.7` `0.6.8` `0.6.11` `0.7.0` `0.7.1` `0.7.4` `0.8.0` `0.8.4` `0.8.7` `0.8.8` `0.8.13` `0.8.18` `0.8.19` `0.8.22` diff --git a/crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/0.4.11-success.yml b/crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/0.4.11-success.yml new file mode 100644 index 0000000000..352f1d3f31 --- /dev/null +++ b/crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/0.4.11-success.yml @@ -0,0 +1,14 @@ +# This file is generated automatically by infrastructure scripts. Please don't edit by hand. + +Source: > + 1 │ // Disallowed post 0.4.25 │ 0..25 + 2 │ "\a" │ 26..30 + +Errors: [] + +Tree: + - (StringLiteral): # '// Disallowed post 0.4.25\n"\a"\n' (0..31) + - (leading_trivia꞉ SingleLineComment): "// Disallowed post 0.4.25" # (0..25) + - (leading_trivia꞉ EndOfLine): "\n" # (25..26) + - (variant꞉ DoubleQuotedStringLiteral): '"\a"' # (26..30) + - (trailing_trivia꞉ EndOfLine): "\n" # (30..31) diff --git a/crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/0.4.11-failure.yml b/crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/0.4.25-failure.yml similarity index 100% rename from crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/0.4.11-failure.yml rename to crates/solidity/testing/snapshots/cst_output/StringLiteral/escape_ascii_arbitrary/generated/0.4.25-failure.yml