diff --git a/core/parser/src/lexer/mod.rs b/core/parser/src/lexer/mod.rs index 01650f899b6..d0c41b0da81 100644 --- a/core/parser/src/lexer/mod.rs +++ b/core/parser/src/lexer/mod.rs @@ -161,7 +161,7 @@ impl Lexer { )) } } - InputElement::RegExp => { + InputElement::RegExp | InputElement::HashbangOrRegExp => { // Can be a regular expression. RegexLiteral.lex(&mut self.cursor, start, interner) } @@ -214,28 +214,34 @@ impl Lexer { { let _timer = Profiler::global().start_event("next()", "Lexing"); - let (start, next_ch) = loop { - let start = self.cursor.pos(); - if let Some(next_ch) = self.cursor.next_char()? { - // Ignore whitespace - if !is_whitespace(next_ch) { - break (start, next_ch); - } - } else { - return Ok(None); - } + let mut start = self.cursor.pos(); + let Some(mut next_ch) = self.cursor.next_char()? else { + return Ok(None); }; - //handle hashbang here so the below match block still throws error on - //# if position isn't (1, 1) - if start.column_number() == 1 - && start.line_number() == 1 - && next_ch == 0x23 - && self.cursor.peek_char()? == Some(0x21) - { - let _token = HashbangComment.lex(&mut self.cursor, start, interner); - return self.next(interner); - }; + // If the goal symbol is HashbangOrRegExp, then we need to check if the next token is a hashbang comment. + // Since the goal symbol is only valid for the first token, we need to change it to RegExp after the first token. + if self.get_goal() == InputElement::HashbangOrRegExp { + self.set_goal(InputElement::RegExp); + if next_ch == 0x23 && self.cursor.peek_char()? == Some(0x21) { + let _token = HashbangComment.lex(&mut self.cursor, start, interner); + return self.next(interner); + }; + } + + // Ignore whitespace + if is_whitespace(next_ch) { + loop { + start = self.cursor.pos(); + let Some(next) = self.cursor.next_char()? else { + return Ok(None); + }; + if !is_whitespace(next) { + next_ch = next; + break; + } + } + } if let Ok(c) = char::try_from(next_ch) { let token = match c { @@ -392,6 +398,7 @@ pub(crate) enum InputElement { Div, RegExp, TemplateTail, + HashbangOrRegExp, } impl Default for InputElement { diff --git a/core/parser/src/parser/mod.rs b/core/parser/src/parser/mod.rs index 1f2bd364070..ada983b6e24 100644 --- a/core/parser/src/parser/mod.rs +++ b/core/parser/src/parser/mod.rs @@ -11,7 +11,7 @@ mod tests; use crate::{ error::ParseResult, - lexer::Error as LexError, + lexer::{Error as LexError, InputElement}, parser::{ cursor::Cursor, function::{FormalParameters, FunctionStatementList}, @@ -140,6 +140,7 @@ impl<'a, R: ReadChar> Parser<'a, R> { /// /// [spec]: https://tc39.es/ecma262/#prod-Script pub fn parse_script(&mut self, interner: &mut Interner) -> ParseResult { + self.cursor.set_goal(InputElement::HashbangOrRegExp); ScriptParser::new(false).parse(&mut self.cursor, interner) } @@ -155,6 +156,7 @@ impl<'a, R: ReadChar> Parser<'a, R> { where R: ReadChar, { + self.cursor.set_goal(InputElement::HashbangOrRegExp); ModuleParser.parse(&mut self.cursor, interner) } @@ -172,6 +174,7 @@ impl<'a, R: ReadChar> Parser<'a, R> { direct: bool, interner: &mut Interner, ) -> ParseResult { + self.cursor.set_goal(InputElement::HashbangOrRegExp); ScriptParser::new(direct).parse(&mut self.cursor, interner) }