Skip to content

Commit

Permalink
Ensure errors in template literals are also dealt with
Browse files Browse the repository at this point in the history
  • Loading branch information
metatoaster committed Jul 3, 2020
1 parent c0eb467 commit 6d8f76d
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 1 deletion.
83 changes: 83 additions & 0 deletions src/calmjs/parse/lexers/es2015.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@

import re
import ply
from itertools import chain

from calmjs.parse.utils import repr_compat
from calmjs.parse.exceptions import ECMASyntaxError
from calmjs.parse.lexers.es5 import Lexer as ES5Lexer

template_token_types = (
Expand All @@ -26,12 +29,74 @@
'YIELD',
)

# Matches the leading, well-formed portion of a template fragment: an
# opening ` or } followed by any run of permitted characters and escape
# sequences.  The closing ` or ${ is deliberately left out, so the match
# ends right before whatever prevented the fragment from being a valid
# template token (an invalid escape sequence, or the end of the input).
PATT_BROKEN_TEMPLATE = re.compile(r"""
(?:(?:`|}) # opening ` or }
(?: [^`\\] # not `, \; allow
| \\(\n|\r(?!\n)|\u2028|\u2029|\r\n) # line continuation
| \\[a-tvwyzA-TVWYZ!-\/:-@\[-`{-~] # escaped chars
| \\x[0-9a-fA-F]{2} # hex_escape_sequence
| \\u[0-9a-fA-F]{4} # unicode_escape_sequence
| \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # octal_escape_sequence
| \\0 # <NUL> (ECMA-262 6.0 21.2.2.11)
)* # zero or many times
) # omit closing ` or ${
""", flags=re.VERBOSE)


def broken_template_token_handler(lexer, token):
    """
    Report a malformed template literal as an ECMASyntaxError.

    Arguments

    lexer
        The lexer wrapper; its underlying ``lexer.lexer`` supplies the
        ``lexpos`` and ``lexdata`` being probed, ``lineno`` and
        ``current_template_tokens`` are read from it, and its
        ``_get_colno``, ``_update_newline_idx`` and
        ``_get_colno_lexpos`` helpers are called.
    token
        The error token.  If it starts like a template fragment, its
        ``value`` is narrowed in place to the portion that matched
        ``PATT_BROKEN_TEMPLATE``.

    Returns ``None`` without side effects when the token does not start
    like a template fragment.  Otherwise always raises ECMASyntaxError:

    - an invalid hexadecimal/unicode escape when the text following the
      matched fragment starts with ``\\x`` or ``\\u``;
    - a generic invalid escape for a backslash followed by any other
      character that the pattern did not accept;
    - an unterminated template literal in every other case, including
      a lone backslash at the very end of the input.
    """

    match = PATT_BROKEN_TEMPLATE.match(token.value)
    if not match:
        return

    # narrow the error token value to only what was matched here, as
    # this will be the actual token that "failed"
    token.value = match.group()
    # the colno for the current token has to be calculated before the
    # newline indexes get updated for the error reporting by raw lexpos
    colno = lexer._get_colno(token)
    lexer._update_newline_idx(token)
    # probe for the next values (which no valid rules will match)
    position = lexer.lexer.lexpos + len(token.value)
    failure = lexer.lexer.lexdata[position:position + 2]

    if failure.startswith('\\'):
        # slicing instead of indexing, plus dict.get, so that neither a
        # lone trailing backslash nor an unlisted escape character can
        # leak an IndexError/KeyError out of the lexer
        type_ = {'x': 'hexadecimal', 'u': 'unicode'}.get(failure[1:])
        if type_:
            # only real hex digits belong to the reported sequence
            seq = re.match(
                r'\\[xu][0-9a-fA-F]*', lexer.lexer.lexdata[position:]
            ).group()
            raise ECMASyntaxError(
                "Invalid %s escape sequence '%s' at %s:%s" % (
                    type_, seq, lexer.lineno,
                    lexer._get_colno_lexpos(position)
                )
            )
        if len(failure) == 2:
            # some other escape that the template pattern rejected
            raise ECMASyntaxError(
                "Invalid escape sequence '%s' at %s:%s" % (
                    failure, lexer.lineno,
                    lexer._get_colno_lexpos(position)
                )
            )
        # otherwise a backslash is the final character of the input;
        # the template simply never got closed, reported below

    tl = 16  # truncate length

    if lexer.current_template_tokens:
        # join all tokens together, with the substitutions (which are
        # not tracked) represented by '...'
        tmpl = '...'.join(
            t.value for t in chain(lexer.current_template_tokens, [token]))
        # report the position where the whole template started
        lineno = lexer.current_template_tokens[0].lineno
        colno = lexer.current_template_tokens[0].colno
    else:
        tmpl = token.value
        lineno = token.lineno

    raise ECMASyntaxError('Unterminated template literal %s at %s:%s' % (
        repr_compat(tmpl[:tl].strip() + (tmpl[tl:] and '...')), lineno, colno))


class Lexer(ES5Lexer):
"""
ES2015 lexer.
"""

def __init__(self, with_comments=False, yield_comments=False):
    """
    Set up the ES5 lexer, then add the template literal tracking.

    Both arguments are forwarded unchanged to the ES5 lexer.
    """

    super(Lexer, self).__init__(
        with_comments=with_comments,
        yield_comments=yield_comments,
    )
    # head/middle tokens of the template literal currently being lexed
    self.current_template_tokens = []
    # have broken template fragments reported through the error tokens
    self.error_token_handlers.append(broken_template_token_handler)

# Punctuators (ES6)
# t_DOLLAR_LBRACE = r'${'
# this is also a right brace punctuator...
Expand Down Expand Up @@ -84,9 +149,27 @@ class Lexer(ES5Lexer):
(?:`|\${)) # closing ` or ${
"""

RBRACE = r'}'

@ply.lex.TOKEN(template)
def t_TEMPLATE_RAW(self, token):
    # Assign the specific template token type using the first pattern
    # in template_token_types that matches the raw value; the type is
    # left untouched when none of them match.
    raw = token.value
    for pattern, kind in template_token_types:
        if not pattern.match(raw):
            continue
        token.type = kind
        break

    # Keep track of the still-open template: a head starts a new list,
    # a middle extends it, and anything else clears it.
    if token.type == 'TEMPLATE_MIDDLE':
        self.current_template_tokens.append(token)
    else:
        self.current_template_tokens = (
            [token] if token.type == 'TEMPLATE_HEAD' else [])

    return token

@ply.lex.TOKEN(RBRACE)
def t_RBRACE(self, token):
    # Outside of a template this is just a right brace punctuator.
    if not self.current_template_tokens:
        return token

    # A template is still open and this brace was not consumed by the
    # template rule: rewind onto the brace, hand everything from there
    # onwards to the broken template handler for error reporting.
    brace_pos = self.lexer.lexpos - 1
    self.lexer.lexpos = brace_pos
    token.value = self.lexer.lexdata[brace_pos:]
    broken_template_token_handler(self, token)
    return token
46 changes: 46 additions & 0 deletions src/calmjs/parse/tests/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,52 @@
)
]

# Syntax error cases for template literals; every entry is a tuple of
# (case name, source text, expected error message).  The offending
# template fragment shows up single-quoted in the "Unterminated"
# messages, with the untracked substitutions and any truncated
# remainder appearing as '...'.
es2015_error_cases_tmpl = [
    (
        'unterminated_template_eof',
        "var foo = `test",
        "Unterminated template literal '`test' at 1:11",
    ),
    (
        'unterminated_template_middle_eof',
        "var foo = `${foo}bar${baz}fail",
        "Unterminated template literal '`${...}bar${...}...' at 1:11",
    ),
    (
        'invalid_hex_sequence',
        "var foo = `fail\\x1`",
        "Invalid hexadecimal escape sequence '\\x1' at 1:16",
    ),
    (
        'invalid_unicode_sequence',
        "var foo = `fail\\u12`",
        "Invalid unicode escape sequence '\\u12' at 1:16",
    ),
    (
        'invalid_hex_sequence_multiline',
        "var foo = `foobar\r\nfail\\x1`",
        "Invalid hexadecimal escape sequence '\\x1' at 2:5",
    ),
    (
        'invalid_unicode_sequence_multiline',
        "var foo = `foobar\nfail\\u12`",
        "Invalid unicode escape sequence '\\u12' at 2:5",
    ),
    (
        'invalid_hex_sequence_middle',
        "var foo = `fail${wat}blah\\x1`",
        "Invalid hexadecimal escape sequence '\\x1' at 1:26",
    ),
    (
        'invalid_hex_sequence_middle_multiline',
        "var foo = `foobar${lolwat}\r\nfailure${failure}wat\r\nwat\\x1`",
        "Invalid hexadecimal escape sequence '\\x1' at 3:4",
    ),
    (
        'long_invalid_template_truncated',
        "var foo = `1234567890abcdetruncated",
        "Unterminated template literal '`1234567890abcde...' at 1:11",
    ),
]


def run_lexer(value, lexer_cls):
lexer = lexer_cls()
Expand Down
9 changes: 8 additions & 1 deletion src/calmjs/parse/tests/test_es2015_lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from calmjs.parse.exceptions import ECMASyntaxError

from calmjs.parse.testing.util import build_equality_testcase
from calmjs.parse.testing.util import build_exception_testcase
from calmjs.parse.tests.lexer import (
run_lexer,
run_lexer_pos,
Expand All @@ -14,6 +15,7 @@
es5_all_cases,
es2015_cases,
es2015_pos_cases,
es2015_error_cases_tmpl,
)


Expand All @@ -24,7 +26,8 @@ def test_initial_template_character(self):
lexer.input('`')
with self.assertRaises(ECMASyntaxError) as e:
[token for token in lexer]
self.assertEqual(str(e.exception), "Illegal character '`' at 1:1")
self.assertEqual(
str(e.exception), "Unterminated template literal '`' at 1:1")


LexerKeywordTestCase = build_equality_testcase(
Expand Down Expand Up @@ -60,3 +63,7 @@ def test_initial_template_character(self):
# Test case class built from es2015_pos_cases, with run_lexer_pos bound
# to the ES2015 Lexer as the function under test.
# NOTE(review): presumably one equality test is generated per case -
# confirm against calmjs.parse.testing.util.build_equality_testcase.
LexerES2015PosTestCase = build_equality_testcase(
'LexerES2015PosTestCase', partial(
run_lexer_pos, lexer_cls=Lexer), es2015_pos_cases)

# Test case class built from the template error cases, with run_lexer
# bound to the ES2015 Lexer and ECMASyntaxError as the exception type.
# NOTE(review): presumably each case must raise that exception with the
# expected message - confirm against build_exception_testcase.
LexerES2015ErrorTemplateTestCase = build_exception_testcase(
'LexerES2015ErrorTemplateTestCase', partial(
run_lexer, lexer_cls=Lexer), es2015_error_cases_tmpl, ECMASyntaxError)

0 comments on commit 6d8f76d

Please sign in to comment.