Skip to content

Commit

Permalink
Correct tracking of nested template states
Browse files Browse the repository at this point in the history
- This must be done because templates can nest other templates.
- In addition, objects can be provided as values.  Since the closing
  brace of an object shares the RBRACE symbol with the opening of the
  template middle/tail fragments, there needs to be a way to
  disambiguate the two uses of that symbol.
  • Loading branch information
metatoaster committed Jul 3, 2020
1 parent 35266cf commit 9fb50df
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 8 deletions.
45 changes: 37 additions & 8 deletions src/calmjs/parse/lexers/es2015.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
ES2015 (ECMAScript 6th Edition/ES6) lexer.
"""

from __future__ import unicode_literals

import re
import ply
from itertools import chain
Expand Down Expand Up @@ -75,9 +77,9 @@ def broken_template_token_handler(lexer, token):
if lexer.current_template_tokens:
# join all tokens together
tmpl = '...'.join(
t.value for t in chain(lexer.current_template_tokens, [token]))
lineno = lexer.current_template_tokens[0].lineno
colno = lexer.current_template_tokens[0].colno
t.value for t in chain(lexer.current_template_tokens[-1], [token]))
lineno = lexer.current_template_tokens[-1][0].lineno
colno = lexer.current_template_tokens[-1][0].colno
else:
tmpl = token.value
lineno = token.lineno
Expand All @@ -96,6 +98,7 @@ def __init__(self, with_comments=False, yield_comments=False):
with_comments=with_comments, yield_comments=yield_comments)
self.error_token_handlers.append(broken_template_token_handler)
self.current_template_tokens = []
self.current_template_tokens_braces = []

# Punctuators (ES6)
# t_DOLLAR_LBRACE = r'${'
Expand Down Expand Up @@ -149,6 +152,7 @@ def __init__(self, with_comments=False, yield_comments=False):
(?:`|\${)) # closing ` or ${
"""

LBRACE = r'{'
RBRACE = r'}'

@ply.lex.TOKEN(template)
Expand All @@ -157,13 +161,38 @@ def t_TEMPLATE_RAW(self, token):
if patt.match(token.value):
token.type = token_type
break
if token.type == 'TEMPLATE_HEAD':
self.current_template_tokens = [token]
elif token.type == 'TEMPLATE_MIDDLE':
self.current_template_tokens.append(token)
else:
self.current_template_tokens = []
raise ValueError("invalid token %r" % token)

if token.type == 'TEMPLATE_HEAD':
self.current_template_tokens.append([token])
self.current_template_tokens_braces.append(0)
return token
elif token.type == 'TEMPLATE_NOSUB':
return token

if not self.current_template_tokens_braces:
raise ECMASyntaxError('Unexpected %s at %s:%s' % (
repr_compat('}'), token.lineno, self._get_colno(token)))
if self.current_template_tokens_braces[-1] > 0:
# a nested brace is still open: emit only the leading '}' as an
# RBRACE token instead of a template middle/tail fragment
self.current_template_tokens_braces[-1] -= 1
self.lexer.lexpos = self.lexer.lexpos - len(token.value) + 1
token.value = token.value[0]
token.type = 'RBRACE'
return token

if token.type == 'TEMPLATE_MIDDLE':
self.current_template_tokens[-1].append(token)
elif token.type == 'TEMPLATE_TAIL':
self.current_template_tokens_braces.pop()
self.current_template_tokens.pop()
return token

# Lexer rule for '{'.  While at least one template literal is open, count
# this brace against the innermost template so that t_TEMPLATE_RAW can tell
# a '}' that closes it apart from one that starts a template middle/tail.
@ply.lex.TOKEN(LBRACE)
def t_LBRACE(self, token):
# a non-empty list means a template literal is currently open
if self.current_template_tokens_braces:
# only the innermost (last) template's brace counter is incremented
self.current_template_tokens_braces[-1] += 1
return token

@ply.lex.TOKEN(RBRACE)
Expand Down
21 changes: 21 additions & 0 deletions src/calmjs/parse/tests/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,18 @@
'template_literal_escape',
(r'`f\`o`',
[r'TEMPLATE_NOSUB `f\`o`']),
), (
'template_middle_with_object',
('`object${{1:1}} ${foo}`',
['TEMPLATE_HEAD `object${',
'LBRACE {', 'NUMBER 1', 'COLON :', 'NUMBER 1', 'RBRACE }',
'TEMPLATE_MIDDLE } ${', 'ID foo', 'TEMPLATE_TAIL }`']),
), (
'template_tail_with_object',
('`object${{1:1}}`',
['TEMPLATE_HEAD `object${',
'LBRACE {', 'NUMBER 1', 'COLON :', 'NUMBER 1', 'RBRACE }',
'TEMPLATE_TAIL }`']),
), (
'template_literal_assignment',
('s = `hello world`',
Expand Down Expand Up @@ -684,6 +696,15 @@
"var foo = `${foo}bar${baz}fail",
# the specific identifiers are not tracked, thus ...
"Unterminated template literal '`${...}bar${...}...' at 1:11",
), (
'unterminated_template_nested',
"var foo = `${`${foo}bar${baz}fail`}",
# the specific identifiers are not tracked, thus ...
"Unterminated template literal '`${...}' at 1:11",
), (
'unexpected_template_tail',
"var foo = `${value}`}`",
"Unexpected '}' at 1:21",
), (
'invalid_hex_sequence',
"var foo = `fail\\x1`",
Expand Down
116 changes: 116 additions & 0 deletions src/calmjs/parse/tests/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2871,6 +2871,24 @@ def parse_to_repr(value):
>>
]>
""",
), (
'template_with_many_rbrace',
"""
value = `string is ${wat}}}}`
""",
"""
<Program @1:1 ?children=[
<ExprStatement @1:1 expr=<Assign @1:7 left=<
Identifier @1:1 value='value'>,
op='=',
right=<TemplateLiteral @1:9 ?children=[
<TemplateHead @1:9 value='`string is ${'>,
<Identifier @1:22 value='wat'>,
<TemplateTail @1:25 value='}}}}`'>
]>
>>
]>
""",
), (
'template_in_template',
"""
Expand All @@ -2895,6 +2913,88 @@ def parse_to_repr(value):
>>
]>
""",
), (
'template_tail_with_object',
"""
value = `object${{1:1}}}`
""",
"""
<Program @1:1 ?children=[
<ExprStatement @1:1 expr=<Assign @1:7 left=<
Identifier @1:1 value='value'>, op='=',
right=<TemplateLiteral @1:9 ?children=[
<TemplateHead @1:9 value='`object${'>,
<Object @1:18 properties=[
<Assign @1:20 left=<Number @1:19 value='1'>,
op=':',
right=<Number @1:21 value='1'>>
]>,
<TemplateTail @1:23 value='}}`'>
]>
>>
]>
""",
), (
'template_middle_with_object',
"""
value = `object${{1:1}}middle${tail}`
""",
"""
<ES2015Program @1:1 ?children=[
<ExprStatement @1:1 expr=<Assign @1:7 left=<
Identifier @1:1 value='value'>,
op='=', right=<TemplateLiteral @1:9 ?children=[
<TemplateHead @1:9 value='`object${'>,
<Object @1:18 properties=[
<Assign @1:20 left=<Number @1:19 value='1'>,
op=':', right=<Number @1:21 value='1'>>
]>,
<TemplateMiddle @1:23 value='}middle${'>,
<Identifier @1:32 value='tail'>,
<TemplateTail @1:36 value='}`'>
]>
>>
]>
""",
), (
'template_with_object_with_template_with_object',
"""
value = `object
${{1:`${{
2:`${{3:3}}`
}}`}}
`
""",
r"""
<Program @1:1 ?children=[
<ExprStatement @1:1 expr=<Assign @1:7 left=<
Identifier @1:1 value='value'>,
op='=', right=<TemplateLiteral @1:9 ?children=[
<TemplateHead @1:9 value='`object\n${'>,
<Object @2:3 properties=[
<Assign @2:5 left=<Number @2:4 value='1'>, op=':',
right=<TemplateLiteral @2:6 ?children=[
<TemplateHead @2:6 value='`${'>,
<Object @2:9 properties=[
<Assign @3:4 left=<Number @3:3 value='2'>, op=':',
right=<TemplateLiteral @3:5 ?children=[
<TemplateHead @3:5 value='`${'>,
<Object @3:8 properties=[
<Assign @3:10 left=<Number @3:9 value='3'>,
op=':', right=<Number @3:11 value='3'>>
]>,
<TemplateTail @3:13 value='}`'>
]>>
]>,
<TemplateTail @4:2 value='}`'>
]>
>
]>,
<TemplateTail @4:5 value='}\n`'>
]>
>>
]>
""",
)]))


Expand Down Expand Up @@ -2983,6 +3083,22 @@ def build_es2015_syntax_error_test_cases(clsname, parse):
'empty_expression_in_template',
'`head${}tail`',
"Unexpected '}tail`' at 1:8 after '`head${' at 1:1",
), (
'mismatched_template_termination_eof',
"var foo = `${`${foo}bar${baz}fail`",
"Unexpected end of input after '}fail`' at 1:29",
), (
'mismatched_template_termination',
"var foo = `head${`${foo}bar${baz}fail`}",
"Unterminated template literal '`head${...}' at 1:11",
), (
'unexpected_block',
"var foo = `${{11}}`",
"Unexpected '}' at 1:17 after '11' at 1:15",
), (
'object_no_template_keys',
"var foo = {`foo`: `foo`}",
"Unexpected '`foo`' at 1:12 between '{' at 1:11 and ':' at 1:17",
)]), ECMASyntaxError)


Expand Down
8 changes: 8 additions & 0 deletions src/calmjs/parse/tests/test_es2015_lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ def test_initial_template_character(self):
self.assertEqual(
str(e.exception), "Unterminated template literal '`' at 1:1")

# Check that t_TEMPLATE_RAW raises ValueError when called directly with a
# token that was not produced by the template rule.
def test_invalid_template_token(self):
lexer = Lexer()
# lex an empty string literal to obtain a token that is not a template
# NOTE(review): presumably this yields a STRING token - confirm
lexer.input('""')
token = lexer.next()
# force an invalid token into the statement
with self.assertRaises(ValueError):
lexer.t_TEMPLATE_RAW(token)


LexerKeywordTestCase = build_equality_testcase(
'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), (
Expand Down

0 comments on commit 9fb50df

Please sign in to comment.