Correct tracking of nested template states

- This must be done as templates can nest templates.
- Not to mention the fact that objects can be provided as values. Given that objects and templates share the RBRACE symbol, there needs to be a way to disambiguate whether that symbol closes an object or opens a template middle/tail fragment.
calmjs · Jul 3, 2020 · 9fb50df · 9fb50df
1 parent 35266cf
commit 9fb50df
Show file tree

Hide file tree

Showing 4 changed files with 182 additions and 8 deletions.
diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py
@@ -3,6 +3,8 @@
 ES2015 (ECMAScript 6th Edition/ES6) lexer.
 """
 
+from __future__ import unicode_literals
+
 import re
 import ply
 from itertools import chain
@@ -75,9 +77,9 @@ def broken_template_token_handler(lexer, token):
     if lexer.current_template_tokens:
         # join all tokens together
         tmpl = '...'.join(
-            t.value for t in chain(lexer.current_template_tokens, [token]))
-        lineno = lexer.current_template_tokens[0].lineno
-        colno = lexer.current_template_tokens[0].colno
+            t.value for t in chain(lexer.current_template_tokens[-1], [token]))
+        lineno = lexer.current_template_tokens[-1][0].lineno
+        colno = lexer.current_template_tokens[-1][0].colno
     else:
         tmpl = token.value
         lineno = token.lineno
@@ -96,6 +98,7 @@ def __init__(self, with_comments=False, yield_comments=False):
             with_comments=with_comments, yield_comments=yield_comments)
         self.error_token_handlers.append(broken_template_token_handler)
         self.current_template_tokens = []
+        self.current_template_tokens_braces = []
 
     # Punctuators (ES6)
     # t_DOLLAR_LBRACE  = r'${'
@@ -149,6 +152,7 @@ def __init__(self, with_comments=False, yield_comments=False):
     (?:`|\${))                         # closing ` or ${
     """
 
+    LBRACE        = r'{'
     RBRACE        = r'}'
 
     @ply.lex.TOKEN(template)
@@ -157,13 +161,38 @@ def t_TEMPLATE_RAW(self, token):
             if patt.match(token.value):
                 token.type = token_type
                 break
-        if token.type == 'TEMPLATE_HEAD':
-            self.current_template_tokens = [token]
-        elif token.type == 'TEMPLATE_MIDDLE':
-            self.current_template_tokens.append(token)
         else:
-            self.current_template_tokens = []
+            raise ValueError("invalid token %r" % token)
+
+        if token.type == 'TEMPLATE_HEAD':
+            self.current_template_tokens.append([token])
+            self.current_template_tokens_braces.append(0)
+            return token
+        elif token.type == 'TEMPLATE_NOSUB':
+            return token
+
+        if not self.current_template_tokens_braces:
+            raise ECMASyntaxError('Unexpected %s at %s:%s' % (
+                repr_compat('}'), token.lineno, self._get_colno(token)))
+        if self.current_template_tokens_braces[-1] > 0:
+            # produce a RBRACE token instead
+            self.current_template_tokens_braces[-1] -= 1
+            self.lexer.lexpos = self.lexer.lexpos - len(token.value) + 1
+            token.value = token.value[0]
+            token.type = 'RBRACE'
+            return token
+
+        if token.type == 'TEMPLATE_MIDDLE':
+            self.current_template_tokens[-1].append(token)
+        elif token.type == 'TEMPLATE_TAIL':
+            self.current_template_tokens_braces.pop()
+            self.current_template_tokens.pop()
+        return token
 
+    @ply.lex.TOKEN(LBRACE)
+    def t_LBRACE(self, token):
+        if self.current_template_tokens_braces:
+            self.current_template_tokens_braces[-1] += 1
         return token
 
     @ply.lex.TOKEN(RBRACE)

diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py
@@ -629,6 +629,18 @@
         'template_literal_escape',
         (r'`f\`o`',
          [r'TEMPLATE_NOSUB `f\`o`']),
+    ), (
+        'template_middle_with_object',
+        ('`object${{1:1}} ${foo}`',
+         ['TEMPLATE_HEAD `object${',
+          'LBRACE {', 'NUMBER 1', 'COLON :', 'NUMBER 1', 'RBRACE }',
+          'TEMPLATE_MIDDLE } ${', 'ID foo', 'TEMPLATE_TAIL }`']),
+    ), (
+        'template_tail_with_object',
+        ('`object${{1:1}}`',
+         ['TEMPLATE_HEAD `object${',
+          'LBRACE {', 'NUMBER 1', 'COLON :', 'NUMBER 1', 'RBRACE }',
+          'TEMPLATE_TAIL }`']),
     ), (
         'template_literal_assignment',
         ('s = `hello world`',
@@ -684,6 +696,15 @@
         "var foo = `${foo}bar${baz}fail",
         # the specific identifiers are not tracked, thus ...
         "Unterminated template literal '`${...}bar${...}...' at 1:11",
+    ), (
+        'unterminated_template_nested',
+        "var foo = `${`${foo}bar${baz}fail`}",
+        # the specific identifiers are not tracked, thus ...
+        "Unterminated template literal '`${...}' at 1:11",
+    ), (
+        'unexpected_template_tail',
+        "var foo = `${value}`}`",
+        "Unexpected '}' at 1:21",
     ), (
         'invalid_hex_sequence',
         "var foo = `fail\\x1`",

diff --git a/src/calmjs/parse/tests/parser.py b/src/calmjs/parse/tests/parser.py
@@ -2871,6 +2871,24 @@ def parse_to_repr(value):
           >>
         ]>
         """,
+    ), (
+        'template_with_many_rbrace',
+        """
+        value = `string is ${wat}}}}`
+        """,
+        """
+        <Program @1:1 ?children=[
+          <ExprStatement @1:1 expr=<Assign @1:7 left=<
+              Identifier @1:1 value='value'>,
+            op='=',
+            right=<TemplateLiteral @1:9 ?children=[
+              <TemplateHead @1:9 value='`string is ${'>,
+              <Identifier @1:22 value='wat'>,
+              <TemplateTail @1:25 value='}}}}`'>
+            ]>
+          >>
+        ]>
+        """,
     ), (
         'template_in_template',
         """
@@ -2895,6 +2913,88 @@ def parse_to_repr(value):
           >>
         ]>
         """,
+    ), (
+        'template_tail_with_object',
+        """
+        value = `object${{1:1}}}`
+        """,
+        """
+        <Program @1:1 ?children=[
+          <ExprStatement @1:1 expr=<Assign @1:7 left=<
+              Identifier @1:1 value='value'>, op='=',
+            right=<TemplateLiteral @1:9 ?children=[
+              <TemplateHead @1:9 value='`object${'>,
+              <Object @1:18 properties=[
+                <Assign @1:20 left=<Number @1:19 value='1'>,
+                  op=':',
+                  right=<Number @1:21 value='1'>>
+              ]>,
+              <TemplateTail @1:23 value='}}`'>
+            ]>
+          >>
+        ]>
+        """,
+    ), (
+        'template_middle_with_object',
+        """
+        value = `object${{1:1}}middle${tail}`
+        """,
+        """
+        <ES2015Program @1:1 ?children=[
+          <ExprStatement @1:1 expr=<Assign @1:7 left=<
+              Identifier @1:1 value='value'>,
+            op='=', right=<TemplateLiteral @1:9 ?children=[
+              <TemplateHead @1:9 value='`object${'>,
+              <Object @1:18 properties=[
+                <Assign @1:20 left=<Number @1:19 value='1'>,
+                  op=':', right=<Number @1:21 value='1'>>
+              ]>,
+              <TemplateMiddle @1:23 value='}middle${'>,
+              <Identifier @1:32 value='tail'>,
+              <TemplateTail @1:36 value='}`'>
+            ]>
+          >>
+        ]>
+        """,
+    ), (
+        'template_with_object_with_template_with_object',
+        """
+        value = `object
+        ${{1:`${{
+          2:`${{3:3}}`
+        }}`}}
+        `
+        """,
+        r"""
+        <Program @1:1 ?children=[
+          <ExprStatement @1:1 expr=<Assign @1:7 left=<
+              Identifier @1:1 value='value'>,
+            op='=', right=<TemplateLiteral @1:9 ?children=[
+              <TemplateHead @1:9 value='`object\n${'>,
+              <Object @2:3 properties=[
+                <Assign @2:5 left=<Number @2:4 value='1'>, op=':',
+                  right=<TemplateLiteral @2:6 ?children=[
+                    <TemplateHead @2:6 value='`${'>,
+                    <Object @2:9 properties=[
+                      <Assign @3:4 left=<Number @3:3 value='2'>, op=':',
+                        right=<TemplateLiteral @3:5 ?children=[
+                          <TemplateHead @3:5 value='`${'>,
+                        <Object @3:8 properties=[
+                          <Assign @3:10 left=<Number @3:9 value='3'>,
+                            op=':', right=<Number @3:11 value='3'>>
+                        ]>,
+                        <TemplateTail @3:13 value='}`'>
+                      ]>>
+                    ]>,
+                  <TemplateTail @4:2 value='}`'>
+                  ]>
+                >
+              ]>,
+              <TemplateTail @4:5 value='}\n`'>
+            ]>
+          >>
+        ]>
+        """,
     )]))
 
 
@@ -2983,6 +3083,22 @@ def build_es2015_syntax_error_test_cases(clsname, parse):
         'empty_expression_in_template',
         '`head${}tail`',
         "Unexpected '}tail`' at 1:8 after '`head${' at 1:1",
+    ), (
+        'mismatched_template_termination_eof',
+        "var foo = `${`${foo}bar${baz}fail`",
+        "Unexpected end of input after '}fail`' at 1:29",
+    ), (
+        'mismatched_template_termination',
+        "var foo = `head${`${foo}bar${baz}fail`}",
+        "Unterminated template literal '`head${...}' at 1:11",
+    ), (
+        'unexpected_block',
+        "var foo = `${{11}}`",
+        "Unexpected '}' at 1:17 after '11' at 1:15",
+    ), (
+        'object_no_template_keys',
+        "var foo = {`foo`: `foo`}",
+        "Unexpected '`foo`' at 1:12 between '{' at 1:11 and ':' at 1:17",
     )]), ECMASyntaxError)
 
 

diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py
@@ -29,6 +29,14 @@ def test_initial_template_character(self):
         self.assertEqual(
             str(e.exception), "Unterminated template literal '`' at 1:1")
 
+    def test_invalid_template_token(self):
+        lexer = Lexer()
+        lexer.input('""')
+        token = lexer.next()
+        # force an invalid token into the statement
+        with self.assertRaises(ValueError):
+            lexer.t_TEMPLATE_RAW(token)
+
 
 LexerKeywordTestCase = build_equality_testcase(
     'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), (