Provide actual template literal lexical components

- Mark the generated tokens with the appropriate types.
calmjs · Apr 22, 2018 · 6fe44ff · 6fe44ff
1 parent 4fbf1db
commit 6fe44ff
Show file tree

Hide file tree

Showing 3 changed files with 39 additions and 21 deletions.
diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py
@@ -3,10 +3,22 @@
 ES2015 (ECMAScript 6th Edition/ES6) lexer.
 """
 
+import re
 import ply
 
 from calmjs.parse.lexers.es5 import Lexer as ES5Lexer
 
+# Classify a raw template token by its delimiters.  Every pattern is
+# anchored with \Z so that it must cover the whole token; an unanchored
+# prefix match on an escaped "\${" or "\`" inside the text would
+# otherwise select the wrong token type.
+template_token_types = (
+    (re.compile(r'`.*`\Z', re.S), 'TEMPLATE_NOSUB'),
+    (re.compile(r'`.*\$\{\Z', re.S), 'TEMPLATE_HEAD'),
+    (re.compile(r'\}.*\$\{\Z', re.S), 'TEMPLATE_MIDDLE'),
+    (re.compile(r'\}.*`\Z', re.S), 'TEMPLATE_TAIL'),
+)
+
 
 class Lexer(ES5Lexer):
     """
@@ -25,7 +37,7 @@ class Lexer(ES5Lexer):
         'ARROW', 'SPREAD',    # => ...
 
         # ES2015 terminal types
-        'TEMPLATE',
+        'TEMPLATE_NOSUB', 'TEMPLATE_HEAD', 'TEMPLATE_MIDDLE', 'TEMPLATE_TAIL',
     )
 
     template = r"""
@@ -42,8 +54,8 @@ class Lexer(ES5Lexer):
     """  # `
 
     @ply.lex.TOKEN(template)
-    def t_TEMPLATE(self, token):
-        # remove escape + new line sequence used for strings
-        # written across multiple lines of code
-        token.value = token.value.replace('\\\n', '')
+    def t_TEMPLATE_RAW(self, token):
+        for patt, token_type in template_token_types:
+            if patt.match(token.value):
+                token.type = token_type
         return token
diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py
@@ -464,56 +464,62 @@
         ('const c',
          ['CONST const', 'ID c']),
     ), (
-        'arrow_punctuator',
-        ('=>',
-         ['ARROW =>']),
+        'punctuators',
+        ('=> ...',
+         ['ARROW =>', 'SPREAD ...']),
     ), (
         'arrow_functions',
         ('const c = (name) => { return name; }',
          ['CONST const', 'ID c', 'EQ =', 'LPAREN (', 'ID name', 'RPAREN )',
           'ARROW =>', 'LBRACE {', 'RETURN return', 'ID name', 'SEMI ;',
           'RBRACE }']),
+    ), (
+        'spread',
+        ('[...spring, ...summer]',
+         ['LBRACKET [', 'SPREAD ...', 'ID spring', 'COMMA ,', 'SPREAD ...',
+          'ID summer', 'RBRACKET ]']),
     ), (
         'template_literal',
         ('`foo`',
-         ['TEMPLATE `foo`']),
+         ['TEMPLATE_NOSUB `foo`']),
     ), (
         'template_multiline',
         ('`foo\nbar\r\nfoo`',
-         ['TEMPLATE `foo\nbar\r\nfoo`']),
+         ['TEMPLATE_NOSUB `foo\nbar\r\nfoo`']),
     ), (
         'template_other_newlines',
         ('`foo\u2028\u2029foo`',
-         ['TEMPLATE `foo\u2028\u2029foo`']),
+         ['TEMPLATE_NOSUB `foo\u2028\u2029foo`']),
     ), (
         'template_literal_with_dollar',
         ('`foo$`',
-         ['TEMPLATE `foo$`']),
+         ['TEMPLATE_NOSUB `foo$`']),
     ), (
         'template_head_tail',
         (r'`hello ${name} while this`',
-         ['TEMPLATE `hello ${', 'ID name', 'TEMPLATE } while this`']),
+         ['TEMPLATE_HEAD `hello ${', 'ID name', 'TEMPLATE_TAIL } while this`']),
     ), (
         'template_empty_head_tail',
         (r'`${name}`',
-         ['TEMPLATE `${', 'ID name', 'TEMPLATE }`']),
+         ['TEMPLATE_HEAD `${', 'ID name', 'TEMPLATE_TAIL }`']),
     ), (
         'template_nested',
         (r'`${`${a * 2}`} ${b}`',
-         ['TEMPLATE `${', 'TEMPLATE `${', 'ID a', 'MULT *', 'NUMBER 2',
-          'TEMPLATE }`', 'TEMPLATE } ${', 'ID b', 'TEMPLATE }`']),
+         ['TEMPLATE_HEAD `${', 'TEMPLATE_HEAD `${', 'ID a', 'MULT *',
+          'NUMBER 2', 'TEMPLATE_TAIL }`', 'TEMPLATE_MIDDLE } ${', 'ID b',
+          'TEMPLATE_TAIL }`']),
     ), (
         'template_some_keywords',
         (r'`this -> ${this}.`',
-         ['TEMPLATE `this -> ${', 'THIS this', 'TEMPLATE }.`']),
+         ['TEMPLATE_HEAD `this -> ${', 'THIS this', 'TEMPLATE_TAIL }.`']),
     ), (
         'template_literal_escape',
         (r'`f\`o`',
-         [r'TEMPLATE `f\`o`']),
+         [r'TEMPLATE_NOSUB `f\`o`']),
     ), (
         'template_literal_assignment',
         ('s = `hello world`',
-         ['ID s', 'EQ =', 'TEMPLATE `hello world`']),
+         ['ID s', 'EQ =', 'TEMPLATE_NOSUB `hello world`']),
     )
 ]
 

diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py
@@ -65,11 +65,11 @@ def test_initial_template_character(self):
 )
 
 LexerES5TestCase = build_equality_testcase(
-    'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), (
+    'LexerES5TestCase', partial(run_lexer, lexer_cls=Lexer), (
         (label, data[0], data[1],) for label, data in es5_cases))
 
 LexerES5PosTestCase = build_equality_testcase(
-    'LexerPosTestCase', partial(
+    'LexerES5PosTestCase', partial(
         run_lexer_pos, lexer_cls=Lexer), es5_pos_cases)
 
 LexerES2015TestCase = build_equality_testcase(