From cfb43c1d2247c0ee77c8d2933ef647faaeac1541 Mon Sep 17 00:00:00 2001 From: James Prevett Date: Sat, 14 Dec 2024 13:23:56 -0600 Subject: [PATCH] BNF grammar now ignores new lines Fixed BNF strings and comments Fixed parser crashing when ending on an ignored token --- {test => grammars}/asm.bnf | 3 ++- src/bnf.bnf | 12 ++++++------ src/bnf.json | 11 ++++------- src/parser.ts | 1 + 4 files changed, 13 insertions(+), 14 deletions(-) rename {test => grammars}/asm.bnf (91%) diff --git a/test/asm.bnf b/grammars/asm.bnf similarity index 91% rename from test/asm.bnf rename to grammars/asm.bnf index 1a80021..994dab4 100644 --- a/test/asm.bnf +++ b/grammars/asm.bnf @@ -10,7 +10,8 @@ comment = "#.*"; ##ignore whitespace line_terminator comment -operand = register | immediate | number; +operand = register +| immediate | number; operand_list = operand, {",", operand}; diff --git a/src/bnf.bnf b/src/bnf.bnf index f536a55..b31378b 100644 --- a/src/bnf.bnf +++ b/src/bnf.bnf @@ -1,9 +1,9 @@ -string = '"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\''; +string = '("(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\')'; identifier = '[a-zA-Z_]\w*'; whitespace = '[ \t]+'; -line_terminator = "[\n;]+"; -comment = "#[^\n]*"; +line_terminator = "\n+"; +comment = "#(?!#)([^\n]*)"; directive = "##\w+ [^\n]*"; # Simplifies internals @@ -16,7 +16,7 @@ right_bracket = '\]'; left_brace = '\{'; right_brace = '\}'; -##ignore whitespace comment +##ignore whitespace comment line_terminator parenthesized = left_param, expression, right_param; optional = left_bracket, expression, right_bracket; @@ -32,6 +32,6 @@ expression = term, {',', term}; rule = identifier, '=', expression, ';'; -rule_list = (rule | comment | directive), {line_terminator, rule | comment | directive}; +rule_list = (rule | directive), {rule | directive}; -##root rule_list \ No newline at end of file +##root rule_list diff --git a/src/bnf.json b/src/bnf.json index 5c284e2..dbdeae5 100644 --- a/src/bnf.json +++ b/src/bnf.json @@ -92,7 +92,6 @@ "type": "oneof", "pattern": [ { "kind": "rule", "type": "required" }, - { "kind": "comment", "type": "required" }, { "kind": "directive", "type": "required" } ] }, @@ -100,9 +99,7 @@ "name": "rule_list#1", "type": "oneof", "pattern": [ - { "kind": "line_terminator", "type": "required" }, { "kind": "rule", "type": "required" }, - { "kind": "comment", "type": "required" }, { "kind": "directive", "type": "required" } ] }, @@ -116,11 +113,11 @@ } ], "literals": [ - { "name": "string", "pattern": "\"(?:\\\\.|[^\"\\\\])*\"|'(?:\\\\.|[^'\\\\])*'" }, + { "name": "string", "pattern": "(\"(?:\\\\.|[^\"\\\\])*\"|'(?:\\\\.|[^'\\\\])*')" }, { "name": "identifier", "pattern": "[a-zA-Z_]\\w*" }, { "name": "whitespace", "pattern": "[ \\t]+" }, - { "name": "line_terminator", "pattern": "[\\n;]+" }, - { "name": "comment", "pattern": "#[^\\n]*" }, + { "name": "line_terminator", "pattern": "\\n+" }, + { "name": "comment", "pattern": "#(?!#)([^\\n]*)" }, { "name": "directive", "pattern": "##\\w+ [^\\n]*" }, { "name": "pipe", "pattern": "\\|" }, { "name": "left_param", "pattern": "\\(" }, @@ -134,5 +131,5 @@ { "name": ";", "pattern": ";" } ], "rootNode": "rule_list", - "ignoreLiterals": ["whitespace", "comment"] + "ignoreLiterals": ["whitespace", "comment", "line_terminator"] } diff --git a/src/parser.ts b/src/parser.ts index da02fc6..d813fea 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -147,6 +147,7 @@ export function parse(options: ParseOptions): Node[] { while (position < tokens.length) { const node = parseNode(options.rootNode); if (!node) { + if (position >= tokens.length && options.ignoreLiterals.includes(tokens.at(-1)!.kind)) break; const token = tokens[dirtyPosition || position]; throw new Error(`Unexpected token "${token.text}" at line ${token.line}, column ${token.column}`); }