From 037d814b10b30e43babccab531ccdb91e51a7a52 Mon Sep 17 00:00:00 2001 From: "SPRENGER Julia, NEA/SCI/DB" Date: Wed, 9 Oct 2024 13:01:38 +0200 Subject: [PATCH 1/6] Add draft version of multi-line grammar --- src/larktools/ebnf_grammar.py | 6 +++++- src/larktools/evaluation.py | 20 ++++++++++++++++++++ tests/test_syntax.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 tests/test_syntax.py diff --git a/src/larktools/ebnf_grammar.py b/src/larktools/ebnf_grammar.py index 8432ceb..2ae9def 100644 --- a/src/larktools/ebnf_grammar.py +++ b/src/larktools/ebnf_grammar.py @@ -15,7 +15,7 @@ start: assign_var - assign_var: VARNAME "=" arith_expr + assign_var: VARNAME "=" multi_lines VARNAME: LETTER (LETTER | DIGIT)* @@ -24,6 +24,9 @@ // but without the fancy tree shaping directives explained at // https://lark-parser.readthedocs.io/en/stable/tree_construction.html + multi_lines: arith_expr | composed_lines + composed_lines: arith_expr _NL multi_lines + arith_expr: sum sum: product | addition | subtraction addition: sum "+" product @@ -55,4 +58,5 @@ %import common.WS_INLINE %ignore WS_INLINE + %import common.NEWLINE -> _NL """ diff --git a/src/larktools/evaluation.py b/src/larktools/evaluation.py index d480b68..b2128f1 100644 --- a/src/larktools/evaluation.py +++ b/src/larktools/evaluation.py @@ -109,3 +109,23 @@ def eval_bracketed_arith_expr(node, env): child = get_children(node)[0] assert get_name(child) == "arith_expr" return eval_arith_expr(child, env) + +def eval_composed_lines(node, env): + # the newline break does not appear as child node + # return only the result of the latter evaluation + child1 = get_children(node)[0] + child2 = get_children(node)[1] + assert get_name(child1) == "arith_expr" + eval_arith_expr(child1, env) + assert get_name(child2) == "multi_line" + return eval_multi_line(child2, env) + +def eval_multi_line(node, env): + # this can be either an arithmetic expression or + # composed lines + child = 
get_children(node)[0] + child_name = get_name(child) + if child_name == "arith_expr": + return eval_arith_expr(child) + elif child_name == "composed_lines": + return eval_composed_lines(child) diff --git a/tests/test_syntax.py b/tests/test_syntax.py new file mode 100644 index 0000000..26e93cf --- /dev/null +++ b/tests/test_syntax.py @@ -0,0 +1,28 @@ +import pytest +from typing import Optional, Union + +from lark import Lark + +from larktools.ebnf_grammar import grammar +from larktools.evaluation import eval_arith_expr + + +class SyntaxParser: + def __init__(self): + self.parser = Lark(grammar, parser="lalr", start="arith_expr") + self.parse = self.parser.parse + + def parse_and_eval(self, expression: str, env: Optional[Union[None, dict]] = None) -> Union[int, float]: + tree = self.parse(expression) + res = eval_arith_expr(tree, {} if env is None else env) + return res + + +def _parse_and_assert(expression: str, expected: Union[int, float]) -> None: + parser = SyntaxParser() + res = parser.parse_and_eval(expression) + assert expected == res + +def test_multi_line(): + _parse_and_assert("5\n8",8) + _parse_and_assert("5+5\n3+4\1+2", 3) From 754fa3b04ecedd7d127948e7f94bcc9a4a3f367e Mon Sep 17 00:00:00 2001 From: "SPRENGER Julia, NEA/SCI/DB" Date: Wed, 9 Oct 2024 14:03:17 +0200 Subject: [PATCH 2/6] Fix grammar start point for tests --- src/larktools/ebnf_grammar.py | 6 +++--- tests/test_syntax.py | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/larktools/ebnf_grammar.py b/src/larktools/ebnf_grammar.py index 2ae9def..e9ca98b 100644 --- a/src/larktools/ebnf_grammar.py +++ b/src/larktools/ebnf_grammar.py @@ -15,7 +15,7 @@ start: assign_var - assign_var: VARNAME "=" multi_lines + assign_var: VARNAME "=" multi_line_block VARNAME: LETTER (LETTER | DIGIT)* @@ -24,8 +24,8 @@ // but without the fancy tree shaping directives explained at // https://lark-parser.readthedocs.io/en/stable/tree_construction.html - multi_lines: arith_expr | composed_lines 
- composed_lines: arith_expr _NL multi_lines + multi_line_block: arith_expr | composed_lines | _NL* multi_line_block _NL* + composed_lines: arith_expr _NL multi_line_block arith_expr: sum sum: product | addition | subtraction diff --git a/tests/test_syntax.py b/tests/test_syntax.py index 26e93cf..ba4c27a 100644 --- a/tests/test_syntax.py +++ b/tests/test_syntax.py @@ -4,17 +4,17 @@ from lark import Lark from larktools.ebnf_grammar import grammar -from larktools.evaluation import eval_arith_expr +from larktools.evaluation import eval_multi_line_block class SyntaxParser: def __init__(self): - self.parser = Lark(grammar, parser="lalr", start="arith_expr") + self.parser = Lark(grammar, parser="lalr", start="multi_line_block") self.parse = self.parser.parse def parse_and_eval(self, expression: str, env: Optional[Union[None, dict]] = None) -> Union[int, float]: tree = self.parse(expression) - res = eval_arith_expr(tree, {} if env is None else env) + res = eval_multi_line_block(tree, {} if env is None else env) return res @@ -25,4 +25,5 @@ def _parse_and_assert(expression: str, expected: Union[int, float]) -> None: def test_multi_line(): _parse_and_assert("5\n8",8) - _parse_and_assert("5+5\n3+4\1+2", 3) + _parse_and_assert("5+5\n3+4\n1+2", 3) + _parse_and_assert("\n\n5\n\n3\8", 8) From 87d8041f4890bdaf458c54b701fc77dc3dd09135 Mon Sep 17 00:00:00 2001 From: "SPRENGER Julia, NEA/SCI/DB" Date: Wed, 9 Oct 2024 14:03:48 +0200 Subject: [PATCH 3/6] Add missing eval params --- src/larktools/evaluation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/larktools/evaluation.py b/src/larktools/evaluation.py index b2128f1..de5f1db 100644 --- a/src/larktools/evaluation.py +++ b/src/larktools/evaluation.py @@ -117,15 +117,15 @@ def eval_composed_lines(node, env): child2 = get_children(node)[1] assert get_name(child1) == "arith_expr" eval_arith_expr(child1, env) - assert get_name(child2) == "multi_line" - return eval_multi_line(child2, env) + assert 
get_name(child2) == "multi_line_block" + return eval_multi_line_block(child2, env) -def eval_multi_line(node, env): +def eval_multi_line_block(node, env): # this can be either an arithmetic expression or # composed lines child = get_children(node)[0] child_name = get_name(child) if child_name == "arith_expr": - return eval_arith_expr(child) + return eval_arith_expr(child, env) elif child_name == "composed_lines": - return eval_composed_lines(child) + return eval_composed_lines(child, env) From 489234189a5fef4922c2efae3e4528f4c5620bf3 Mon Sep 17 00:00:00 2001 From: "SPRENGER Julia, NEA/SCI/DB" Date: Wed, 9 Oct 2024 17:14:39 +0200 Subject: [PATCH 4/6] Add development notes --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7d87f48..764befa 100644 --- a/README.md +++ b/README.md @@ -16,3 +16,7 @@ To run the test suite, `pytest` is recommended, and can be done via: ```bash pytest -v --maxfail=1 larktools/tests/test_suite.py ``` + +### Debugging + +For grammar development, using an alternative, less optimized parsing strategy can help avoid rule conflicts: use `earley` instead of `lalr`. From 8c1b806caaebbaba3ce8039886804ffe531433d8 Mon Sep 17 00:00:00 2001 From: "SPRENGER Julia, NEA/SCI/DB" Date: Wed, 9 Oct 2024 17:15:32 +0200 Subject: [PATCH 5/6] Fix grammar to be lalr compatible and extend evaluation and tests --- src/larktools/ebnf_grammar.py | 6 ++++-- src/larktools/evaluation.py | 28 +++++++++++++--------------- tests/test_syntax.py | 7 +++++-- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/larktools/ebnf_grammar.py b/src/larktools/ebnf_grammar.py index e9ca98b..ac90bbc 100644 --- a/src/larktools/ebnf_grammar.py +++ b/src/larktools/ebnf_grammar.py @@ -24,8 +24,10 @@ // but without the fancy tree shaping directives explained at // https://lark-parser.readthedocs.io/en/stable/tree_construction.html - multi_line_block: arith_expr | composed_lines | _NL* multi_line_block _NL* - composed_lines: 
arith_expr _NL multi_line_block + + line: arith_expr + + multi_line_block: (line _NL? | _NL )* arith_expr: sum sum: product | addition | subtraction diff --git a/src/larktools/evaluation.py b/src/larktools/evaluation.py index de5f1db..c22c0d8 100644 --- a/src/larktools/evaluation.py +++ b/src/larktools/evaluation.py @@ -110,22 +110,20 @@ def eval_bracketed_arith_expr(node, env): assert get_name(child) == "arith_expr" return eval_arith_expr(child, env) -def eval_composed_lines(node, env): - # the newline break does not appear as child node - # return only the result of the latter evaluation - child1 = get_children(node)[0] - child2 = get_children(node)[1] - assert get_name(child1) == "arith_expr" - eval_arith_expr(child1, env) - assert get_name(child2) == "multi_line_block" - return eval_multi_line_block(child2, env) - -def eval_multi_line_block(node, env): - # this can be either an arithmetic expression or - # composed lines +def eval_line(node, env): + # this is the content of a single line of input child = get_children(node)[0] child_name = get_name(child) if child_name == "arith_expr": return eval_arith_expr(child, env) - elif child_name == "composed_lines": - return eval_composed_lines(child, env) + +def eval_multi_line_block(node, env): + # this can be either an arithmetic expression or + # composed lines + children = get_children(node) + for child in children: + child_name = get_name(child) + assert child_name == "line" + res = eval_line(child, env) + return res + diff --git a/tests/test_syntax.py b/tests/test_syntax.py index ba4c27a..b158516 100644 --- a/tests/test_syntax.py +++ b/tests/test_syntax.py @@ -24,6 +24,9 @@ def _parse_and_assert(expression: str, expected: Union[int, float]) -> None: assert expected == res def test_multi_line(): - _parse_and_assert("5\n8",8) + _parse_and_assert("5\n8", 8) + _parse_and_assert("\n\n\n8", 8) + _parse_and_assert("8\n\n\n", 8) _parse_and_assert("5+5\n3+4\n1+2", 3) - _parse_and_assert("\n\n5\n\n3\8", 8) + 
_parse_and_assert("\n\n5\n\n3\n8", 8) + From c57b57193b12621085ad50e463c11fe5db1d6f2b Mon Sep 17 00:00:00 2001 From: julia-sprenger <144245197+julia-sprenger@users.noreply.github.com> Date: Thu, 10 Oct 2024 12:07:36 +0200 Subject: [PATCH 6/6] only complete expressions can be assigned to a variable --- src/larktools/ebnf_grammar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/larktools/ebnf_grammar.py b/src/larktools/ebnf_grammar.py index ac90bbc..3cc58d1 100644 --- a/src/larktools/ebnf_grammar.py +++ b/src/larktools/ebnf_grammar.py @@ -15,7 +15,7 @@ start: assign_var - assign_var: VARNAME "=" multi_line_block + assign_var: VARNAME "=" arith_expr VARNAME: LETTER (LETTER | DIGIT)*