From 274d22c3649277dca19df335cfdd1e060e280790 Mon Sep 17 00:00:00 2001 From: "R. Bernstein" Date: Sun, 5 Jan 2025 18:11:55 -0500 Subject: [PATCH] Add code to recognize nested box operators (#1263) A first cut at detecting nested box operators. --- mathics/core/parser/parser.py | 349 +++++++++++++++++----------- mathics/data/.gitignore | 1 + test/core/parser/test_box_parser.py | 68 ++++++ test/core/parser/test_parser.py | 2 +- 4 files changed, 288 insertions(+), 132 deletions(-) create mode 100644 test/core/parser/test_box_parser.py diff --git a/mathics/core/parser/parser.py b/mathics/core/parser/parser.py index edc16c07c..f0010db28 100644 --- a/mathics/core/parser/parser.py +++ b/mathics/core/parser/parser.py @@ -24,9 +24,10 @@ String, Symbol, ) -from mathics.core.parser.operators import ( # box_operators, # Soon to come... +from mathics.core.parser.operators import ( all_operators, binary_operators, + box_operators, flat_binary_operators, inequality_operators, left_binary_operators, @@ -123,16 +124,16 @@ def expect(self, expected_tag: str): self.tokeniser.sntx_message(token.pos) raise InvalidSyntaxError() - def incomplete(self, pos): + def incomplete(self, pos: int): self.tokeniser.incomplete() self.backtrack(pos) - def next(self): + def next(self) -> Token: if self.current_token is None: self.current_token = self.tokeniser.next() return self.current_token - def next_noend(self): + def next_noend(self) -> Token: "returns next token which is not END" while True: token = self.next() @@ -140,9 +141,7 @@ def next_noend(self): return token self.incomplete(token.pos) - # FIXME: Should be Optional[Node], but parse_seq returns - # a list. - def parse(self, feeder) -> Union[Node, Optional[list]]: + def parse(self, feeder) -> Optional[Node]: """ top-level parsing routine. 
This kicks off parsing by doing some initialization and then calling @@ -155,16 +154,14 @@ def parse(self, feeder) -> Union[Node, Optional[list]]: self.box_depth = 0 return self.parse_e() - # FIXME: Should be Optional[Node], but parse_seq returns - # a list. - def parse_e(self) -> Union[Node, Optional[list]]: + def parse_e(self) -> Optional[Node]: """ Parse the single top-level or "start" expression. This is called right after doing parse setup. """ result = [] while self.next().tag != "END": - result.append(self.parse_exp(NEVER_ADD_PARENTHESIS)) + result.append(self.parse_expr(NEVER_ADD_PARENTHESIS)) if len(result) > 1: return Node("Times", *result) if len(result) == 1: @@ -172,7 +169,7 @@ def parse_e(self) -> Union[Node, Optional[list]]: else: return None - def parse_binary( + def parse_binary_operator( self, expr1, token: Token, expr1_precedence: int ) -> Optional[Node]: """ @@ -211,7 +208,7 @@ def parse_binary( self.consume() if tag not in right_binary_operators: operator_precedence += 1 - expr2 = self.parse_exp(operator_precedence) + expr2 = self.parse_expr(operator_precedence) # Handle nonassociative operators if ( @@ -230,22 +227,33 @@ def parse_binary( return result - def parse_box(self, precedence: int) -> Union[String, Node]: - """ - Return the parsed boxed expression for the current - sequence of tokens. + def parse_box_expr(self, precedence: int) -> Union[String, Node]: + r""" + Parse a box expression returning an AST Node tree for this. If there is only an Atom we return a String of that. - Otherwise we return the Node parse expression. + Otherwise we return an AST Node tree for this. 
+ + This code recognizes grammar rules of the form: + + <box-expr> : + | \( box-expr \) + | \( box-expr <box-operator> box-expr \) + + """ result = None new_result = None while True: - token = self.next() + if self.box_depth > 0: + token = self.next_noend() + else: + token = self.next() tag = token.tag method = getattr(self, "b_" + tag, None) if method is not None: new_result = method(result, token, precedence) + elif tag in box_operators: + new_result = self.parse_box_operator(result, token, precedence) elif tag in ("OtherscriptBox", "RightRowBox"): break elif tag == "END": @@ -265,6 +273,64 @@ def parse_box(self, precedence: int) -> Union[String, Node]: result = NullString return result + def parse_box_operator( + self, box_expr1, token: Token, box_expr1_precedence: int + ) -> Optional[Node]: + """ + Implements parsing and transformation of box operators: + box_expr1 <box_operator> box_expr2 + when it is applicable. + + When called, we have parsed "box_expr1" and seen <box_operator> passed as "token". This routine + may cause box_expr2 to get scanned and parsed. + + "box_expr1_precedence" is the precedence of "box_expr1" and is used + to determine whether parsing should be interpreted as: + + (... box_expr1) <box_operator> box_expr2 + + or: + ... (box_expr1 <box_operator> box_expr2) + + In the first case, we will return None (no further tokens + added). A higher level will handle group (... box_expr1) and + pass that as box_expr1 in another call to this routine. + + In this situation, this routine will get called again with a + new box_expr1 that contains (... box_expr1). + + However, in the second case: + ...(box_expr1 <box_operator> box_expr2), + + we return Node(<box_operator>, box_expr1, box_expr2) + """ + tag = token.tag + operator_precedence = box_operators[tag] + if box_expr1_precedence > operator_precedence: + return None + self.consume() + + # We don't handle any notion of right associativity yet, + # if there is such a thing....
+ # if tag not in right_box_operators: + # operator_precedence += 1 + + box_expr2 = self.parse_box_expr(operator_precedence) + + # Is there such a thing as non-associative box operators? + # Handle nonassociative box operators + # if ( + # tag in nonassoc_binary_operators + # and expr1.get_head_name() == tag + # and not expr1.parenthesised + # ): + # self.tokeniser.sntx_message(token.pos) + # raise InvalidSyntaxError() + + result = Node(tag, box_expr1, box_expr2) + + return result + def parse_comparison( self, expr1, token: Token, expr1_precedence: int ) -> Optional[Node]: @@ -301,7 +367,7 @@ def parse_comparison( return None self.consume() head = expr1.get_head_name() - expr2 = self.parse_exp(operator_precedence + 1) + expr2 = self.parse_expr(operator_precedence + 1) if head == "Inequality" and not expr1.parenthesised: expr1.children.append(Symbol(tag)) expr1.children.append(expr2) @@ -320,24 +386,35 @@ def parse_comparison( expr1 = Node(tag, expr1, expr2).flatten() return expr1 - def parse_exp(self, expr1_precedence: int) -> Optional[Node]: + def parse_expr(self, precedence: int) -> Optional[Node]: """ - Parse an expression. + Parse an expression returning an AST Node tree for this. - Used to implement recognizing grammar rules of the form: + This code recognizes grammar rules of the form: - expr : - | expr1 inequality_operator expr2 ... - | expr1 binary_operator expr2 ... - | expr1 ternary_operator expr2 ternary_operator2 expr3 ... - | expr1 postfix_operator ... - | expr1 expr2 ... (* implicit multiplication *) + <expr> : + | expr1 inequality_operator expr2 ... + | expr1 binary_operator expr2 ... + | expr1 ternary_operator expr2 ternary_operator2 expr3 ... + | expr1 postfix_operator ... + | expr1 expr2 ... (* implicit multiplication *) and transforming this into its corresponding Node S-expression form.
+ + "precedence" is an operator precedence of the parent node which is + often the operator token seen just before a grouping symbol which probably + caused "parse_expr" to get called. For example in: + (a + b) * (c + d) + or equivalently: + (a + b) (c + d) + or + (a + b)(c + d) + + if we have been called to parse "(c + d)", "precedence" will be the + precedence for "Times", also sometimes known as "*". """ result = self.parse_p() - # The while loop below flattens expressions. while True: if self.bracket_depth > 0: token = self.next_noend() @@ -346,22 +423,22 @@ def parse_exp(self, expr1_precedence: int) -> Optional[Node]: tag = token.tag method = getattr(self, "e_" + tag, None) if method is not None: - new_result = method(result, token, expr1_precedence) + new_result = method(result, token, precedence) elif tag in inequality_operators: - new_result = self.parse_comparison(result, token, expr1_precedence) + new_result = self.parse_comparison(result, token, precedence) elif tag in binary_operators: - new_result = self.parse_binary(result, token, expr1_precedence) + new_result = self.parse_binary_operator(result, token, precedence) elif tag in ternary_operators: - new_result = self.parse_ternary(result, token, expr1_precedence) + new_result = self.parse_ternary_operator(result, token, precedence) elif tag in postfix_operators: - new_result = self.parse_postfix(result, token, expr1_precedence) + new_result = self.parse_postfix(result, token, precedence) elif ( tag not in self.halt_tags - and flat_binary_operators["Times"] >= expr1_precedence + and flat_binary_operators["Times"] >= precedence ): # implicit multiplication q = flat_binary_operators["Times"] - child = self.parse_exp(q + 1) + child = self.parse_expr(q + 1) new_result = Node("Times", result, child).flatten() else: new_result = None @@ -384,8 +461,8 @@ def parse_p(self): return method(token) elif tag in prefix_operators: self.consume() - q = prefix_operators[tag] - child = self.parse_exp(q) + 
operator_precedence = prefix_operators[tag] + child = self.parse_expr(operator_precedence) return Node(tag, child) else: self.tokeniser.sntx_message(token.pos) @@ -433,7 +510,7 @@ def parse_seq(self) -> list: result.append(NullSymbol) break else: - result.append(self.parse_exp(NEVER_ADD_PARENTHESIS)) + result.append(self.parse_expr(NEVER_ADD_PARENTHESIS)) token = self.next_noend() tag = token.tag if tag == "RawComma": @@ -443,14 +520,14 @@ def parse_seq(self) -> list: break return result - def parse_ternary( + def parse_ternary_operator( self, expr1, token: Token, expr1_precedence: int ) -> Optional[Node]: raise NotImplementedError # B methods # - # b_xxx methods are called from parse_box. + # b_xxx methods are called from parse_box_expr. # They expect args (Node, Token precedence) and return Node or None. # The first argument may be None if the LHS is absent. # Used for boxes. @@ -459,98 +536,110 @@ def b_SqrtBox(self, box0, token: Token, p: int) -> Optional[Node]: if box0 is not None: return None self.consume() - q = all_operators["SqrtBox"] - box1 = self.parse_box(q) + operator_precedence = all_operators["SqrtBox"] + box_expr1 = self.parse_box_expr(operator_precedence) if self.next().tag == "OtherscriptBox": self.consume() - box2 = self.parse_box(q) - return Node("RadicalBox", box1, box2) + box2 = self.parse_box_expr(operator_precedence) + return Node("RadicalBox", box_expr1, box2) else: - return Node("SqrtBox", box1) + return Node("SqrtBox", box_expr1) - def b_SuperscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = all_operators["SuperscriptBox"] - if q < p: + def b_SuperscriptBox( + self, box_expr1, token: Token, box_expr1_precedence: int + ) -> Optional[Node]: + operator_precedence = all_operators["SuperscriptBox"] + if box_expr1_precedence > operator_precedence: return None - if box1 is None: - box1 = NullString + if box_expr1 is None: + box_expr1 = NullString self.consume() - box2 = self.parse_box(q) + box2 = 
self.parse_box_expr(operator_precedence) if self.next().tag == "OtherscriptBox": self.consume() - box3 = self.parse_box(all_operators["SubsuperscriptBox"]) - return Node("SubsuperscriptBox", box1, box3, box2) + box3 = self.parse_box_expr(all_operators["SubsuperscriptBox"]) + return Node("SubsuperscriptBox", box_expr1, box3, box2) else: - return Node("SuperscriptBox", box1, box2) + return Node("SuperscriptBox", box_expr1, box2) - def b_SubscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = all_operators["SubscriptBox"] - if q < p: + def b_SubscriptBox( + self, box_expr1, token: Token, box_expr1_precedence: int + ) -> Optional[Node]: + operator_precedence = all_operators["SubscriptBox"] + if box_expr1_precedence > operator_precedence: return None - if box1 is None: - box1 = NullString + if box_expr1 is None: + box_expr1 = NullString self.consume() - box2 = self.parse_box(q) + box_expr2 = self.parse_box_expr(operator_precedence) if self.next().tag == "OtherscriptBox": self.consume() - box3 = self.parse_box(all_operators["SubsuperscriptBox"]) - return Node("SubsuperscriptBox", box1, box2, box3) + box_expr3 = self.parse_box_expr(all_operators["SubsuperscriptBox"]) + return Node("SubsuperscriptBox", box_expr1, box_expr2, box_expr3) else: - return Node("SubscriptBox", box1, box2) + return Node("SubscriptBox", box_expr1, box_expr2) - def b_UnderscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = all_operators["UnderscriptBox"] - if q < p: + def b_UnderscriptBox( + self, box_expr1, token: Token, box_expr1_precedence: int + ) -> Optional[Node]: + operator_precedence = all_operators["UnderscriptBox"] + if box_expr1_precedence > operator_precedence: return None - if box1 is None: - box1 = NullString + if box_expr1 is None: + box_expr1 = NullString self.consume() - box2 = self.parse_box(q) + box_expr2 = self.parse_box_expr(operator_precedence) if self.next().tag == "OtherscriptBox": self.consume() - box3 = 
self.parse_box(all_operators["UnderoverscriptBox"]) - return Node("UnderoverscriptBox", box1, box2, box3) + box_expr3 = self.parse_box_expr(all_operators["UnderoverscriptBox"]) + return Node("UnderoverscriptBox", box_expr1, box_expr2, box_expr3) else: - return Node("UnderscriptBox", box1, box2) + return Node("UnderscriptBox", box_expr1, box_expr2) - def b_FractionBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = all_operators["FractionBox"] - if q < p: + def b_FractionBox( + self, box_expr1, token: Token, box_expr1_precendence: int + ) -> Optional[Node]: + operator_precedence = all_operators["FractionBox"] + if box_expr1_precendence > operator_precedence: return None - if box1 is None: - box1 = NullString + if box_expr1 is None: + box_expr1 = NullString self.consume() - box2 = self.parse_box(q + 1) - return Node("FractionBox", box1, box2) + box_expr2 = self.parse_box_expr(operator_precedence + 1) + return Node("FractionBox", box_expr1, box_expr2) - def b_FormBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = all_operators["FormBox"] - if q < p: + def b_FormBox( + self, box_expr1, token: Token, box_expr1_precedence: int + ) -> Optional[Node]: + operator_precedence = all_operators["FormBox"] + if box_expr1_precedence > operator_precedence: return None - if box1 is None: - box1 = Symbol("StandardForm") # RawForm - elif is_symbol_name(box1.value): - box1 = Symbol(box1.value, context=None) + if box_expr1 is None: + box_expr1 = Symbol("StandardForm") # RawForm + elif is_symbol_name(box_expr1.value): + box_expr1 = Symbol(box_expr1.value, context=None) else: - box1 = Node("Removed", String("$$Failure")) + box_expr1 = Node("Removed", String("$$Failure")) self.consume() - box2 = self.parse_box(q) - return Node("FormBox", box2, box1) + box2 = self.parse_box_expr(operator_precedence) + return Node("FormBox", box2, box_expr1) - def b_OverscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = all_operators["OverscriptBox"] - if q < p: + def 
b_OverscriptBox( + self, box_expr1, token: Token, box_expr1_precedence: int + ) -> Optional[Node]: + operator_precedence = all_operators["OverscriptBox"] + if box_expr1_precedence > operator_precedence: return None - if box1 is None: - box1 = NullString + if box_expr1 is None: + box_expr1 = NullString self.consume() - box2 = self.parse_box(q) + box_expr2 = self.parse_box_expr(operator_precedence) if self.next().tag == "OtherscriptBox": self.consume() - box3 = self.parse_box(all_operators["UnderoverscriptBox"]) - return Node("UnderoverscriptBox", box1, box3, box2) + box_expr3 = self.parse_box_expr(all_operators["UnderoverscriptBox"]) + return Node("UnderoverscriptBox", box_expr1, box_expr3, box_expr2) else: - return Node("OverscriptBox", box1, box2) + return Node("OverscriptBox", box_expr1, box_expr2) # E methods # @@ -564,7 +653,7 @@ def e_ApplyList(self, expr1, token: Token, p: int) -> Optional[Node]: if operator_precedence < p: return None self.consume() - expr2 = self.parse_exp(operator_precedence) + expr2 = self.parse_expr(operator_precedence) expr3 = Node("List", Number1) return Node("Apply", expr1, expr2, expr3) @@ -573,7 +662,7 @@ def e_Alternatives(self, expr1, token: Token, p: int) -> Optional[Node]: if q < p: return None self.consume() - expr2 = self.parse_exp(q + 1) + expr2 = self.parse_expr(q + 1) return Node("Alternatives", expr1, expr2).flatten() def e_Derivative(self, expr1, token: Token, p: int) -> Optional[Node]: @@ -619,7 +708,7 @@ def e_Divide(self, expr1, token: Token, expr1_precedence: int): if expr1_precedence > operator_precedence: return None self.consume() - expr2 = self.parse_exp(operator_precedence + 1) + expr2 = self.parse_expr(operator_precedence + 1) return Node("Times", expr1, Node("Power", expr2, NumberM1)).flatten() def e_Infix(self, expr1, token: Token, expr1_precedence) -> Optional[Node]: @@ -643,9 +732,9 @@ def e_Infix(self, expr1, token: Token, expr1_precedence) -> Optional[Node]: if expr1_precedence > operator_precedence: 
return None self.consume() - expr2 = self.parse_exp(operator_precedence + 1) + expr2 = self.parse_expr(operator_precedence + 1) self.expect("Infix") - expr3 = self.parse_exp(operator_precedence + 1) + expr3 = self.parse_expr(operator_precedence + 1) return Node(expr2, expr1, expr3) def e_Prefix(self, expr1, token: Token, expr1_precedence: int) -> Optional[Node]: @@ -671,7 +760,7 @@ def e_Prefix(self, expr1, token: Token, expr1_precedence: int) -> Optional[Node] if expr1_precedence > operator_precedence: return None self.consume() - expr2 = self.parse_exp(operator_precedence) + expr2 = self.parse_expr(operator_precedence) return Node(expr1, expr2) def e_Function(self, expr1, token: Token, p: int) -> Optional[Node]: @@ -683,7 +772,7 @@ def e_Function(self, expr1, token: Token, p: int) -> Optional[Node]: if token.text == "&": return Node("Function", expr1) else: - expr2 = self.parse_exp(operator_precedence) + expr2 = self.parse_expr(operator_precedence) return Node("Function", expr1, expr2) def e_MessageName(self, expr1, token: Token, p: int) -> Node: @@ -708,7 +797,7 @@ def e_Minus(self, expr1, token: Token, p: int) -> Optional[Node]: if q < p: return None self.consume() - expr2 = self.parse_exp(q + 1) + expr2 = self.parse_expr(q + 1) if isinstance(expr2, Number) and not expr2.value.startswith("-"): expr2.value = "-" + expr2.value else: @@ -742,7 +831,7 @@ def e_Postfix(self, expr1, token: Token, expr1_precedence: int) -> Optional[Node self.consume() # Precedence[Postix] is lower than expr1; Postfix[] is left associative. 
- expr2 = self.parse_exp(operator_precedence + 1) + expr2 = self.parse_expr(operator_precedence + 1) return Node(expr2, expr1) def e_RawColon(self, expr1, token: Token, p: int) -> Optional[Node]: @@ -763,7 +852,7 @@ def e_RawColon(self, expr1, token: Token, p: int) -> Optional[Node]: if p == 151: return None self.consume() - expr2 = self.parse_exp(q + 1) + expr2 = self.parse_expr(q + 1) return Node(head, expr1, expr2) def e_RawLeftBracket(self, expr, token: Token, p: int) -> Optional[Node]: @@ -826,7 +915,7 @@ def e_Semicolon(self, expr1, token: Token, expr1_precedence: int) -> Optional[No # XXX look for next expr otherwise backtrack try: - expr2 = self.parse_exp(operator_precedence + 1) + expr2 = self.parse_expr(operator_precedence + 1) except TranslateError: self.backtrack(pos) self.feeder.messages = messages @@ -853,7 +942,7 @@ def e_Span(self, expr1, token: Token, p) -> Optional[Node]: else: messages = list(self.feeder.messages) try: - expr2 = self.parse_exp(q + 1) + expr2 = self.parse_expr(q + 1) except TranslateError: expr2 = Symbol("All") self.backtrack(token.pos) @@ -863,7 +952,7 @@ def e_Span(self, expr1, token: Token, p) -> Optional[Node]: self.consume() messages = list(self.feeder.messages) try: - expr3 = self.parse_exp(q + 1) + expr3 = self.parse_expr(q + 1) return Node("Span", expr1, expr2, expr3) except TranslateError: self.backtrack(token.pos) @@ -875,7 +964,7 @@ def e_TagSet(self, expr1, token: Token, p: int) -> Optional[Node]: if q < p: return None self.consume() - expr2 = self.parse_exp(q + 1) + expr2 = self.parse_expr(q + 1) # examine next token token = self.next_noend() tag = token.tag @@ -891,7 +980,7 @@ def e_TagSet(self, expr1, token: Token, p: int) -> Optional[Node]: self.consume() if head == "TagUnset": return Node(head, expr1, expr2) - expr3 = self.parse_exp(q + 1) + expr3 = self.parse_expr(q + 1) return Node(head, expr1, expr2, expr3) def e_Unset(self, expr1, token: Token, p: int) -> Optional[Node]: @@ -911,17 +1000,17 @@ def 
e_Unset(self, expr1, token: Token, p: int) -> Optional[Node]: def p_Decrement(self, token: Token) -> Node: self.consume() q = prefix_operators["PreDecrement"] - return Node("PreDecrement", self.parse_exp(q)) + return Node("PreDecrement", self.parse_expr(q)) def p_Increment(self, token: Token) -> Node: self.consume() q = prefix_operators["PreIncrement"] - return Node("PreIncrement", self.parse_exp(q)) + return Node("PreIncrement", self.parse_expr(q)) def p_Information(self, token: Token) -> Node: self.consume() q = prefix_operators["Information"] - child = self.parse_exp(q) + child = self.parse_expr(q) if child.__class__ is not Symbol: raise InvalidSyntaxError() return Node( @@ -931,15 +1020,15 @@ def p_Information(self, token: Token) -> Node: def p_Integral(self, token: Token) -> Node: self.consume() inner_prec, outer_prec = all_operators["Sum"] + 1, all_operators["Power"] - 1 - expr1 = self.parse_exp(inner_prec) + expr1 = self.parse_expr(inner_prec) self.expect("DifferentialD") - expr2 = self.parse_exp(outer_prec) + expr2 = self.parse_expr(outer_prec) return Node("Integrate", expr1, expr2) def p_Factorial2(self, token: Token) -> Node: self.consume() q = prefix_operators["Not"] - child = self.parse_exp(q) + child = self.parse_expr(q) return Node("Not", Node("Not", child)) def p_Filename(self, token: Token) -> Filename: @@ -951,10 +1040,9 @@ def p_LeftRowBox(self, token: Token) -> Union[Node, String]: self.consume() children = [] self.box_depth += 1 - self.bracket_depth += 1 token = self.next() while token.tag not in ("RightRowBox", "OtherscriptBox"): - newnode = self.parse_box(NEVER_ADD_PARENTHESIS) + newnode = self.parse_box_expr(NEVER_ADD_PARENTHESIS) children.append(newnode) token = self.next() result: Union[Node, String] @@ -966,7 +1054,6 @@ def p_LeftRowBox(self, token: Token) -> Union[Node, String]: result = Node("RowBox", Node("List", *children)) self.expect("RightRowBox") self.box_depth -= 1 - self.bracket_depth -= 1 result.parenthesised = True return 
result @@ -980,7 +1067,7 @@ def p_Minus(self, token: Token) -> Optional[Node]: """ self.consume() q = prefix_operators["Minus"] - expr = self.parse_exp(q) + expr = self.parse_expr(q) if isinstance(expr, Number) and not expr.value.startswith("-"): expr.value = "-" + expr.value return expr @@ -998,12 +1085,12 @@ def p_MinusPlus(self, token: Token) -> Node: """ self.consume() operator_precedence = operator_precedences["UnaryMinusPlus"] - return Node("MinusPlus", self.parse_exp(operator_precedence)) + return Node("MinusPlus", self.parse_expr(operator_precedence)) def p_Not(self, token: Token) -> Node: self.consume() operator_precedence = prefix_operators["Not"] - child = self.parse_exp(operator_precedence) + child = self.parse_expr(operator_precedence) return Node("Not", child) # p_Factorial sometimes gets called when p_Not would be more @@ -1102,7 +1189,7 @@ def p_Pattern(self, token: Token) -> Node: def p_PatternTest(self, token: Token) -> Node: self.consume() q = prefix_operators["Definition"] - child = self.parse_exp(q) + child = self.parse_expr(q) return Node( "Information", child, Node("Rule", Symbol("LongForm"), Symbol("False")) ) @@ -1119,7 +1206,7 @@ def p_Plus(self, token: Token): self.consume() operator_precedence = prefix_operators["UnaryPlus"] # note flattening here even flattens e.g. 
+ a + b - return Node("Plus", self.parse_exp(operator_precedence)).flatten() + return Node("Plus", self.parse_expr(operator_precedence)).flatten() def p_PlusMinus(self, token: Token) -> Node: """ @@ -1132,7 +1219,7 @@ def p_PlusMinus(self, token: Token) -> Node: """ self.consume() operator_precedence = operator_precedences["UnaryPlusMinus"] - return Node("PlusMinus", self.parse_exp(operator_precedence)) + return Node("PlusMinus", self.parse_expr(operator_precedence)) def p_RawLeftAssociation(self, token: Token) -> Node: self.consume() @@ -1153,7 +1240,7 @@ def p_RawLeftBrace(self, token: Token) -> Node: def p_RawLeftParenthesis(self, token: Token) -> Node: self.consume() self.bracket_depth += 1 - result = self.parse_exp(NEVER_ADD_PARENTHESIS) + result = self.parse_expr(NEVER_ADD_PARENTHESIS) self.expect("RawRightParenthesis") self.bracket_depth -= 1 assert result is not None diff --git a/mathics/data/.gitignore b/mathics/data/.gitignore index 1767b20fe..ba1ae5260 100644 --- a/mathics/data/.gitignore +++ b/mathics/data/.gitignore @@ -1,3 +1,4 @@ +/.python-version /doc_latex_data.pcl /doctest_latex_data.pcl /op-tables.json diff --git a/test/core/parser/test_box_parser.py b/test/core/parser/test_box_parser.py new file mode 100644 index 000000000..b113fc5b5 --- /dev/null +++ b/test/core/parser/test_box_parser.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for Box-expression parsing of mathics.core.parser.parser +""" + +import time +from typing import Optional + +from mathics_scanner import SingleLineFeeder + +from mathics.core.parser.parser import Parser + +# Set up a Parser that we can use to parse expressions. +# Note we don't use or pull in sessions here since we +# are testing just the parse layer, not the evaluation layer.
+parser = Parser() + + +def check_evaluation(str_expr: str, str_expected: str, assert_message: Optional[str]): + def parse(s: str): + return parser.parse(SingleLineFeeder(s)) + + result = parse(str_expr) + expected = parse(str_expected) + + print(time.asctime()) + if assert_message: + print((result, expected)) + assert result == expected, assert_message + else: + print((result, expected)) + assert result == expected + + +def test_box_parsing(): + for str_expr, str_expected, assert_message in ( + ( + r"\( 1 \)", + '"1"', + "Box parsing a non-box expression should strip boxing and convert to String", + ), + ( + r"\( 2 \^ n \)", + 'SuperscriptBox["2", "n"]', + "Box parsing a Superscript box operator should find box function name", + ), + ( + r"\( x \_ i \)", + 'SubscriptBox["x", "i"]', + "Box parsing a Subscript operator should find box function name", + ), + ( + r"\( x \_ i \^ n \)", + 'SuperscriptBox[SubscriptBox["x", "i"], "n"]', + "Box parsing multiple box operators should work", + ), + ( + r"\( x \_ \( i \^ n \) \)", + 'SubscriptBox["x", SuperscriptBox["i", "n"]]', + "Box parsing multiple box operators with box parenthesis should work", + ), + ( + r"\( x \^ \( i \/ 2 + 5 \) \)", + 'SuperscriptBox["x", RowBox[{FractionBox["i", "2"], "+", "5"}]]', + "Box parsing using FractionBox and parenthesis should work", + ), + ): + check_evaluation(str_expr, str_expected, assert_message) diff --git a/test/core/parser/test_parser.py b/test/core/parser/test_parser.py index 0e84695b3..398e2217c 100644 --- a/test/core/parser/test_parser.py +++ b/test/core/parser/test_parser.py @@ -21,7 +21,7 @@ class ParserTests(unittest.TestCase): def setUp(self): self.parser = Parser() - def parse(self, s): + def parse(self, s: str): return self.parser.parse(SingleLineFeeder(s)) def check(self, expr1, expr2):