From bd7e5c441db498b1c1b576443d15574d0ff1f83d Mon Sep 17 00:00:00 2001 From: "Jon J. C" Date: Wed, 18 Oct 2023 13:16:43 +0200 Subject: [PATCH 1/4] Adapt PEG syntax to the one defined by Bryan Ford in the original paper. Breaks arpeggio tests. --- src/peg/adql2.1.peg | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/peg/adql2.1.peg b/src/peg/adql2.1.peg index 171d18a..9172e2c 100644 --- a/src/peg/adql2.1.peg +++ b/src/peg/adql2.1.peg @@ -1,9 +1,4 @@ # Note: in the actual PEG definition comments start with # -# =========================== Configurables for deployers - -# additional prefixes to be added here -udf_prefix <- - 'ivo_' # ============================ The Gramma's root symbol @@ -245,13 +240,13 @@ identifier <- (regular_identifier / delimited_identifier) delimited_identifier <- - '"' ('""' / '[^"]')+ '"' + '"' ('""' / !["])+ '"' regular_identifier <- (!(keyword) letter (letter / digit / '_')*) character_string_literal <- - ("'" ("''" / r"[^']")* "'" (Space+ comment _)*)+ + ("'" ("''" / !['])* "'" (Space+ comment _)*)+ fold_function <- ('UPPER' / 'LOWER') _ @@ -280,7 +275,7 @@ geometry_function <- / extract_coord_sys bitwise_op <- - '&' / '|' / '^' + [&|^] bitwise_expression <- '~' numeric_value_expression @@ -456,13 +451,13 @@ point <- _ ',' _ coordinates _ ')' numeric_value_expression <- - term (_ ('+' / '-') _ numeric_value_expression)* + term (_ [-+] _ numeric_value_expression)* term <- - factor (_ ('*' / '/') _ term)* + factor (_ [*/] _ term)* factor <- - ('+' / '-')? numeric_primary + [-+]? 
numeric_primary numeric_value_function <- math_function @@ -531,7 +526,7 @@ numeric_expression_operand <- numeric_value_expression numeric_expression_rest <- - ('+' / '-' / '*' / '/') _ numeric_expression_operand + [-+*/] _ numeric_expression_operand approximate_numeric_literal <- exact_numeric_literal 'E' @@ -541,7 +536,7 @@ exact_numeric_literal <- (unsigned_integer '.')* unsigned_integer signed_integer <- - ('+' / '-')? unsigned_integer + [-+]? unsigned_integer # TODO: We should take out character_string_literal here, MD thinks -- # what sort of use case did people have in mind here? @@ -566,13 +561,13 @@ unsigned_hexadecimal <- '0x' hex_digit+ digit <- - '[0-9]' + [0-9] hex_digit <- - '[0-9A-F]' + [0-9A-F] letter <- - '[a-zA-Z]' + [a-zA-Z] # Reserved words @@ -684,7 +679,7 @@ ANY_CHAR <- letter / digit / ' ' / '\t' / ',' / '' / '.' comment <- - '--' '[^\n\r]*' + '--' (![\n\r])* _ <- (comment / Space / EOL)* @@ -693,10 +688,19 @@ __ <- (comment / Space / EOL)+ _a <- - !'[A-Z0-9_]' + ![A-Z0-9_] Space <- ' '+ / '\t' EOL <- '\r\n' / '\n' / '\r' + +EOF <- + !. + +# =========================== Configurables for deployers +# additional prefixes to be added here +udf_prefix <- + 'ivo_' + From 7524aa9f0f48e6edf02e2d7d980cf769e229ec45 Mon Sep 17 00:00:00 2001 From: "Jon J. C" Date: Wed, 18 Oct 2023 13:20:00 +0200 Subject: [PATCH 2/4] Adapt classical PEG syntax to arpeggio equivalent. 
Some tests fail --- src/peg/testpeg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/peg/testpeg.py b/src/peg/testpeg.py index 685e8fc..1725e10 100644 --- a/src/peg/testpeg.py +++ b/src/peg/testpeg.py @@ -48,7 +48,10 @@ def get_parser(debug=False, root='query_specification'): peg_rules = re.sub('#', '// ', peg_rules) # adapt character range syntax - peg_rules = re.sub("'\\[", "r'[", peg_rules) + peg_rules = re.sub("\\[", "r'[", peg_rules) + peg_rules = re.sub("\\!r'\\[", "r'[^", peg_rules) + peg_rules = re.sub("\\]", "]'", peg_rules) + peg_rules = re.sub("EOF <-[^;]*;", "", peg_rules) return ParserPEG(peg_rules, root, From 486116dbf8809f3ee6cd0c2977bf7d63839fa4b7 Mon Sep 17 00:00:00 2001 From: "Jon J. C" Date: Wed, 18 Oct 2023 15:26:57 +0200 Subject: [PATCH 3/4] Remove duplicated rule numeric_primary --- src/peg/adql2.1.peg | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/peg/adql2.1.peg b/src/peg/adql2.1.peg index 9172e2c..7203c62 100644 --- a/src/peg/adql2.1.peg +++ b/src/peg/adql2.1.peg @@ -477,10 +477,6 @@ user_defined_function <- (_ value_expression (_ ',' _ value_expression)* _)? ')' -numeric_primary <- - value_expression_primary - / numeric_value_function - # We need to seriously re-write value_expression because PEG # doesn't have an actual longest-match operator. Thus, we # cannot decide on the type of the first operand. From 216a3d8cd03025056becae73a1d32258a70d9a2d Mon Sep 17 00:00:00 2001 From: "Jon J. C" Date: Wed, 18 Oct 2023 16:59:31 +0200 Subject: [PATCH 4/4] Fix typo --- src/peg/adql2.1.peg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peg/adql2.1.peg b/src/peg/adql2.1.peg index 7203c62..d4cf6b7 100644 --- a/src/peg/adql2.1.peg +++ b/src/peg/adql2.1.peg @@ -1,6 +1,6 @@ # Note: in the actual PEG definition comments start with # -# ============================ The Gramma's root symbol +# ============================ The Grammar's root symbol query_specification <- with_clause? _