diff --git a/languages/rlang/pom.xml b/languages/rlang/pom.xml index a9b550503b..b6fb07329a 100644 --- a/languages/rlang/pom.xml +++ b/languages/rlang/pom.xml @@ -14,6 +14,11 @@ org.antlr antlr4-runtime + + de.jplag + language-antlr-utils + ${revision} + diff --git a/languages/rlang/src/main/antlr4/de/jplag/rlang/grammar/R.g4 b/languages/rlang/src/main/antlr4/de/jplag/rlang/grammar/R.g4 index 73bd2389c7..0917ceaeab 100644 --- a/languages/rlang/src/main/antlr4/de/jplag/rlang/grammar/R.g4 +++ b/languages/rlang/src/main/antlr4/de/jplag/rlang/grammar/R.g4 @@ -2,6 +2,7 @@ [The "BSD licence"] Copyright (c) 2013 Terence Parr All rights reserved. + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -12,6 +13,7 @@ documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. @@ -27,28 +29,27 @@ /** derived from http://svn.r-project.org/R/trunk/src/main/gram.y http://cran.r-project.org/doc/manuals/R-lang.html#Parser + I'm no R genius but this seems to work. + Requires RFilter.g4 to strip away NL that are really whitespace, not end-of-command. See TestR.java + Usage: + $ antlr4 R.g4 RFilter.g4 $ javac *.java $ java TestR sample.R ... prints parse tree ... */ -/* -Modified version of the original in https://github.com/antlr/grammars-v4/blob/master/r/R.g4 so that I can separate the most relevant tokens of R in -the JplagRListenter.java file. 
-Author of the modification: Antonio Javier Rodriguez Perez
-*/
+// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging
 
 grammar R;
 
-prog: ( expr (';'|NL)
-    | NL
-    )*
-    EOF
+prog
+    : ((SEMICOLON | NL)+ | expr )* EOF
     ;
 
 /*
@@ -58,159 +59,208 @@ expr_or_assign
     ;
 */
 
-expr: expr index_statement // '[[' follows R's yacc grammar
-    | expr access_package expr
-    | expr ('$'|'@') expr
-    | expr '^' expr
-    | ('-'|'+') expr
-    | expr ':' expr
-    | expr USER_OP expr // anything wrappedin %: '%' .* '%'
-    | expr ('*'|'/') expr
-    | expr ('+'|'-') expr
-    | expr ('>'|'>='|'<'|'<='|'=='|'!=') expr
-    | '!' expr
-    | expr ('&'|'&&') expr
-    | expr ('|'|'||') expr
-    | '~' expr
-    | expr '~' expr
-    | expr assign_value expr
-    | function_definition // define function
-    | expr function_call // call function
-    | compound_statement
-    | if_statement
-    | for_statement
-    | while_statement
-    | repeat_statement
-    | help
-    | next_statement
-    | break_statement
-    | '(' expr ')'
-    | ID
-    | constant
+expr
+    : expr LIST_ACCESS_START sublist ARRAY_ACCESS_END ARRAY_ACCESS_END #ListAccess // '[[' follows R's yacc grammar; it is closed by two ']' tokens
+    | expr ARRAY_ACCESS_START sublist ARRAY_ACCESS_END #ArrayAccess
+    | expr NAMESPACE_ACCESS expr #NamespaceAccess
+    | expr COMPONENT_ACCESS expr #ComponentAccess
+    | expr '^' expr #Exponent
+    | ADD_SUB expr #Sign
+    | expr RANGE_OPERATOR expr #Range
+    | expr USER_OP expr #UserDefinedOperation // anything wrappedin %: '%' .* '%'
+    | expr MULT_DIV expr #MultOrDiv
+    | expr ADD_SUB expr #AddOrSub
+    | expr COMPARATOR expr #Comparison
+    | NOT expr #Not
+    | expr AND expr #And
+    | expr OR expr #Or
+    | '~' expr #ModelFormulaePrefix
+    | expr '~' expr #ModelFormulaeInfix
+    | expr (ASSIGN | EQUALS) expr #Assignment
+    | FUNCTION PAREN_L formlist? PAREN_R expr #FunctionDefinition // define function
+    | expr PAREN_L sublist PAREN_R #FunctionCall // call function
+    | CURLY_L exprlist CURLY_R #CompoundStatement // compound statement
+    | IF PAREN_L expr PAREN_R expr #If
+    | IF PAREN_L expr PAREN_R expr NL* ELSE expr #IfElse
+    | FOR PAREN_L ID IN expr PAREN_R expr #For
+    | WHILE PAREN_L expr PAREN_R expr #While
+    | REPEAT expr #Repeat
+    | HELP expr #Help // get help on expr, usually string or ID
+    | NEXT #Next
+    | BREAK #Break
+    | PAREN_L expr PAREN_R #BracketTerm
+    | ID #Id
+    | STRING #String
+    | HEX #Hex
+    | INT #Int
+    | FLOAT #Float
+    | COMPLEX #Complex
+    | NULL #Null
+    | NA #Na
+    | INF #Inf
+    | NAN #Nan
+    | TRUE #True
+    | FALSE #False
+    | NL+ expr #Newline
     ;
 
-index_statement : '[[' sublist ']' ']' | '[' sublist ']' ;
-
-access_package: '::'|':::' ;
-
-function_definition: 'function' '(' formlist? ')' expr ;
-
-function_call : '(' sublist ')' ;
-
-constant: constant_number | constant_string | constant_bool | 'NULL' | 'NA' | 'Inf' | 'NaN' ;
-
-constant_number: HEX | INT | FLOAT | COMPLEX ;
-
-constant_string: STRING ;
-
-constant_bool: 'TRUE' | 'FALSE' ;
-
-help: '?' expr ; // get help on expr, usually string or ID
-
-if_statement : 'if' '(' expr ')' expr | 'if' '(' expr ')' expr 'else' expr ;
-
-for_statement : 'for' '(' ID 'in' expr ')' expr ;
-
-while_statement : 'while' '(' expr ')' expr ;
-
-repeat_statement: 'repeat' expr ;
-
-next_statement: 'next' ;
-
-break_statement: 'break' ;
-
-compound_statement: '{' exprlist '}' ;
-
 exprlist
-    : expr ((';'|NL) expr?)*
-    |
+    : expr ((SEMICOLON | NL) expr?)*
     ;
 
-formlist : form (',' form)* ;
+formlist
+    : form (',' form)*
+    ;
 
-form: ID
-    | assign_func_declaration
+form
+    : ID
+    | ID EQUALS expr
+    | '...'
+    | '.'
     ;
 
-sublist : sub (',' sub)* ;
+sublist
+    : sub (',' sub)*
+    ;
 
-sub : expr
-    | assign_value_list
+sub
+    : expr
+    | ID EQUALS
+    | ID EQUALS expr
+    | STRING EQUALS
+    | STRING EQUALS expr
+    | NULL EQUALS
+    | NULL EQUALS expr
+    | '...'
+    | '.'
+    |
     ;
 
-assign_value: '<-'|'<<-'|'='|'->'|'->>'|':=';
-
-assign_func_declaration: ID '=' expr | '...' ;
-
-assign_value_list: ID '=' | ID '=' expr | constant_string '=' | constant_string '=' expr | 'NULL' '=' | 'NULL' '=' expr | '...' ;
-
-
-
-HEX : '0' ('x'|'X') HEXDIGIT+ [Ll]? ;
+IF: 'if';
+FOR: 'for';
+WHILE: 'while';
+REPEAT: 'repeat';
+FUNCTION: 'function';
+ELSE: 'else';
+IN: 'in';
+
+LIST_ACCESS_START: '[[';
+// No LIST_ACCESS_END (']]') token: the lexer would also emit it for the two ']' that close nested indexing such as a[b[1]].
+ARRAY_ACCESS_START: '[';
+ARRAY_ACCESS_END: ']';
+NAMESPACE_ACCESS: ':::' | '::';
+COMPONENT_ACCESS: '$' | '@';
+
+HELP: '?';
+NEXT: 'next';
+BREAK: 'break';
+
+NULL: 'NULL';
+NA: 'NA';
+INF: 'Inf'; // R spells the constant with a capital I, as the replaced grammar did
+NAN: 'NaN';
+TRUE: 'TRUE';
+FALSE: 'FALSE';
+
+NOT: '!';
+RANGE_OPERATOR: ':';
+
+MULT_DIV: '*' | '/';
+ADD_SUB: '+' | '-';
+COMPARATOR: '>' | '>=' | '<' | '<=' | '==' | '!=';
+ASSIGN: '<-' | '<<-' | '->' | '->>' | ':=';
+EQUALS: '=';
+AND: '&&' | '&';
+OR: '||' | '|';
+
+PAREN_L: '(';
+PAREN_R: ')';
+CURLY_L: '{';
+CURLY_R: '}';
+
+HEX
+    : '0' ('x' | 'X') HEXDIGIT+ [Ll]?
+    ;
 
-INT : DIGIT+ [Ll]? ;
+INT
+    : DIGIT+ [Ll]?
+    ;
 
-fragment
-HEXDIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
+fragment HEXDIGIT
+    : ('0' ..'9' | 'a' ..'f' | 'A' ..'F')
+    ;
 
-FLOAT: DIGIT+ '.' DIGIT* EXP? [Ll]?
-    | DIGIT+ EXP? [Ll]?
-    | '.' DIGIT+ EXP? [Ll]?
+FLOAT
+    : DIGIT+ '.' DIGIT* EXP? [Ll]?
+    | DIGIT+ EXP? [Ll]?
+    | '.' DIGIT+ EXP? [Ll]?
     ;
 
-fragment
-DIGIT: '0'..'9' ;
+fragment DIGIT
+    : '0' ..'9'
+    ;
 
-fragment
-EXP : ('E' | 'e') ('+' | '-')? INT ;
+fragment EXP
+    : ('E' | 'e') ('+' | '-')? INT
+    ;
 
 COMPLEX
-    : INT 'i'
-    | FLOAT 'i'
+    : INT 'i'
+    | FLOAT 'i'
     ;
 
 STRING
-    : '"' ( ESC | ~[\\"] )*? '"'
-    | '\'' ( ESC | ~[\\'] )*? '\''
-    | '`' ( ESC | ~[\\'] )*? '`'
+    : '"' (ESC | ~[\\"])*? '"'
+    | '\'' ( ESC | ~[\\'])*? '\''
+    | '`' ( ESC | ~[\\`])*? '`' // a backtick-quoted name ends at a backtick and may contain single quotes
     ;
-fragment
-ESC : '\\' [abtnfrv"'\\]
-    | UNICODE_ESCAPE
-    | HEX_ESCAPE
-    | OCTAL_ESCAPE
+
+fragment ESC
+    : '\\' [abtnfrv"'\\]
+    | UNICODE_ESCAPE
+    | HEX_ESCAPE
+    | OCTAL_ESCAPE
     ;
 
-fragment
-UNICODE_ESCAPE
-    : '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
-    | '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}'
+fragment UNICODE_ESCAPE
+    : '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
+    | '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}'
     ;
 
-fragment
-OCTAL_ESCAPE
-    : '\\' [0-3] [0-7] [0-7]
-    | '\\' [0-7] [0-7]
-    | '\\' [0-7]
+fragment OCTAL_ESCAPE
+    : '\\' [0-3] [0-7] [0-7]
+    | '\\' [0-7] [0-7]
+    | '\\' [0-7]
     ;
 
-fragment
-HEX_ESCAPE
-    : '\\' HEXDIGIT HEXDIGIT?
+fragment HEX_ESCAPE
+    : '\\' HEXDIGIT HEXDIGIT?
     ;
 
-ID : '.' (LETTER|'_'|'.') (LETTER|DIGIT|'_'|'.')*
-    | LETTER (LETTER|DIGIT|'_'|'.')*
+ID
+    : '.' (LETTER | '_' | '.') (LETTER | DIGIT | '_' | '.')*
+    | LETTER (LETTER | DIGIT | '_' | '.')*
     ;
-    
-fragment LETTER : [a-zA-Z] ;
 
-USER_OP : '%' .*? '%' ;
+fragment LETTER
+    : [a-zA-Z]
+    ;
 
-COMMENT : '#' .*? '\r'? '\n' -> type(NL) ;
+USER_OP
+    : '%' .*? '%'
+    ;
+
+COMMENT
+    : '#' .*? '\r'? '\n' -> type(NL)
+    ;
 
 // Match both UNIX and Windows newlines
-NL : '\r'? '\n' ;
+NL
+    : '\r'? '\n'
+    ;
+
+SEMICOLON: ';';
 
-WS : [ \t\u000C]+ -> skip ;
\ No newline at end of file
+WS
+    : [ \t\u000C]+ -> skip
+    ;
\ No newline at end of file
diff --git a/languages/rlang/src/main/antlr4/de/jplag/rlang/grammar/RFilter.g4 b/languages/rlang/src/main/antlr4/de/jplag/rlang/grammar/RFilter.g4
index d66b85aa2f..44b0a174fc 100644
--- a/languages/rlang/src/main/antlr4/de/jplag/rlang/grammar/RFilter.g4
+++ b/languages/rlang/src/main/antlr4/de/jplag/rlang/grammar/RFilter.g4
@@ -2,6 +2,7 @@
  [The "BSD licence"]
  Copyright (c) 2013 Terence Parr
  All rights reserved.
+
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
@@ -12,6 +13,7 @@
     documentation and/or other materials provided with the distribution.
  3. The name of the author may not be used to endorse or promote products
     derived from this software without specific prior written permission.
+
  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
@@ -28,41 +30,54 @@
     We strip NL inside expressions.
  */
 
+// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging
+
 parser grammar RFilter;
 
-options { tokenVocab=R; }
+options {
+    tokenVocab = R;
+}
 
 @members {
 protected int curlies = 0;
 }
 
 // TODO: MAKE THIS GET ONE COMMAND ONLY
-stream : (element|NL|';')* EOF ;
-
-eat : (NL {((WritableToken)$NL).setChannel(Token.HIDDEN_CHANNEL);})+ ;
-
-element: op eat?
-    | atom
-    | '{' eat? {curlies++;} (element|NL|';')* {curlies--;} '}'
-    | '(' (element|eat)* ')'
-    | '[' (element|eat)* ']'
-    | '[[' (element|eat)* ']' ']'
-    | 'function' eat? '(' (element|eat)* ')' eat?
-    | 'for' eat? '(' (element|eat)* ')' eat?
-    | 'while' eat? '(' (element|eat)* ')' eat?
-    | 'if' eat? '(' (element|eat)* ')' eat?
-    | 'else'
-    {
+stream
+    : (elem | NL | SEMICOLON)* EOF
+    ;
+
+eat
+    : (NL {((WritableToken)$NL).setChannel(Token.HIDDEN_CHANNEL);})+
+    ;
+
+elem
+    : op eat?
+    | atom
+    | CURLY_L eat? {curlies++;} (elem | NL | SEMICOLON)* {curlies--;} CURLY_R
+    | PAREN_L (elem | eat)* PAREN_R
+    | ARRAY_ACCESS_START (elem | eat)* ARRAY_ACCESS_END
+    | LIST_ACCESS_START (elem | eat)* ARRAY_ACCESS_END ARRAY_ACCESS_END // closed by two ']' tokens, as in R.g4
+    | FUNCTION eat? PAREN_L (elem | eat)* PAREN_R eat?
+    | FOR eat? PAREN_L (elem | eat)* PAREN_R eat?
+    | WHILE eat? PAREN_L (elem | eat)* PAREN_R eat?
+    | IF eat? PAREN_L (elem | eat)* PAREN_R eat?
+    | ELSE {
         // ``inside a compound expression, a newline before else is discarded,
         // whereas at the outermost level, the newline terminates the if
         // construction and a subsequent else causes a syntax error.''
         /*
         Works here
             if (1==0) { print(1) } else { print(2) }
+
         and correctly gets error here:
+
             if (1==0) { print(1) }
             else { print(2) }
+
         this works too:
+
             if (1==0) {
               if (2==0) print(1)
               else print(2)
@@ -73,11 +88,41 @@ element: op eat?
         }
     ;
 
-atom: 'next' | 'break' | ID | STRING | HEX | INT | FLOAT | COMPLEX | 'NULL'
-    | 'NA' | 'Inf' | 'NaN' | 'TRUE' | 'FALSE'
+atom
+    : NEXT
+    | BREAK
+    | ID
+    | STRING
+    | HEX
+    | INT
+    | FLOAT
+    | COMPLEX
+    | NULL
+    | NA
+    | INF
+    | NAN
+    | TRUE
+    | FALSE
     ;
 
-op : '+'|'-'|'*'|'/'|'^'|'<'|'<='|'>='|'>'|'=='|'!='|'&'|'&&'|USER_OP|
-    'repeat'|'in'|'?'|'!'|'='|':'|'~'|'$'|'@'|'<-'|'->'|'='|'::'|':::'|
-    ','|'...'|'||'| '|'
+op
+    : ADD_SUB
+    | MULT_DIV
+    | '^'
+    | COMPARATOR
+    | AND
+    | USER_OP
+    | REPEAT
+    | IN
+    | HELP
+    | NOT
+    | EQUALS
+    | RANGE_OPERATOR
+    | '~'
+    | COMPONENT_ACCESS
+    | ASSIGN
+    | NAMESPACE_ACCESS
+    | ','
+    | '...'
+ | OR ; \ No newline at end of file diff --git a/languages/rlang/src/main/java/de/jplag/rlang/JPlagRListener.java b/languages/rlang/src/main/java/de/jplag/rlang/JPlagRListener.java deleted file mode 100644 index 383a67aea0..0000000000 --- a/languages/rlang/src/main/java/de/jplag/rlang/JPlagRListener.java +++ /dev/null @@ -1,183 +0,0 @@ -package de.jplag.rlang; - -import static de.jplag.rlang.RTokenType.ASSIGN; -import static de.jplag.rlang.RTokenType.ASSIGN_FUNC; -import static de.jplag.rlang.RTokenType.ASSIGN_LIST; -import static de.jplag.rlang.RTokenType.BEGIN_FUNCTION; -import static de.jplag.rlang.RTokenType.BOOL; -import static de.jplag.rlang.RTokenType.BREAK; -import static de.jplag.rlang.RTokenType.COMPOUND_BEGIN; -import static de.jplag.rlang.RTokenType.COMPOUND_END; -import static de.jplag.rlang.RTokenType.END_FUNCTION; -import static de.jplag.rlang.RTokenType.FOR_BEGIN; -import static de.jplag.rlang.RTokenType.FOR_END; -import static de.jplag.rlang.RTokenType.FUNCTION_CALL; -import static de.jplag.rlang.RTokenType.HELP; -import static de.jplag.rlang.RTokenType.IF_BEGIN; -import static de.jplag.rlang.RTokenType.IF_END; -import static de.jplag.rlang.RTokenType.INDEX; -import static de.jplag.rlang.RTokenType.NEXT; -import static de.jplag.rlang.RTokenType.NUMBER; -import static de.jplag.rlang.RTokenType.PACKAGE; -import static de.jplag.rlang.RTokenType.REPEAT_BEGIN; -import static de.jplag.rlang.RTokenType.REPEAT_END; -import static de.jplag.rlang.RTokenType.STRING; -import static de.jplag.rlang.RTokenType.WHILE_BEGIN; -import static de.jplag.rlang.RTokenType.WHILE_END; - -import org.antlr.v4.runtime.Token; - -import de.jplag.rlang.grammar.RParser; - -/** - * Listener class for visiting the R ANTLR parse tree. Transforms the visited ANTLR token into JPlag tokens. Based on an - * R module for JPlag v2.15 by Olmo Kramer, see their - * JPlag fork. 
- * @author Robin Maisch - */ -public class JPlagRListener extends RCombinedBaseListener { - - private final RParserAdapter parserAdapter; - - /** - * Creates the listener. - * @param parserAdapter the JPlag parser adapter which receives the transformed tokens. - */ - public JPlagRListener(RParserAdapter parserAdapter) { - this.parserAdapter = parserAdapter; - } - - /** - * Transforms an ANTLR Token into a JPlag token and transfers it to the token adapter. - * @param targetType the type of the JPlag token to be created. - * @param token the ANTLR token. - */ - private void transformToken(RTokenType targetType, Token token) { - parserAdapter.addToken(targetType, token.getLine(), token.getCharPositionInLine() + 1, token.getText().length()); - } - - private void transformToken(RTokenType targetType, Token start, Token end) { - parserAdapter.addToken(targetType, start.getLine(), start.getCharPositionInLine() + 1, end.getStopIndex() - start.getStartIndex() + 1); - } - - @Override - public void enterIndex_statement(RParser.Index_statementContext context) { - transformToken(INDEX, context.getStart(), context.getStop()); - } - - @Override - public void enterAccess_package(RParser.Access_packageContext context) { - transformToken(PACKAGE, context.getStart()); - } - - @Override - public void enterFunction_definition(RParser.Function_definitionContext context) { - transformToken(BEGIN_FUNCTION, context.getStart()); - } - - @Override - public void exitFunction_definition(RParser.Function_definitionContext context) { - transformToken(END_FUNCTION, context.getStop()); - } - - @Override - public void enterFunction_call(RParser.Function_callContext context) { - transformToken(FUNCTION_CALL, context.getStart(), context.getStop()); - } - - @Override - public void enterConstant_number(RParser.Constant_numberContext context) { - transformToken(NUMBER, context.getStart()); - } - - @Override - public void enterConstant_string(RParser.Constant_stringContext context) { - 
transformToken(STRING, context.getStart()); - } - - @Override - public void enterConstant_bool(RParser.Constant_boolContext context) { - transformToken(BOOL, context.getStart()); - } - - @Override - public void enterHelp(RParser.HelpContext context) { - transformToken(HELP, context.getStart()); - } - - @Override - public void enterIf_statement(RParser.If_statementContext context) { - transformToken(IF_BEGIN, context.getStart()); - } - - @Override - public void exitIf_statement(RParser.If_statementContext context) { - transformToken(IF_END, context.getStop()); - } - - @Override - public void enterFor_statement(RParser.For_statementContext context) { - transformToken(FOR_BEGIN, context.getStart()); - } - - @Override - public void exitFor_statement(RParser.For_statementContext context) { - transformToken(FOR_END, context.getStop()); - } - - @Override - public void enterWhile_statement(RParser.While_statementContext context) { - transformToken(WHILE_BEGIN, context.getStart()); - } - - @Override - public void exitWhile_statement(RParser.While_statementContext context) { - transformToken(WHILE_END, context.getStop()); - } - - @Override - public void enterRepeat_statement(RParser.Repeat_statementContext context) { - transformToken(REPEAT_BEGIN, context.getStart()); - } - - @Override - public void exitRepeat_statement(RParser.Repeat_statementContext context) { - transformToken(REPEAT_END, context.getStop()); - } - - @Override - public void enterNext_statement(RParser.Next_statementContext context) { - transformToken(NEXT, context.getStart()); - } - - @Override - public void enterBreak_statement(RParser.Break_statementContext context) { - transformToken(BREAK, context.getStart()); - } - - @Override - public void enterCompound_statement(RParser.Compound_statementContext context) { - transformToken(COMPOUND_BEGIN, context.getStart()); - } - - @Override - public void exitCompound_statement(RParser.Compound_statementContext context) { - transformToken(COMPOUND_END, 
context.getStop()); - } - - @Override - public void enterAssign_value(RParser.Assign_valueContext context) { - transformToken(ASSIGN, context.getStart()); - } - - @Override - public void enterAssign_func_declaration(RParser.Assign_func_declarationContext context) { - transformToken(ASSIGN_FUNC, context.getStart()); - } - - @Override - public void enterAssign_value_list(RParser.Assign_value_listContext context) { - transformToken(ASSIGN_LIST, context.getStart()); - } - -} \ No newline at end of file diff --git a/languages/rlang/src/main/java/de/jplag/rlang/RCombinedBaseListener.java b/languages/rlang/src/main/java/de/jplag/rlang/RCombinedBaseListener.java deleted file mode 100644 index de9dc1d8e3..0000000000 --- a/languages/rlang/src/main/java/de/jplag/rlang/RCombinedBaseListener.java +++ /dev/null @@ -1,345 +0,0 @@ -package de.jplag.rlang; - -import org.antlr.v4.runtime.ParserRuleContext; -import org.antlr.v4.runtime.tree.ErrorNode; -import org.antlr.v4.runtime.tree.TerminalNode; - -import de.jplag.rlang.grammar.RFilter; -import de.jplag.rlang.grammar.RFilterListener; -import de.jplag.rlang.grammar.RListener; -import de.jplag.rlang.grammar.RParser; - -/** - * Empty base implementation for {@link RListener} and {@link RFilterListener}. 
- */ -public abstract class RCombinedBaseListener implements RListener, RFilterListener { - @Override - public void enterStream(RFilter.StreamContext context) { - - } - - @Override - public void exitStream(RFilter.StreamContext context) { - - } - - @Override - public void enterEat(RFilter.EatContext context) { - - } - - @Override - public void exitEat(RFilter.EatContext context) { - - } - - @Override - public void enterElement(RFilter.ElementContext context) { - - } - - @Override - public void exitElement(RFilter.ElementContext context) { - - } - - @Override - public void enterAtom(RFilter.AtomContext context) { - - } - - @Override - public void exitAtom(RFilter.AtomContext context) { - - } - - @Override - public void enterOp(RFilter.OpContext context) { - - } - - @Override - public void exitOp(RFilter.OpContext context) { - - } - - @Override - public void enterProg(RParser.ProgContext context) { - - } - - @Override - public void exitProg(RParser.ProgContext context) { - - } - - @Override - public void enterExpr(RParser.ExprContext context) { - - } - - @Override - public void exitExpr(RParser.ExprContext context) { - - } - - @Override - public void enterIndex_statement(RParser.Index_statementContext context) { - - } - - @Override - public void exitIndex_statement(RParser.Index_statementContext context) { - - } - - @Override - public void enterAccess_package(RParser.Access_packageContext context) { - - } - - @Override - public void exitAccess_package(RParser.Access_packageContext context) { - - } - - @Override - public void enterFunction_definition(RParser.Function_definitionContext context) { - - } - - @Override - public void exitFunction_definition(RParser.Function_definitionContext context) { - - } - - @Override - public void enterFunction_call(RParser.Function_callContext context) { - - } - - @Override - public void exitFunction_call(RParser.Function_callContext context) { - - } - - @Override - public void enterConstant(RParser.ConstantContext context) { - - } - 
- @Override - public void exitConstant(RParser.ConstantContext context) { - - } - - @Override - public void enterConstant_number(RParser.Constant_numberContext context) { - - } - - @Override - public void exitConstant_number(RParser.Constant_numberContext context) { - - } - - @Override - public void enterConstant_string(RParser.Constant_stringContext context) { - - } - - @Override - public void exitConstant_string(RParser.Constant_stringContext context) { - - } - - @Override - public void enterConstant_bool(RParser.Constant_boolContext context) { - - } - - @Override - public void exitConstant_bool(RParser.Constant_boolContext context) { - - } - - @Override - public void enterHelp(RParser.HelpContext context) { - - } - - @Override - public void exitHelp(RParser.HelpContext context) { - - } - - @Override - public void enterIf_statement(RParser.If_statementContext context) { - - } - - @Override - public void exitIf_statement(RParser.If_statementContext context) { - - } - - @Override - public void enterFor_statement(RParser.For_statementContext context) { - - } - - @Override - public void exitFor_statement(RParser.For_statementContext context) { - - } - - @Override - public void enterWhile_statement(RParser.While_statementContext context) { - - } - - @Override - public void exitWhile_statement(RParser.While_statementContext context) { - - } - - @Override - public void enterRepeat_statement(RParser.Repeat_statementContext context) { - - } - - @Override - public void exitRepeat_statement(RParser.Repeat_statementContext context) { - - } - - @Override - public void enterNext_statement(RParser.Next_statementContext context) { - - } - - @Override - public void exitNext_statement(RParser.Next_statementContext context) { - - } - - @Override - public void enterBreak_statement(RParser.Break_statementContext context) { - - } - - @Override - public void exitBreak_statement(RParser.Break_statementContext context) { - - } - - @Override - public void 
enterCompound_statement(RParser.Compound_statementContext context) { - - } - - @Override - public void exitCompound_statement(RParser.Compound_statementContext context) { - - } - - @Override - public void enterExprlist(RParser.ExprlistContext context) { - - } - - @Override - public void exitExprlist(RParser.ExprlistContext context) { - - } - - @Override - public void enterFormlist(RParser.FormlistContext context) { - - } - - @Override - public void exitFormlist(RParser.FormlistContext context) { - - } - - @Override - public void enterForm(RParser.FormContext context) { - - } - - @Override - public void exitForm(RParser.FormContext context) { - - } - - @Override - public void enterSublist(RParser.SublistContext context) { - - } - - @Override - public void exitSublist(RParser.SublistContext context) { - - } - - @Override - public void enterSub(RParser.SubContext context) { - - } - - @Override - public void exitSub(RParser.SubContext context) { - - } - - @Override - public void enterAssign_value(RParser.Assign_valueContext context) { - - } - - @Override - public void exitAssign_value(RParser.Assign_valueContext context) { - - } - - @Override - public void enterAssign_func_declaration(RParser.Assign_func_declarationContext context) { - - } - - @Override - public void exitAssign_func_declaration(RParser.Assign_func_declarationContext context) { - - } - - @Override - public void enterAssign_value_list(RParser.Assign_value_listContext context) { - - } - - @Override - public void exitAssign_value_list(RParser.Assign_value_listContext context) { - - } - - @Override - public void visitTerminal(TerminalNode node) { - - } - - @Override - public void visitErrorNode(ErrorNode node) { - - } - - @Override - public void enterEveryRule(ParserRuleContext context) { - - } - - @Override - public void exitEveryRule(ParserRuleContext context) { - - } -} diff --git a/languages/rlang/src/main/java/de/jplag/rlang/RLanguage.java b/languages/rlang/src/main/java/de/jplag/rlang/RLanguage.java 
index 98b0171bf9..3e7e3ab7f5 100644
--- a/languages/rlang/src/main/java/de/jplag/rlang/RLanguage.java
+++ b/languages/rlang/src/main/java/de/jplag/rlang/RLanguage.java
@@ -1,28 +1,21 @@
 package de.jplag.rlang;
 
-import java.io.File;
-import java.util.List;
-import java.util.Set;
-
 import org.kohsuke.MetaInfServices;
 
-import de.jplag.ParsingException;
-import de.jplag.Token;
+import de.jplag.antlr.AbstractAntlrLanguage;
 
 /**
  * This represents the R language as a language supported by JPlag.
  */
 @MetaInfServices(de.jplag.Language.class)
-public class RLanguage implements de.jplag.Language {
-
+public class RLanguage extends AbstractAntlrLanguage {
     private static final String NAME = "R";
     private static final String IDENTIFIER = "rlang";
     private static final int DEFAULT_MIN_TOKEN_MATCH = 8;
     private static final String[] FILE_EXTENSION = {".R", ".r"};
-    private final RParserAdapter parserAdapter;
 
     public RLanguage() {
-        this.parserAdapter = new RParserAdapter();
+        super(new RParserAdapter());
     }
 
     @Override
@@ -44,9 +37,4 @@ public String getIdentifier() {
     public int minimumTokenMatch() {
         return DEFAULT_MIN_TOKEN_MATCH;
     }
-
-    @Override
-    public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
-        return parserAdapter.parse(files);
-    }
 }
diff --git a/languages/rlang/src/main/java/de/jplag/rlang/RListener.java b/languages/rlang/src/main/java/de/jplag/rlang/RListener.java
new file mode 100644
index 0000000000..b8b1a6eb52
--- /dev/null
+++ b/languages/rlang/src/main/java/de/jplag/rlang/RListener.java
@@ -0,0 +1,125 @@
+package de.jplag.rlang;
+
+import static de.jplag.rlang.RTokenType.ASSIGN;
+import static de.jplag.rlang.RTokenType.ASSIGN_FUNC;
+import static de.jplag.rlang.RTokenType.ASSIGN_LIST;
+import static de.jplag.rlang.RTokenType.BEGIN_FUNCTION;
+import static de.jplag.rlang.RTokenType.BOOL;
+import static de.jplag.rlang.RTokenType.BREAK;
+import static de.jplag.rlang.RTokenType.COMPOUND_BEGIN;
+import static de.jplag.rlang.RTokenType.COMPOUND_END;
+import static de.jplag.rlang.RTokenType.END_FUNCTION;
+import static de.jplag.rlang.RTokenType.FOR_BEGIN;
+import static de.jplag.rlang.RTokenType.FOR_END;
+import static de.jplag.rlang.RTokenType.FUNCTION_CALL;
+import static de.jplag.rlang.RTokenType.HELP;
+import static de.jplag.rlang.RTokenType.IF_BEGIN;
+import static de.jplag.rlang.RTokenType.IF_END;
+import static de.jplag.rlang.RTokenType.INDEX;
+import static de.jplag.rlang.RTokenType.NEXT;
+import static de.jplag.rlang.RTokenType.NUMBER;
+import static de.jplag.rlang.RTokenType.PACKAGE;
+import static de.jplag.rlang.RTokenType.REPEAT_BEGIN;
+import static de.jplag.rlang.RTokenType.REPEAT_END;
+import static de.jplag.rlang.RTokenType.STRING;
+import static de.jplag.rlang.RTokenType.WHILE_BEGIN;
+import static de.jplag.rlang.RTokenType.WHILE_END;
+
+import de.jplag.antlr.AbstractAntlrListener;
+import de.jplag.rlang.grammar.RParser.ArrayAccessContext;
+import de.jplag.rlang.grammar.RParser.AssignmentContext;
+import de.jplag.rlang.grammar.RParser.BreakContext;
+import de.jplag.rlang.grammar.RParser.ComplexContext;
+import de.jplag.rlang.grammar.RParser.CompoundStatementContext;
+import de.jplag.rlang.grammar.RParser.FalseContext;
+import de.jplag.rlang.grammar.RParser.FloatContext;
+import de.jplag.rlang.grammar.RParser.ForContext;
+import de.jplag.rlang.grammar.RParser.FormContext;
+import de.jplag.rlang.grammar.RParser.FunctionCallContext;
+import de.jplag.rlang.grammar.RParser.FunctionDefinitionContext;
+import de.jplag.rlang.grammar.RParser.HelpContext;
+import de.jplag.rlang.grammar.RParser.HexContext;
+import de.jplag.rlang.grammar.RParser.IfContext;
+import de.jplag.rlang.grammar.RParser.IfElseContext;
+import de.jplag.rlang.grammar.RParser.IntContext;
+import de.jplag.rlang.grammar.RParser.ListAccessContext;
+import de.jplag.rlang.grammar.RParser.NamespaceAccessContext;
+import de.jplag.rlang.grammar.RParser.NextContext;
+import de.jplag.rlang.grammar.RParser.RepeatContext;
+import de.jplag.rlang.grammar.RParser.StringContext;
+import de.jplag.rlang.grammar.RParser.SubContext;
+import de.jplag.rlang.grammar.RParser.TrueContext;
+import de.jplag.rlang.grammar.RParser.WhileContext;
+
+/**
+ * Contains mapping for RLang to create JPlag tokens from ANTLR tokens.
+ * <p>
+ * All rules are registered in the constructor. Each {@code visit(...)} call binds one parse-tree context class generated
+ * from the R grammar to the JPlag token type(s) extracted for it.
+ */
+public class RListener extends AbstractAntlrListener {
+    /** Text of the variadic placeholder as it appears in formal parameter lists and argument lists. */
+    private static final String VARARGS = "...";
+
+    /**
+     * Creates the listener and registers all mapping rules.
+     */
+    public RListener() {
+        addAccessRules();
+        addFunctionRules();
+        addLiteralRules();
+        addControlStructureRules();
+        addAssignmentRules();
+    }
+
+    /** Indexing with {@code [..]} or {@code [[..]]} and namespace access with {@code ::} or {@code :::}. */
+    private void addAccessRules() {
+        visit(ArrayAccessContext.class).mapRange(INDEX);
+        visit(ListAccessContext.class).mapRange(INDEX);
+        visit(NamespaceAccessContext.class).map(PACKAGE);
+    }
+
+    /** Function definitions and calls, compound statements and help requests. */
+    private void addFunctionRules() {
+        visit(FunctionDefinitionContext.class).map(BEGIN_FUNCTION, END_FUNCTION);
+        visit(FunctionCallContext.class).mapRange(FUNCTION_CALL);
+
+        visit(CompoundStatementContext.class).map(COMPOUND_BEGIN, COMPOUND_END);
+        visit(HelpContext.class).map(HELP);
+    }
+
+    /** Numeric, string and boolean literals. */
+    private void addLiteralRules() {
+        visit(HexContext.class).map(NUMBER);
+        visit(IntContext.class).map(NUMBER);
+        visit(FloatContext.class).map(NUMBER);
+        visit(ComplexContext.class).map(NUMBER);
+        visit(StringContext.class).map(STRING);
+        visit(TrueContext.class).map(BOOL);
+        visit(FalseContext.class).map(BOOL);
+    }
+
+    /** Conditionals, loops and the loop control statements {@code next} and {@code break}. */
+    private void addControlStructureRules() {
+        visit(IfContext.class).map(IF_BEGIN, IF_END);
+        visit(IfElseContext.class).map(IF_BEGIN, IF_END);
+        visit(ForContext.class).map(FOR_BEGIN, FOR_END);
+        visit(WhileContext.class).map(WHILE_BEGIN, WHILE_END);
+        visit(RepeatContext.class).map(REPEAT_BEGIN, REPEAT_END);
+
+        visit(NextContext.class).map(NEXT);
+        visit(BreakContext.class).map(BREAK);
+    }
+
+    /**
+     * Assignments, formal parameters and call arguments, mapped the way the replaced {@code JPlagRListener} mapped them:
+     * every assignment operator (including {@code =}) yields ASSIGN, a formal parameter with a default value or the
+     * variadic placeholder yields ASSIGN_FUNC, and a named or variadic call argument yields ASSIGN_LIST. Plain
+     * positional arguments and empty argument slots yield no token.
+     */
+    private void addAssignmentRules() {
+        visit(AssignmentContext.class).map(ASSIGN);
+        visit(FormContext.class, context -> context.EQUALS() != null || VARARGS.equals(context.getText())).map(ASSIGN_FUNC);
+        visit(SubContext.class, context -> context.EQUALS() != null || VARARGS.equals(context.getText())).map(ASSIGN_LIST);
+    }
+}
diff --git a/languages/rlang/src/main/java/de/jplag/rlang/RParserAdapter.java
b/languages/rlang/src/main/java/de/jplag/rlang/RParserAdapter.java
index e0ee215f75..d4df15e425 100644
--- a/languages/rlang/src/main/java/de/jplag/rlang/RParserAdapter.java
+++ b/languages/rlang/src/main/java/de/jplag/rlang/RParserAdapter.java
@@ -1,94 +1,44 @@
 package de.jplag.rlang;
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-
-import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CharStream;
 import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Lexer;
 import org.antlr.v4.runtime.ParserRuleContext;
-import org.antlr.v4.runtime.tree.ParseTree;
-import org.antlr.v4.runtime.tree.ParseTreeWalker;
 
-import de.jplag.AbstractParser;
-import de.jplag.ParsingException;
-import de.jplag.Token;
-import de.jplag.TokenType;
-import de.jplag.rlang.grammar.RFilter;
+import de.jplag.antlr.AbstractAntlrListener;
+import de.jplag.antlr.AbstractAntlrParserAdapter;
 import de.jplag.rlang.grammar.RLexer;
 import de.jplag.rlang.grammar.RParser;
-import de.jplag.util.FileUtils;
 
 /**
  * This class sets up the lexer and parser generated by ANTLR4, feeds the submissions through them and passes the
  * selected tokens on to the main program.
  */
-public class RParserAdapter extends AbstractParser {
-
-    private File currentFile;
-    private List<Token> tokens;
-
-    /**
-     * Creates the RParserAdapter
-     */
-    public RParserAdapter() {
-        super();
+public class RParserAdapter extends AbstractAntlrParserAdapter<RParser> {
+    /** Creates the ANTLR-generated R lexer for the given input. */
+    @Override
+    protected Lexer createLexer(CharStream input) {
+        return new RLexer(input);
     }
 
-    /**
-     * Parsers a set of files into a single token list of {@link Token}s.
-     * @param files the set of files.
-     * @return a list containing all tokens of all files.
-     */
-    public List<Token> parse(Set<File> files) throws ParsingException {
-        tokens = new ArrayList<>();
-        for (File file : files) {
-            parseFile(file);
-            tokens.add(Token.fileEnd(file));
-        }
-        return tokens;
+    /**
+     * Creates the ANTLR-generated R parser directly on the lexer's token stream. Unlike the previous implementation, no
+     * RFilter pass is run over the tokens first.
+     */
+    @Override
+    protected RParser createParser(CommonTokenStream tokenStream) {
+        return new RParser(tokenStream);
     }
 
-    private void parseFile(File file) throws ParsingException {
-        try (BufferedReader reader = FileUtils.openFileReader(file)) {
-            currentFile = file;
-
-            // create a lexer, a parser and a buffer between them.
-            RLexer lexer = new RLexer(CharStreams.fromReader(reader));
-            CommonTokenStream tokens = new CommonTokenStream(lexer);
-
-            RFilter filter = new RFilter(tokens);
-            filter.stream();
-            tokens.seek(0);
-
-            RParser parser = new RParser(tokens);
-
-            // Create a tree walker and the entry context defined by the parser grammar
-            ParserRuleContext entryContext = parser.prog();
-            ParseTreeWalker treeWalker = new ParseTreeWalker();
-
-            // Walk over the parse tree:
-            for (int i = 0; i < entryContext.getChildCount(); i++) {
-                ParseTree parseTree = entryContext.getChild(i);
-                treeWalker.walk(new JPlagRListener(this), parseTree);
-            }
-        } catch (IOException exception) {
-            throw new ParsingException(file, exception.getMessage(), exception);
-        }
+    /** Parses the input with the grammar's top-level {@code prog} rule. */
+    @Override
+    protected ParserRuleContext getEntryContext(RParser parser) {
+        return parser.prog();
     }
 
-    /**
-     * Adds a new {@link Token} to the current token list.
-     * @param type the type of the new {@link Token}
-     * @param line the lineNumber of the Token in the current file
-     * @param start the start column of the Token in the lineNumber
-     * @param length the length of the Token
-     */
-    /* package-private */ void addToken(TokenType type, int line, int start, int length) {
-        tokens.add(new Token(type, currentFile, line, start, length));
-
+    /** Supplies the listener that maps the parse tree to JPlag tokens. */
+    @Override
+    protected AbstractAntlrListener getListener() {
+        return new RListener();
     }
 }