Skip to content

Commit

Permalink
Allow for multi-character operators
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Jun 26, 2024
1 parent fd4bf35 commit d480df1
Showing 1 changed file with 54 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
public class TestUnicodeInvariants {
private static final boolean DEBUG = false;

// private static final Pattern IN_PATTERN = Pattern.compile("(.*)([≠=])(.*)");
private static final boolean ICU_VERSION = false; // ignore the versions if this is true
private static final Factory LATEST_PROPS = getProperties(Settings.latestVersion);
private static final boolean SHOW_LOOKUP = false;
Expand Down Expand Up @@ -691,26 +690,69 @@ private static void inLine(
}

// A one-token lookahead.
// Tokens are defined as runs of [^\p{Pattern_White_Space}\p{Pattern_Syntax}],
// or single code points in \p{Pattern_Syntax}.
// Tokens are defined as:
// 1. words: runs of [^\p{Pattern_White_Space}\p{Pattern_Syntax}];
// 2. simple operators: sequences of the form \p{Pattern_Syntax} \p{Mn}*;
// 3. explicitly expected sequences of words and simple operators without intervening spaces;
// this allows for contextually accepting operators such as :=, <<, ’s, or .GT.,
// without treating, e.g., every << as atomic.
private static class Lookahead {
// Advances pp through any pattern white space, then looks ahead one token.
public static Lookahead oneToken(ParsePosition pp, String text) {
scan(PATTERN_WHITE_SPACE, text, pp, true);
return oneTokenNoSpace(pp, text);
}

// Advances pp through any pattern white space, then looks ahead one token,
// treating the given sequences as single tokens.
public static Lookahead oneToken(ParsePosition pp, String text, String... sequences) {
scan(PATTERN_WHITE_SPACE, text, pp, true);
Lookahead result = oneTokenNoSpace(pp, text);
if (result == null) {
return result;
}
Lookahead candidate = result;
for (; ; ) {
boolean candidateIsSequencePrefix = false;
for (String s : sequences) {
if (s.startsWith(candidate.token)) {
candidateIsSequencePrefix = true;
break;
}
}
if (!candidateIsSequencePrefix) {
break;
}
final Lookahead continuation = oneTokenNoSpace(candidate.next, text);
if (continuation == null) {
break;
}
candidate =
new Lookahead(candidate.token + continuation.token, pp, continuation.next);

for (String s : sequences) {
if (candidate.token.equals(s)) {
result = candidate;
break;
}
}
}
return result;
}

// Returns null if pp is before pattern white space; otherwise, looks ahead one token.
// This function does not alter pp.
public static Lookahead oneTokenNoSpace(ParsePosition pp, String text) {
ParsePosition next = new ParsePosition(pp.getIndex());
if (next.getIndex() == text.length()) {
return null;
}
int start = next.getIndex();
if (PATTERN_SYNTAX.contains(text.codePointAt(start))) {
final String result = Character.toString(text.codePointAt(start));
next.setIndex(start + result.length());
return new Lookahead(result, pp, next);
final String syntax = Character.toString(text.codePointAt(start));
next.setIndex(start + syntax.length());
final String marks = scan(NONSPACING_MARK, text, next, true);
return new Lookahead(syntax + marks, pp, next);
} else {
final String result = scan(PATTERN_SYNTAX_OR_WHITE_SPACE, text, next, false);
return result.isEmpty() ? null : new Lookahead(result, pp, next);
Expand Down Expand Up @@ -748,8 +790,10 @@ public boolean accept(String expected) {

private static void expectToken(String token, ParsePosition pp, String text)
throws ParseException {
if (!Lookahead.oneToken(pp, text).accept(token)) {
throw new ParseException("Expected '" + token + "'", pp.getIndex());
final var lookahead = Lookahead.oneToken(pp, text, token);
if (!lookahead.accept(token)) {
throw new ParseException(
"Expected '" + token + "', got '" + lookahead.token + "'", pp.getIndex());
}
}

Expand Down Expand Up @@ -1454,6 +1498,7 @@ private static int parseError(
final int eol = source.indexOf("\n", statementStart);
source = source.substring(sol >= 0 ? sol : 0, eol >= 0 ? eol : source.length());
}
source = source.trim();

printErrorLine("Parse Failure", Side.START, parseErrorCount);
println("**** PARSE ERROR:\t" + source);
Expand Down Expand Up @@ -1506,6 +1551,7 @@ private static void printErrorLine(String title, Side side, int testFailureCount
private static final UnicodeSet PATTERN_WHITE_SPACE =
new UnicodeSet("\\p{pattern white space}").freeze();
private static final UnicodeSet PATTERN_SYNTAX = new UnicodeSet("\\p{pattern syntax}").freeze();
private static final UnicodeSet NONSPACING_MARK = new UnicodeSet("\\p{Mn}").freeze();
private static final UnicodeSet PATTERN_SYNTAX_OR_WHITE_SPACE =
new UnicodeSet("[\\p{pattern white space}\\p{pattern syntax}]").freeze();

Expand Down

0 comments on commit d480df1

Please sign in to comment.