Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use := for definitions in the invariant tests #866

Merged
merged 4 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -42,7 +43,6 @@
public class TestUnicodeInvariants {
private static final boolean DEBUG = false;

// private static final Pattern IN_PATTERN = Pattern.compile("(.*)([≠=])(.*)");
private static final boolean ICU_VERSION = false; // ignore the versions if this is true
private static final Factory LATEST_PROPS = getProperties(Settings.latestVersion);
private static final boolean SHOW_LOOKUP = false;
Expand Down Expand Up @@ -690,27 +690,68 @@ private static void inLine(
}
}

// A one-token lookahead.
// Tokens are defined as runs of [^\p{Pattern_White_Space}\p{Pattern_Syntax}],
// or single code points in \p{Pattern_Syntax}.
/**
* A one-token lookahead. Tokens are defined as: 1. words: runs of
* [^\p{Pattern_White_Space}\p{Pattern_Syntax}]; 2. simple operators: sequences of the form
* \p{Pattern_Syntax} \p{Mn}*; 3. explicitly expected sequences of words and simple operators
* without intervening spaces; this allows for contextually accepting operators such as :=, >>,
* ’s, or .GT., without treating, e.g., every >> as atomic.
*/
private static class Lookahead {
/** Skips any pattern white space at pp (advancing pp), then looks ahead one token. */
public static Lookahead oneToken(ParsePosition pp, String text) {
    scan(PATTERN_WHITE_SPACE, text, pp, true);
    final Lookahead upcoming = oneTokenNoSpace(pp, text);
    return upcoming;
}

// Returns null if pp is before pattern white space; otherwise, looks ahead one token.
/**
 * Advances pp through any pattern white space, then looks ahead one token, treating the
 * given sequences as single tokens.
 *
 * <p>Starting from the first simple token, the tokens that immediately follow it (without
 * intervening white space) are appended for as long as the text accumulated so far is a
 * prefix of one of the sequences. The longest accumulated text that is equal to one of the
 * sequences is returned as a single token; if there is none, the first simple token is
 * returned.
 *
 * @param pp the parse position; it is advanced past leading pattern white space only
 * @param text the text being tokenized
 * @param sequences the multi-token sequences to recognize as single tokens, e.g. ":="
 * @return the lookahead, or null if no token follows the white space
 */
public static Lookahead oneToken(ParsePosition pp, String text, String... sequences) {
    scan(PATTERN_WHITE_SPACE, text, pp, true);
    final Lookahead first = oneTokenNoSpace(pp, text);
    if (first == null) {
        return null;
    }
    Lookahead longestMatch = first;
    Lookahead candidate = first;
    while (true) {
        final String prefix = candidate.token;
        // Extending the candidate is pointless once no sequence starts with it.
        if (Arrays.stream(sequences).noneMatch(s -> s.startsWith(prefix))) {
            break;
        }
        final Lookahead continuation = oneTokenNoSpace(candidate.next, text);
        if (continuation == null) {
            break;
        }
        final String extended = prefix + continuation.token;
        // The extended candidate still starts at pp, so that consuming it advances pp past
        // the whole sequence.
        candidate = new Lookahead(extended, pp, continuation.next);
        if (Arrays.stream(sequences).anyMatch(extended::equals)) {
            longestMatch = candidate;
        }
    }
    return longestMatch;
}

/**
* Returns null if pp is before pattern white space; otherwise, looks ahead one token. This
* function does not alter pp.
*/
public static Lookahead oneTokenNoSpace(ParsePosition pp, String text) {
ParsePosition next = new ParsePosition(pp.getIndex());
if (next.getIndex() == text.length()) {
return null;
}
int start = next.getIndex();
if (PATTERN_SYNTAX.contains(text.codePointAt(start))) {
final String result = Character.toString(text.codePointAt(start));
next.setIndex(start + result.length());
return new Lookahead(result, pp, next);
final String syntax = Character.toString(text.codePointAt(start));
next.setIndex(start + syntax.length());
final String marks = scan(NONSPACING_MARK, text, next, true);
return new Lookahead(syntax + marks, pp, next);
} else {
final String result = scan(PATTERN_SYNTAX_OR_WHITE_SPACE, text, next, false);
return result.isEmpty() ? null : new Lookahead(result, pp, next);
Expand All @@ -723,15 +764,18 @@ private Lookahead(String token, ParsePosition pp, ParsePosition next) {
this.next = next;
}

// Advances the ParsePosition passed at construction past the token, and returns the token.
/**
 * Moves the ParsePosition given at construction to just after this token.
 *
 * @return the token that was consumed
 */
public String consume() {
    final int afterToken = next.getIndex();
    pp.setIndex(afterToken);
    return token;
}

// If this token is expected, advances the ParsePosition passed at construction past the
// token past it and returns true.
// Otherwise, this function no effect and returns false.
/**
* If this token is expected, advances the ParsePosition passed at construction past the
* token and returns true. Otherwise, this function has no effect and returns false.
*/
public boolean accept(String expected) {
if (expected.equals(token)) {
consume();
Expand All @@ -748,8 +792,10 @@ public boolean accept(String expected) {

private static void expectToken(String token, ParsePosition pp, String text)
throws ParseException {
if (!Lookahead.oneToken(pp, text).accept(token)) {
throw new ParseException("Expected '" + token + "'", pp.getIndex());
final var lookahead = Lookahead.oneToken(pp, text, token);
if (!lookahead.accept(token)) {
throw new ParseException(
"Expected '" + token + "', got '" + lookahead.token + "'", pp.getIndex());
}
}

Expand Down Expand Up @@ -1069,7 +1115,7 @@ protected String _getVersion() {
private static void letLine(ParsePosition pp, String source) throws ParseException {
expectToken("$", pp, source);
final String variable = Lookahead.oneTokenNoSpace(pp, source).consume();
expectToken("=", pp, source);
expectToken(":=", pp, source);
final int valueStart = pp.getIndex();
final UnicodeSet valueSet = parseUnicodeSet(source, pp);
valueSet.complement().complement();
Expand Down Expand Up @@ -1454,6 +1500,7 @@ private static int parseError(
final int eol = source.indexOf("\n", statementStart);
source = source.substring(sol >= 0 ? sol : 0, eol >= 0 ? eol : source.length());
}
source = source.trim();

printErrorLine("Parse Failure", Side.START, parseErrorCount);
println("**** PARSE ERROR:\t" + source);
Expand Down Expand Up @@ -1506,6 +1553,7 @@ private static void printErrorLine(String title, Side side, int testFailureCount
// Frozen sets of code points used by the tokenizer.
// Code points matching \p{pattern white space}.
private static final UnicodeSet PATTERN_WHITE_SPACE =
new UnicodeSet("\\p{pattern white space}").freeze();
// Code points matching \p{pattern syntax}.
private static final UnicodeSet PATTERN_SYNTAX = new UnicodeSet("\\p{pattern syntax}").freeze();
// Code points matching \p{Mn}; these are absorbed into a preceding pattern syntax token.
private static final UnicodeSet NONSPACING_MARK = new UnicodeSet("\\p{Mn}").freeze();
// Union of the pattern white space and pattern syntax sets.
private static final UnicodeSet PATTERN_SYNTAX_OR_WHITE_SPACE =
new UnicodeSet("[\\p{pattern white space}\\p{pattern syntax}]").freeze();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

\p{Identifier_Type=Not_NFKC} = [\p{NFKC_QC=No}-\p{Deprecated}-\p{Default_Ignorable_Code_Point}]

Let $Strongly_Restricted = [\p{Identifier_Type=Not_Character}\p{Identifier_Type=Deprecated}\p{Identifier_Type=Default_Ignorable}\p{Identifier_Type=Not_NFKC}]
Let $Strongly_Restricted := [\p{Identifier_Type=Not_Character}\p{Identifier_Type=Deprecated}\p{Identifier_Type=Default_Ignorable}\p{Identifier_Type=Not_NFKC}]

\p{Identifier_Type=Not_XID} = [\P{XID_Continue}-$Strongly_Restricted-\p{Identifier_Type=Inclusion}]

Expand Down
Loading
Loading