From 0a04be9dd97ce190042caec6c042974de41cd881 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 25 Jun 2024 23:14:51 +0200
Subject: [PATCH 1/6] Allow line breaks in invariant test statements

---
 .../text/UCD/TestUnicodeInvariants.java       | 388 ++++++++++--------
 .../unicode/text/UCD/UnicodeInvariantTest.txt |   2 +-
 2 files changed, 223 insertions(+), 167 deletions(-)
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
index 320b7d120..a245e641b 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
@@ -121,7 +121,6 @@ enum Expected {
     };
 
     static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[=\u2282\u2283\u2286\u2287∥≉]");
-    static final ParsePosition pp = new ParsePosition(0);
 
     private static PrintWriter out;
 
@@ -179,102 +178,117 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang
                 } else {
                     out3.write('\uFEFF'); // BOM
                 }
+                final var noComments = new StringBuilder();
+                final List<String> lines = new ArrayList<>();
+                final List<Integer> lineBeginnings = new ArrayList<>();
                 try (final BufferedReader in = getInputReader(inputFile)) {
-                    errorLister =
-                            new BagFormatter()
-                                    .setMergeRanges(doRange)
-                                    .setLabelSource(null)
-                                    .setUnicodePropertyFactory(LATEST_PROPS)
-                                    // .setTableHtml("<table class='e'>")
-                                    .setShowLiteral(toHTML)
-                                    .setFixName(toHTML);
-                    errorLister.setShowTotal(false);
-                    if (doHtml) {
-                        errorLister.setTabber(htmlTabber);
-                    }
+                    in.lines()
+                            .forEach(
+                                    line -> {
+                                        if (line.startsWith("\uFEFF")) {
+                                            line = line.substring(1);
+                                        }
+                                        lines.add(line);
+                                        lineBeginnings.add(noComments.length());
+                                        final int pos = line.indexOf('#');
+                                        if (pos >= 0) {
+                                            line = line.substring(0, pos);
+                                        }
+                                        noComments.append(line.trim() + '\n');
+                                    });
+                }
+                errorLister =
+                        new BagFormatter()
+                                .setMergeRanges(doRange)
+                                .setLabelSource(null)
+                                .setUnicodePropertyFactory(LATEST_PROPS)
+                                // .setTableHtml("<table class='e'>")
+                                .setShowLiteral(toHTML)
+                                .setFixName(toHTML);
+                errorLister.setShowTotal(false);
+                if (doHtml) {
+                    errorLister.setTabber(htmlTabber);
+                }
 
-                    showLister =
-                            new BagFormatter()
-                                    .setMergeRanges(doRange)
-                                    // .setLabelSource(null)
-                                    .setUnicodePropertyFactory(LATEST_PROPS)
-                                    // .setTableHtml("<table class='s'>")
-                                    .setShowLiteral(toHTML);
-                    showLister.setShowTotal(false);
-                    if (showScript) {
-                        showLister.setValueSource(LATEST_PROPS.getProperty("script"));
-                    }
-                    if (doHtml) {
-                        showLister.setTabber(htmlTabber);
-                    }
+                showLister =
+                        new BagFormatter()
+                                .setMergeRanges(doRange)
+                                // .setLabelSource(null)
+                                .setUnicodePropertyFactory(LATEST_PROPS)
+                                // .setTableHtml("<table class='s'>")
+                                .setShowLiteral(toHTML);
+                showLister.setShowTotal(false);
+                if (showScript) {
+                    showLister.setValueSource(LATEST_PROPS.getProperty("script"));
+                }
+                if (doHtml) {
+                    showLister.setTabber(htmlTabber);
+                }
 
-                    // symbolTable = new ChainedSymbolTable();
-                    //      new ChainedSymbolTable(new SymbolTable[] {
-                    //
-                    // ToolUnicodePropertySource.make(UCD.lastVersion).getSymbolTable("\u00D7"),
-                    //
-                    // ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
-                    for (int lineNumber = 1; ; ++lineNumber) {
-                        String line = in.readLine();
-                        if (line == null) {
-                            break;
-                        }
-                        try {
-                            if (line.startsWith("\uFEFF")) {
-                                line = line.substring(1);
-                            }
-                            println(line);
-                            line = line.trim();
-                            final int pos = line.indexOf('#');
-                            if (pos >= 0) {
-                                line = line.substring(0, pos).trim();
-                            }
-                            if (line.length() == 0) {
-                                continue;
-                            }
-                            if (line.equalsIgnoreCase("Stop")) {
-                                break;
-                            } else if (line.startsWith("Let")) {
-                                letLine(pp, line);
-                            } else if (line.startsWith("In")) {
-                                inLine(pp, line, inputFile, lineNumber);
-                            } else if (line.startsWith("Propertywise")) {
-                                propertywiseLine(pp, line, inputFile, lineNumber);
-                            } else if (line.startsWith("ShowScript")) {
-                                showScript = true;
-                            } else if (line.startsWith("HideScript")) {
-                                showScript = false;
-                            } else if (line.startsWith("Map")) {
-                                testMapLine(line, pp, lineNumber);
-                            } else if (line.startsWith("ShowMap")) {
-                                showMapLine(line, pp);
-                            } else if (line.startsWith("Show")) {
-                                showLine(line, pp);
-                            } else if (line.startsWith("OnPairsOf")) {
-                                equivalencesLine(line, pp, inputFile, lineNumber);
-                            } else {
-                                testLine(line, pp, inputFile, lineNumber);
+                final String source = noComments.toString();
+                final Function<ParsePosition, Integer> getLineNumber =
+                        position -> {
+                            for (int i = 0; i < lineBeginnings.size(); ++i) {
+                                if (lineBeginnings.get(i) > position.getIndex()) {
+                                    return i; // 1-based line number.
+                                }
                             }
-                        } catch (final Exception e) {
-                            parseErrorCount =
-                                    parseError(parseErrorCount, line, e, inputFile, lineNumber);
-                            continue;
-                        }
+                            return lineBeginnings.size();
+                        };
+                int lastPrintedLine = 0;
+                final ParsePosition pp = new ParsePosition(0);
+                for (; ; ) {
+                    final int statementStart = pp.getIndex();
+                    final String nextToken = nextToken(pp, source);
+                    while (getLineNumber.apply(pp) > lastPrintedLine) {
+                        println(lines.get(lastPrintedLine++));
+                    }
+                    if (nextToken == null) {
+                        break;
                     }
-                    println();
-                    println("**** SUMMARY ****");
-                    println();
-                    println("# ParseErrorCount=" + parseErrorCount);
-                    System.out.println("ParseErrorCount=" + parseErrorCount);
-                    println("# TestFailureCount=" + testFailureCount);
-                    System.out.println("TestFailureCount=" + testFailureCount);
-                    if (doHtml) {
-                        out3.println("</body></html>");
+                    try {
+                        if (nextToken.equals("Let")) {
+                            letLine(pp, source);
+                        } else if (nextToken.equals("In")) {
+                            inLine(pp, source, inputFile, getLineNumber);
+                        } else if (nextToken.equals("Propertywise")) {
+                            propertywiseLine(pp, source, inputFile, getLineNumber);
+                        } else if (nextToken.equals("Map")) {
+                            testMapLine(source, pp, getLineNumber);
+                        } else if (nextToken.equals("ShowMap")) {
+                            showMapLine(source, pp);
+                        } else if (nextToken.equals("Show")) {
+                            showLine(source, pp);
+                        } else if (nextToken.equals("OnPairsOf")) {
+                            equivalencesLine(source, pp, inputFile, getLineNumber);
+                        } else {
+                            pp.setIndex(statementStart);
+                            testLine(source, pp, inputFile, getLineNumber);
+                        }
+                    } catch (final Exception e) {
+                        parseErrorCount =
+                                parseError(
+                                        parseErrorCount,
+                                        source,
+                                        e,
+                                        statementStart,
+                                        inputFile,
+                                        getLineNumber.apply(pp));
+                        break;
                     }
-                    out2.append(writer.getBuffer());
                 }
+                println();
+                println("**** SUMMARY ****");
+                println();
+                println("# ParseErrorCount=" + parseErrorCount);
+                System.out.println("ParseErrorCount=" + parseErrorCount);
+                println("# TestFailureCount=" + testFailureCount);
+                System.out.println("TestFailureCount=" + testFailureCount);
+                if (doHtml) {
+                    out3.println("</body></html>");
+                }
+                out2.append(writer.getBuffer());
             }
-            out = null;
         }
         return parseErrorCount + testFailureCount;
     }
@@ -332,17 +346,20 @@ protected String getFailure(int codepoint) {
         }
     }
 
-    private static void propertywiseLine(ParsePosition pp, String line, String file, int lineNumber)
+    private static void propertywiseLine(
+            ParsePosition pp,
+            String line,
+            String file,
+            Function<ParsePosition, Integer> getLineNumber)
             throws ParseException {
-        pp.setIndex("Propertywise".length());
-        final UnicodeSet set = new UnicodeSet(line, pp, symbolTable);
+        final UnicodeSet set = parseUnicodeSet(line, pp);
         if (set.hasStrings()) {
             throw new ParseException(
                     "Set should contain only single code points for property comparison",
                     pp.getIndex());
         }
         expectToken("AreAlike", pp, line);
-        if (pp.getIndex() < line.length()) {
+        if (",".equals(nextToken(new ParsePosition(pp.getIndex()), line))) {
             expectToken(",", pp, line);
             expectToken("Except", pp, line);
             expectToken(":", pp, line);
@@ -398,7 +415,9 @@ private static void propertywiseLine(ParsePosition pp, String line, String file,
             testFailureCount++;
             printErrorLine("Test Failure", Side.START, testFailureCount);
             reportTestFailure(
-                    file, lineNumber, String.join("\n", errorMessageLines).replace('\t', ' '));
+                    file,
+                    getLineNumber.apply(pp),
+                    String.join("\n", errorMessageLines).replace('\t', ' '));
             out.println("<table class='f'>");
             for (String errorMessageLine : errorMessageLines) {
                 out.println("<tr><td>");
@@ -410,10 +429,13 @@ private static void propertywiseLine(ParsePosition pp, String line, String file,
         }
     }
 
-    private static void equivalencesLine(String line, ParsePosition pp, String file, int lineNumber)
+    private static void equivalencesLine(
+            String line,
+            ParsePosition pp,
+            String file,
+            Function<ParsePosition, Integer> getLineNumber)
             throws ParseException {
-        pp.setIndex("OnPairsOf".length());
-        final UnicodeSet domain = new UnicodeSet(line, pp, symbolTable);
+        final UnicodeSet domain = parseUnicodeSet(line, pp);
         expectToken(",", pp, line);
         expectToken("EqualityOf", pp, line);
         final var leftProperty = CompoundProperty.of(LATEST_PROPS, line, pp);
@@ -592,7 +614,9 @@ private static void equivalencesLine(String line, ParsePosition pp, String file,
         errorMessageLines.addAll(counterexamples);
         if (failure) {
             reportTestFailure(
-                    file, lineNumber, String.join("\n", errorMessageLines).replace('\t', ' '));
+                    file,
+                    getLineNumber.apply(pp),
+                    String.join("\n", errorMessageLines).replace('\t', ' '));
         }
         out.println(failure ? "<table class='f'>" : "<table>");
         for (String counterexample : counterexamples) {
@@ -606,9 +630,12 @@ private static void equivalencesLine(String line, ParsePosition pp, String file,
         }
     }
 
-    private static void inLine(ParsePosition pp, String line, String file, int lineNumber)
+    private static void inLine(
+            ParsePosition pp,
+            String line,
+            String file,
+            Function<ParsePosition, Integer> getLineNumber)
             throws ParseException {
-        pp.setIndex(2);
         final PropertyPredicate propertyPredicate = getPropertyPredicate(pp, line);
         final UnicodeMap<String> failures = propertyPredicate.getFailures();
         final UnicodeSet failureSet = failures.keySet();
@@ -627,7 +654,8 @@ private static void inLine(ParsePosition pp, String line, String file, int lineN
             errorLister.setLineSeparator("\n");
             errorLister.showSetNames(new PrintWriter(monoTable), failureSet);
             errorLister.setTabber(htmlTabber);
-            reportTestFailure(file, lineNumber, errorMessage + "\n" + monoTable.toString());
+            reportTestFailure(
+                    file, getLineNumber.apply(pp), errorMessage + "\n" + monoTable.toString());
 
             if (doHtml) {
                 out.println("<table class='f'>");
@@ -642,21 +670,42 @@ private static void inLine(ParsePosition pp, String line, String file, int lineN
         }
     }
 
-    private static void expectToken(String token, ParsePosition pp, String line)
+    private static String nextTokenNoSpace(ParsePosition pp, String text) {
+        if (pp.getIndex() == text.length()) {
+            return null;
+        }
+        int start = pp.getIndex();
+        if (PATTERN_SYNTAX.contains(text.codePointAt(start))) {
+            final String result = Character.toString(text.codePointAt(start));
+            pp.setIndex(start + result.length());
+            return result;
+        } else {
+            final String result = scan(PATTERN_SYNTAX_OR_WHITE_SPACE, text, pp, false);
+            return result.isEmpty() ? null : result;
+        }
+    }
+
+    private static String nextToken(ParsePosition pp, String text) {
+        scan(PATTERN_WHITE_SPACE, text, pp, true);
+        return nextTokenNoSpace(pp, text);
+    }
+
+    private static void expectToken(String token, ParsePosition pp, String text)
             throws ParseException {
-        scan(PATTERN_WHITE_SPACE, line, pp, true);
-        if (!line.substring(pp.getIndex()).startsWith(token)) {
-            throw new ParseException("Expected " + token, pp.getIndex());
+        final var next = new ParsePosition(pp.getIndex());
+        final String actual = nextToken(next, text);
+        if (!token.equals(actual)) {
+            throw new ParseException(
+                    "Expected '" + token + "', got '" + actual + "'", pp.getIndex());
         }
-        pp.setIndex(pp.getIndex() + token.length());
-        scan(PATTERN_WHITE_SPACE, line, pp, true);
+        pp.setIndex(next.getIndex());
     }
 
     private static PropertyPredicate getPropertyPredicate(ParsePosition pp, String line)
             throws ParseException {
         PropertyPredicate predicate;
 
-        final UnicodeSet valueSet = new UnicodeSet(line, pp, symbolTable);
+        final UnicodeSet valueSet = parseUnicodeSet(line, pp);
         expectToken(",", pp, line);
         final UnicodeProperty property1 = CompoundProperty.of(LATEST_PROPS, line, pp);
         final int cp = line.codePointAt(pp.getIndex());
@@ -674,7 +723,7 @@ private static PropertyPredicate getPropertyPredicate(ParsePosition pp, String l
                 final var containment = new PropertyValueContainment();
                 containment.shouldBeInSet = cp == '∈';
                 pp.setIndex(pp.getIndex() + 1);
-                containment.set = new UnicodeSet(line, pp, symbolTable);
+                containment.set = parseUnicodeSet(line, pp);
                 predicate = containment;
                 break;
             default:
@@ -683,9 +732,6 @@ private static PropertyPredicate getPropertyPredicate(ParsePosition pp, String l
         predicate.valueSet = valueSet;
         predicate.property1 = property1;
         scan(PATTERN_WHITE_SPACE, line, pp, true);
-        if (pp.getIndex() != line.length()) {
-            throw new ParseException(line, pp.getIndex());
-        }
         return predicate;
     }
 
@@ -705,8 +751,8 @@ enum Type {
             private Function<List<String>, String> sequenceReduction;
         }
 
-        private static final UnicodeSet PROPCHARS =
-                new UnicodeSet("[a-zA-Z0-9.\\:\\-\\_\\u0020\\p{pattern white space}]");
+        // TODO(egg): Consider bringing back Pattern_White_Space if requiring semicolons.
+        private static final UnicodeSet PROPCHARS = new UnicodeSet("[a-zA-Z0-9.\\:\\-\\_\\u0020]");
         private final List<FilterOrProp> propOrFilters = new ArrayList<FilterOrProp>();
 
         static UnicodeProperty of(
@@ -722,7 +768,7 @@ static UnicodeProperty of(
                 } else if (line.charAt(pp.getIndex()) == '(') {
                     final FilterOrProp propOrFilter = new FilterOrProp();
                     final var matcher =
-                            Pattern.compile("(\\( *([^ )]+)(?: +([^)]+))? *\\)).*")
+                            Pattern.compile("(\\( *([^ )]+)(?: +([^)]+))? *\\)).*", Pattern.DOTALL)
                                     .matcher(line.substring(pp.getIndex()));
                     if (!matcher.matches()) {
                         throw new IllegalArgumentException(
@@ -964,37 +1010,35 @@ protected String _getVersion() {
         }
     }
 
-    private static void letLine(ParsePosition pp, String line) {
-        final int x = line.indexOf('=');
-        final String variable = line.substring(3, x).trim();
-        if (!variable.startsWith("$")) {
-            throw new IllegalArgumentException("Variable must begin with '$': ");
-        }
-        final String value = line.substring(x + 1).trim();
-        pp.setIndex(0);
-        final UnicodeSet valueSet = new UnicodeSet("[" + value + "]", pp, symbolTable);
+    private static void letLine(ParsePosition pp, String source) throws ParseException {
+        expectToken("$", pp, source);
+        final String variable = nextTokenNoSpace(pp, source);
+        expectToken("=", pp, source);
+        final int valueStart = pp.getIndex();
+        final UnicodeSet valueSet = parseUnicodeSet(source, pp);
         valueSet.complement().complement();
 
-        symbolTable.add(variable.substring(1), valueSet.toPattern(false));
+        symbolTable.add(variable, valueSet.toPattern(false));
+        final String value = source.substring(valueStart, pp.getIndex());
         if (DEBUG) {
             System.out.println("Added variable: <" + variable + "><" + value + ">");
         }
-        showSet(pp, value);
+        showSet(new ParsePosition(0), value);
     }
 
-    private static void showLine(String line, ParsePosition pp) {
-        String part = line.substring(4).trim();
-        if (part.startsWith("Each")) {
-            part = part.substring(4).trim();
+    private static void showLine(String source, ParsePosition pp) {
+        // Peek at the next token; consume it only when it is the optional "Each" keyword.
+        final var next = new ParsePosition(pp.getIndex());
+        if ("Each".equals(nextToken(next, source))) {
+            pp.setIndex(next.getIndex());
             showLister.setMergeRanges(false);
         }
-        showSet(pp, part);
+        showSet(pp, source);
         showLister.setMergeRanges(doRange);
     }
 
     private static void showMapLine(String line, ParsePosition pp) {
         String part = line.substring(7).trim();
-        pp.setIndex(0);
         pp.setErrorIndex(-1);
         if (part.startsWith("Each")) {
             part = part.substring(4).trim();
@@ -1009,33 +1051,28 @@ private static void showMapLine(String line, ParsePosition pp) {
         showLister.setMergeRanges(doRange);
     }
 
-    private static void testLine(String line, ParsePosition pp, String file, int lineNumber)
+    private static void testLine(
+            String source,
+            ParsePosition pp,
+            String file,
+            Function<ParsePosition, Integer> getLineNumber)
             throws ParseException {
-        if (line.startsWith("Test")) {
-            line = line.substring(4).trim();
-        }
-
         char relation = 0;
         String rightSide = null;
         String leftSide = null;
         UnicodeSet leftSet = null;
         UnicodeSet rightSet = null;
 
-        pp.setIndex(0);
-        leftSet = new UnicodeSet(line, pp, symbolTable);
-        leftSide = line.substring(0, pp.getIndex());
-        scan(PATTERN_WHITE_SPACE, line, pp, true);
-        relation = line.charAt(pp.getIndex());
+        final int leftStart = pp.getIndex();
+        leftSet = parseUnicodeSet(source, pp);
+        leftSide = source.substring(leftStart, pp.getIndex());
+        scan(PATTERN_WHITE_SPACE, source, pp, true);
+        relation = source.charAt(pp.getIndex());
         checkRelation(pp, relation);
         pp.setIndex(pp.getIndex() + 1); // skip char
-        scan(PATTERN_WHITE_SPACE, line, pp, true);
-        final int start = pp.getIndex();
-        rightSet = new UnicodeSet(line, pp, symbolTable);
-        rightSide = line.substring(start, pp.getIndex());
-        scan(PATTERN_WHITE_SPACE, line, pp, true);
-        if (line.length() != pp.getIndex()) {
-            throw new ParseException("Extra characters at end", pp.getIndex());
-        }
+        final int rightStart = pp.getIndex();
+        rightSet = parseUnicodeSet(source, pp);
+        rightSide = source.substring(rightStart, pp.getIndex());
 
         Expected right_left = Expected.irrelevant;
         Expected rightAndLeft = Expected.irrelevant;
@@ -1078,7 +1115,7 @@ private static void testLine(String line, ParsePosition pp, String file, int lin
                 "But Not In",
                 leftSide,
                 file,
-                lineNumber);
+                getLineNumber.apply(pp));
         checkExpected(
                 rightAndLeft,
                 new UnicodeSet(rightSet).retainAll(leftSet),
@@ -1087,7 +1124,7 @@ private static void testLine(String line, ParsePosition pp, String file, int lin
                 "And In",
                 leftSide,
                 file,
-                lineNumber);
+                getLineNumber.apply(pp));
         checkExpected(
                 left_right,
                 new UnicodeSet(leftSet).removeAll(rightSet),
@@ -1096,7 +1133,7 @@ private static void testLine(String line, ParsePosition pp, String file, int lin
                 "But Not In",
                 rightSide,
                 file,
-                lineNumber);
+                getLineNumber.apply(pp));
     }
 
     public static void checkRelation(ParsePosition pp, char relation) throws ParseException {
@@ -1176,7 +1213,8 @@ private static void checkExpected(
                             getProperties(Settings.lastVersion),
                             IndexUnicodeProperties.make(Settings.lastVersion)));
 
-    private static void testMapLine(String line, ParsePosition pp, int lineNumber)
+    private static void testMapLine(
+            String line, ParsePosition pp, Function<ParsePosition, Integer> getLineNumber)
             throws ParseException {
         char relation = 0;
         String rightSide = null;
@@ -1184,7 +1222,6 @@ private static void testMapLine(String line, ParsePosition pp, int lineNumber)
         UnicodeMap<String> leftSet = null;
         UnicodeMap<String> rightSet = null;
 
-        pp.setIndex(3);
         leftSet = UMP.parse(line, pp);
         leftSide = line.substring(3, pp.getIndex());
         scan(PATTERN_WHITE_SPACE, line, pp, true);
@@ -1240,7 +1277,7 @@ private static void testMapLine(String line, ParsePosition pp, int lineNumber)
                 rightSide,
                 "But Not In",
                 leftSide,
-                lineNumber);
+                getLineNumber.apply(pp));
         checkExpected(
                 rightAndLeft,
                 UnicodeMapParser.retainAll(new UnicodeMap<String>().putAll(rightSet), leftSet),
@@ -1248,7 +1285,7 @@ private static void testMapLine(String line, ParsePosition pp, int lineNumber)
                 rightSide,
                 "And In",
                 leftSide,
-                lineNumber);
+                getLineNumber.apply(pp));
         checkExpected(
                 left_right,
                 UnicodeMapParser.removeAll(new UnicodeMap<String>().putAll(leftSet), rightSet),
@@ -1256,7 +1293,7 @@ private static void testMapLine(String line, ParsePosition pp, int lineNumber)
                 leftSide,
                 "But Not In",
                 rightSide,
-                lineNumber);
+                getLineNumber.apply(pp));
     }
 
     private static void checkExpected(
@@ -1303,8 +1340,7 @@ private static void checkExpected(
     }
 
     private static void showSet(ParsePosition pp, final String value) {
-        pp.setIndex(0);
-        UnicodeSet valueSet = new UnicodeSet(value, pp, symbolTable);
+        UnicodeSet valueSet = parseUnicodeSet(value, pp);
         final int totalSize = valueSet.size();
         int abbreviated = 0;
         if (showRangeLimit >= 0) {
@@ -1344,15 +1380,28 @@ private static void showSet(ParsePosition pp, final String value) {
     }
 
     private static int parseError(
-            int parseErrorCount, String line, Exception e, String file, int lineNumber) {
+            int parseErrorCount,
+            String source,
+            Exception e,
+            int statementStart,
+            String file,
+            int lineNumber) {
         parseErrorCount++;
         if (e instanceof ParseException) {
             final int index = ((ParseException) e).getErrorOffset();
-            line = line.substring(0, index) + "☞" + line.substring(index);
+            final int eol = source.indexOf("\n", index);
+            source =
+                    source.substring(statementStart, index)
+                            + "☞"
+                            + source.substring(index, eol >= 0 ? eol : source.length());
+        } else {
+            final int sol = source.lastIndexOf("\n", statementStart);
+            final int eol = source.indexOf("\n", statementStart);
+            source = source.substring(sol >= 0 ? sol : 0, eol >= 0 ? eol : source.length());
         }
 
         printErrorLine("Parse Failure", Side.START, parseErrorCount);
-        println("**** PARSE ERROR:\t" + line);
+        println("**** PARSE ERROR:\t" + source);
         out.println("<pre>");
         final String message = e.getMessage();
         if (message != null) {
@@ -1400,6 +1449,10 @@ private static void printErrorLine(String title, Side side, int testFailureCount
             Transliterator.createFromRules("any-html", HTML_RULES_CONTROLS, Transliterator.FORWARD);
     private static final UnicodeSet PATTERN_WHITE_SPACE =
             new UnicodeSet("\\p{pattern white space}").freeze();
+    private static final UnicodeSet PATTERN_SYNTAX = new UnicodeSet("\\p{pattern syntax}").freeze();
+    private static final UnicodeSet PATTERN_SYNTAX_OR_WHITE_SPACE =
+            new UnicodeSet("[\\p{pattern white space}\\p{pattern syntax}]").freeze();
+
     private static int testFailureCount;
     private static int parseErrorCount;
     private static BagFormatter errorLister;
@@ -1518,7 +1571,7 @@ public int compare(String o1, String o2) {
 
         public void add(String variable, String value) {
             if (variables.containsKey(variable)) {
-                throw new IllegalArgumentException("Attempt to reset variable");
+                throw new IllegalArgumentException("Attempt to reset variable " + variable);
             }
             variables.put(variable, value.toCharArray());
         }
@@ -1576,7 +1629,10 @@ public boolean applyPropertyAlias(
         }
     }
 
-    public static UnicodeSet parseUnicodeSet(String line, ParsePosition pp) {
-        return new UnicodeSet(line, pp, symbolTable);
+    public static UnicodeSet parseUnicodeSet(String source, ParsePosition pp) {
+        final var relative = new ParsePosition(0);
+        final var result = new UnicodeSet(source.substring(pp.getIndex()), relative, symbolTable);
+        pp.setIndex(pp.getIndex() + relative.getIndex());
+        return result;
     }
 }
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
index 65d0004eb..6d56fa53b 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
@@ -547,7 +547,7 @@ Let $anyNumericValue = \p{Numeric_Value=/-?[0-9]+(.[0-9]+)?/}
 
 # Musical symbol combining marks, other oddities
 
-Let $AlphaExclusions = [\uAA7D \u0F3E\u0F3F\u1063\u1064\u1069-\u106D\u1087-\u108C\u108F\u109A\u109B\u1CE1\u1CF7\uAA7B\uABEC\U0001D165\U0001D166\U0001D16D-\U0001D172][[:gc=mc:]&[:ccc=9:][\u302E\u302F]]
+Let $AlphaExclusions = [[\uAA7D \u0F3E\u0F3F\u1063\u1064\u1069-\u106D\u1087-\u108C\u108F\u109A\u109B\u1CE1\u1CF7\uAA7B\uABEC\U0001D165\U0001D166\U0001D16D-\U0001D172][[:gc=mc:]&[:ccc=9:][\u302E\u302F]]]
 # 6.1.0 Added HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
 # 7.0 Added AA7D
 # 10.0 Added 1CF7 (similar to 1CE1)

From 1e5b44e6b12c5f9a580d6b48ff0a9ec933bc38b5 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Wed, 26 Jun 2024 00:38:37 +0200
Subject: [PATCH 2/6] Break some overly long lines

---
 .../text/UCD/TestUnicodeInvariants.java       |   9 +-
 .../unicode/text/UCD/UnicodeInvariantTest.txt | 191 +++++++++++++++---
 2 files changed, 166 insertions(+), 34 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
index a245e641b..8af2ce387 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
@@ -239,8 +239,9 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang
                 final ParsePosition pp = new ParsePosition(0);
                 for (; ; ) {
                     final int statementStart = pp.getIndex();
+                    final int statementLineNumber = getLineNumber.apply(pp);
                     final String nextToken = nextToken(pp, source);
-                    while (getLineNumber.apply(pp) > lastPrintedLine) {
+                    while (statementLineNumber >= lastPrintedLine) {
                         println(lines.get(lastPrintedLine++));
                     }
                     if (nextToken == null) {
@@ -266,6 +267,10 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang
                             testLine(source, pp, inputFile, getLineNumber);
                         }
                     } catch (final Exception e) {
+                        final int lineNumber = getLineNumber.apply(pp);
+                        while (lineNumber > lastPrintedLine) {
+                            println(lines.get(lastPrintedLine++));
+                        }
                         parseErrorCount =
                                 parseError(
                                         parseErrorCount,
@@ -274,7 +279,7 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang
                                         statementStart,
                                         inputFile,
                                         getLineNumber.apply(pp));
-                        break;
+                        continue;
                     }
                 }
                 println();
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
index 6d56fa53b..d712d1b37 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
@@ -170,7 +170,15 @@ Let $fii = \p{toNFD=/$foo/}
 Let $codepoints = [\u0000-\U0010FFFF]
 
 Let $gcAllPunctuation = \p{gc=/_Punctuation/}
-$gcAllPunctuation = [\p{gc=Close_Punctuation}\p{gc=Connector_Punctuation}}\p{gc=Dash_Punctuation}\p{gc=Final_Punctuation}\p{gc=Initial_Punctuation}\p{gc=Open_Punctuation}\p{gc=Other_Punctuation}]
+$gcAllPunctuation = [
+    \p{gc=Close_Punctuation}
+    \p{gc=Connector_Punctuation}
+    \p{gc=Dash_Punctuation}
+    \p{gc=Final_Punctuation}
+    \p{gc=Initial_Punctuation}
+    \p{gc=Open_Punctuation}
+    \p{gc=Other_Punctuation}
+]
 
 Let $gcAllSymbols = \p{gc=/_Symbol/}
 $gcAllSymbols = [\p{gc=Math_Symbol}\p{gc=Currency_Symbol}\p{gc=Modifier_Symbol}\p{gc=Other_Symbol}]
@@ -269,13 +277,34 @@ Let $BMExclusions = [ ≠ ∤ ∦ ≢ ≭ ⫝̸ ]
 In [\p{dt=canonical}-$BMExclusions], (delete-adjacent-duplicates) * Bidi_M * \P{bc=NSM} * dm = Bidi_M * \P{bc=NSM}
 
 # Additional BIDI invariant constants
-Let $AL_blocks = [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF \U00010D00-\U00010D3F \U00010EC0-\U00010EFF \U00010F30-\U00010F6F \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
-Let $R_blocks = [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F \U00010800-\U00010CFF \U00010D40-\U00010EBF \U00010F00-\U00010F2F \U00010F70-\U00010FFF \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
-# 6.1.0 updated blocks
-# 10.0 updated blocks (Syriac Supplement is bc=AL)
-# 11.0 updated blocks (Hanifi Rohingya, Sogdian, Indic Siyaq Numbers are bc=AL); Old Sogdian is bc=R
-# 12.0 updated blocks (Ottoman Siyaq Numbers is bc=AL)
-# 14.0 updated blocks (Arabic Extended-B is bc=AL)
+Let $AL_blocks = [
+    \u0600-\u07BF
+    \u0860-\u086F          # Syriac Supplement,     10.0
+    \u0870-\u089F          # Arabic Extended-B,     14.0
+    \u08A0-\u08FF
+    \uFB50-\uFDCF
+    \uFDF0-\uFDFF
+    \uFE70-\uFEFF
+    \U00010D00-\U00010D3F  # Hanifi Rohingya,       11.0
+    \U00010EC0-\U00010EFF
+    \U00010F30-\U00010F6F  # Sogdian,               11.0
+    \U0001EC70-\U0001ECBF  # Indic Siyaq Numbers,   11.0
+    \U0001ED00-\U0001ED4F  # Ottoman Siyaq Numbers, 12.0
+    \U0001EE00-\U0001EEFF
+]
+Let $R_blocks = [
+    \u0590-\u05FF
+    \u07C0-\u085F
+    \uFB1D-\uFB4F
+    \U00010800-\U00010CFF
+    \U00010D40-\U00010EBF
+    \U00010F00-\U00010F2F
+    \U00010F70-\U00010FFF
+    \U0001E800-\U0001EC6F
+    \U0001ECC0-\U0001ECFF
+    \U0001ED50-\U0001EDFF
+    \U0001EF00-\U0001EFFF
+]
 
 # Unassigned characters in these blocks have R or AL respectively
 \p{Bidi_Class=R} ⊇ [$R_blocks & \p{gc=Cn}]
@@ -292,7 +321,14 @@ $AL_blocks ∥ [\p{Bidi_Class=L} \p{Bidi_Class=R}]
 
 Let $BN_Exceptions = [\u001C-\u001F\u17B4\u17B5]
 
-[\p{Bidi_Class=BN}] = [\p{di}\p{nchar}\p{gc=Cc}-\p{gc=Mc}-\p{gc=Mn}-\p{gc=Me}-\p{Bidi_C}-\p{alpha}-\p{wspace} - $BN_Exceptions]
+[\p{Bidi_Class=BN}] = [
+    \p{di}\p{nchar}\p{gc=Cc}
+    - \p{gc=Mc} - \p{gc=Mn} - \p{gc=Me}
+    - \p{Bidi_C}
+    - \p{alpha}
+    - \p{wspace}
+    - $BN_Exceptions
+]
 
 # Nonspacing and enclosing combining marks are bc=NSM, with a few exceptions (all of which are nonspacing)
 Let $gcMn_bcL = [\u0CBF\u0CC6\U00011A07\U00011A08\U00011C3F]
@@ -425,25 +461,37 @@ In \P{U-1:GC=Cn}, ccc=U-1:ccc
 
 # Canonical decompositions (minus exclusions) must be identical across releases (also required by strong normalization stability),
 # except where a character and at least one character in its decomposition are both new in the release.
-Let $New_Decompositions = [[\p{Decomposition_Type=Canonical} - \p{Full_Composition_Exclusion}] - [\p{U-1:Decomposition_Type=Canonical} - \p{U-1:Full_Composition_Exclusion}]]
+Let $New_Decompositions = [
+      [    \p{Decomposition_Type=Canonical} -     \p{Full_Composition_Exclusion}]
+    - [\p{U-1:Decomposition_Type=Canonical} - \p{U-1:Full_Composition_Exclusion}]
+]
 $New_Decompositions ⊆ \p{U-1:GC=Cn}
 # Stripping previously-unassigned characters from the current NFD does
 # something, that is, the decomposition contains newly-assigned characters.
 In $New_Decompositions, toNFD * \P{U-1:GC=Cn} ≠ toNFD
 
-Let $Unicode_13_Decompositions = [[\p{U13.0.0:Decomposition_Type=Canonical} - \p{U13.0.0:Full_Composition_Exclusion}] - [\p{U12.1.0:Decomposition_Type=Canonical} - \p{U12.1.0:Full_Composition_Exclusion}]]
+Let $Unicode_13_Decompositions = [
+      [\p{U13.0.0:Decomposition_Type=Canonical} - \p{U13.0.0:Full_Composition_Exclusion}]
+    - [\p{U12.1.0:Decomposition_Type=Canonical} - \p{U12.1.0:Full_Composition_Exclusion}]
+]
 $Unicode_13_Decompositions ⊆ \p{U12.1.0:GC=Cn}
 In $Unicode_13_Decompositions, toNFD * \P{U12.1.0:GC=Cn} ≠ toNFD
 $Unicode_13_Decompositions = [\U00011938]
 $Unicode_13_Decompositions = [\p{Name=DIVES AKURU VOWEL SIGN O}]
 
-Let $Unicode_7_Decompositions = [[\p{U7.0.0:Decomposition_Type=Canonical} - \p{U7.0.0:Full_Composition_Exclusion}] - [\p{U6.3.0:Decomposition_Type=Canonical} - \p{U6.3.0:Full_Composition_Exclusion}]]
+Let $Unicode_7_Decompositions = [
+      [\p{U7.0.0:Decomposition_Type=Canonical} - \p{U7.0.0:Full_Composition_Exclusion}]
+    - [\p{U6.3.0:Decomposition_Type=Canonical} - \p{U6.3.0:Full_Composition_Exclusion}]
+]
 $Unicode_7_Decompositions ⊆ \p{U6.3.0:GC=Cn}
 In $Unicode_7_Decompositions, toNFD * \P{U6.3.0:GC=Cn} ≠ toNFD
 $Unicode_7_Decompositions = [\U0001134B-\U0001134C \U000114BB-\U000114BC \U000114BE \U000115BA-\U000115BB]
 $Unicode_7_Decompositions ⊆ [\p{Name=/^(GRANTHA|TIRHUTA|SIDDHAM) VOWEL SIGN /}]
 
-Let $Unicode_6_1_Decompositions = [[\p{U6.1.0:Decomposition_Type=Canonical} - \p{U6.1.0:Full_Composition_Exclusion}] - [\p{U6.0.0:Decomposition_Type=Canonical} - \p{U6.0.0:Full_Composition_Exclusion}]]
+Let $Unicode_6_1_Decompositions = [
+      [\p{U6.1.0:Decomposition_Type=Canonical} - \p{U6.1.0:Full_Composition_Exclusion}]
+    - [\p{U6.0.0:Decomposition_Type=Canonical} - \p{U6.0.0:Full_Composition_Exclusion}]
+]
 $Unicode_6_1_Decompositions ⊆ \p{U6.0.0:GC=Cn}
 In $Unicode_6_1_Decompositions, toNFD * \P{U6.0.0:GC=Cn} ≠ toNFD
 $Unicode_6_1_Decompositions = [\U0001112E-\U0001112F]
@@ -469,7 +517,9 @@ In $expandingCanonicalDecompositions, Decomposition_Type * (drop 1) * Decomposit
 # Not a stability policy, but it happens to be the case that the second
 # character does not have a decomposition mapping at all:
 In $expandingCanonicalDecompositions, Decomposition_Type * (drop 1) * Decomposition_Mapping = (constant None)
-In $expandingCanonicalDecompositions, Decomposition_Mapping * (drop 1) * Decomposition_Mapping = (drop 1) * Decomposition_Mapping
+In $expandingCanonicalDecompositions,
+      Decomposition_Mapping * (drop 1) * Decomposition_Mapping
+    =                         (drop 1) * Decomposition_Mapping
 
 # Stability: Canonical mappings (Decomposition_Mapping property values) are
 # always limited so that no string when normalized to NFC expands to more than
@@ -488,7 +538,8 @@ In \P{U-1:GC=Cn}, dm=U-1:dm
 # must have ccc=0, except for the Decomposition_Mapping of the following four
 # characters: U+0344, U+0F73, U+0F75, U+0F81.
 Let $canonicallyExpandingNonstarters = [\u0344 \u0F73 \u0F75 \u0F81]
-In [$expandingCanonicalDecompositions - $canonicallyExpandingNonstarters], ccc * (take 1) * Decomposition_Mapping = (constant Not_Reordered)
+In [$expandingCanonicalDecompositions - $canonicallyExpandingNonstarters],
+    ccc * (take 1) * Decomposition_Mapping = (constant Not_Reordered)
 
 # U6.0: Construction of Full_Composition_Exclusion
 # Primary Composites don't include singletons, ccc!=0, or sequences starting with ccc!=0
@@ -584,7 +635,13 @@ Show [\u20b9]
 Let $nonAlphabeticBindus = []
 [\p{InSc=Bindu} - \p{Alphabetic}] = $nonAlphabeticBindus
 
-Let $nonAlphabeticDependentVowels = [\N{ORIYA SIGN OVERLINE}\N{THAI CHARACTER MAITAIKHU}\N{LIMBU SIGN KEMPHRENG}\N{SHARADA VOWEL MODIFIER MARK}\N{SHARADA EXTRA SHORT VOWEL MARK}]
+Let $nonAlphabeticDependentVowels = [
+    \N{ORIYA SIGN OVERLINE}
+    \N{THAI CHARACTER MAITAIKHU}
+    \N{LIMBU SIGN KEMPHRENG}
+    \N{SHARADA VOWEL MODIFIER MARK}
+    \N{SHARADA EXTRA SHORT VOWEL MARK}
+]
 [\p{InSC=Vowel_Dependent} - \p{Alphabetic}] = $nonAlphabeticDependentVowels
 
 # Several invariants from L2/24-009 item 2.2.
@@ -596,7 +653,11 @@ Let $nonAlphabeticAvagrahas = [\N{TIBETAN MARK PALUTA}]  # A punctuation mark.
 [\p{InSC=Avagraha} - $nonAlphabeticAvagrahas] ⊆ \p{Alphabetic}
 
 # Name-based checks.
-Let $nonLowercaseSmallLetters = [ \p{name=/^LIMBU SMALL LETTER/} \N{TURNED GREEK SMALL LETTER IOTA} \p{name=/^(SQUARED|PARENTHESIZED|TAG) LATIN SMALL LETTER/} ]
+Let $nonLowercaseSmallLetters = [
+    \p{name=/^LIMBU SMALL LETTER/}
+    \N{TURNED GREEK SMALL LETTER IOTA}
+    \p{name=/^(SQUARED|PARENTHESIZED|TAG) LATIN SMALL LETTER/}
+]
 Let $nonLowercaseSmallModifierLetters = [ \p{gc=Lm} & \p{name=/^ARABIC SMALL/} ]
 [ \p{name=/\bSMALL LETTER\b/}-\p{gc=Mn}-\p{gc=Lt} - $nonLowercaseSmallLetters ] ⊆ \p{Lowercase}
 [ [\p{gc=Lm} & \p{name=/SMALL/}] - $nonLowercaseSmallModifierLetters ] ⊆ \p{Lowercase}
@@ -633,14 +694,39 @@ In \P{Other_Joining_Type=Deduce_From_General_Category}, Joining_Type = Other_Joi
 # LineBreak property
 ##########################
 
-Let $IDInclusions = [[:block=/Ideographs/:] [[\U00020000-\U0003FFFF][\U0001F000-\U0001FFFF] - [[:block=Symbols for Legacy Computing:][:block=Supplemental Arrows C:]]] & [:gc=Cn:] - [:NChar:]]
-# 9.0 Added range 1F000..1FFFF: all undesignated code points in this range are lb=ID
-# 13.0 exclude those in 1FB00..1FBFF Symbols for Legacy Computing
-# 16.0 exclude Supplemental Arrows C
+Let $IDInclusions = [
+    [:block=/Ideographs/:]
+    [
+        [\U00020000-\U0003FFFF]  # Planes 2 and 3, lb=ID since 5.2.
+        [\U0001F000-\U0001FFFF]  # lb=ID default since 9.0, 147-C25,
+        - [                      # with exceptions:
+              [:block=Symbols for Legacy Computing:]  # since 13.0, 115-C27
+              [:block=Supplemental Arrows C:]         # since 16.0, 177-C47.
+          ]
+    ] & [:gc=Cn:] - [:NChar:]
+]
 \p{LB=ID} ⊃ $IDInclusions
-\p{Line_Break=Unknown} = [\p{General_Category=Unassigned} \p{GeneralCategory=PrivateUse} - $IDInclusions - [\u20C0-\u20CF]]
-
-Let $BrahmicLineBreaking = [\p{sc=Balinese}\p{sc=Batak}\p{sc=Brahmi}\p{sc=Cham}\p{sc=DivesAkuru}\p{sc=Grantha}\p{sc=Javanese}\p{sc=Makasar}\p{sc=Kawi}\p{sc=Cham}\p{sc=Makasar}\p{sc=Tulu_Tigalari}\p{sc=Gurung_Khema}]
+\p{Line_Break=Unknown} = [
+      \p{General_Category=Unassigned} \p{GeneralCategory=PrivateUse}
+    - $IDInclusions
+    - [\u20C0-\u20CF]  # Unassigned currency symbols are lb=PR since 6.3, 133-C26.
+]
+
+Let $BrahmicLineBreaking = [
+    \p{sc=Balinese}
+    \p{sc=Batak}
+    \p{sc=Brahmi}
+    \p{sc=Cham}
+    \p{sc=DivesAkuru}
+    \p{sc=Grantha}
+    \p{sc=Javanese}
+    \p{sc=Makasar}
+    \p{sc=Kawi}
+    \p{sc=Cham}
+    \p{sc=Makasar}
+    \p{sc=Tulu_Tigalari}
+    \p{sc=Gurung_Khema}
+]
 Let $VFScripts = [\p{sc=Batak}]
 
 Let $OPInclusions = [\u00A1\u00BF\u2E18\U00013258-\U0001325A\U00013286\U00013288\U00013379\U0001342F\U00013437\U0001343C\U0001343E\U000145CE\U0001E95E-\U0001E95F]
@@ -658,9 +744,15 @@ Let $OPInclusions = [\u00A1\u00BF\u2E18\U00013258-\U0001325A\U00013286\U00013288
 \p{LB=VI} = [[\p{Indic_Syllabic_Category=Virama}\p{Indic_Syllabic_Category=Invisible_Stacker}] & $BrahmicLineBreaking]
 \p{LB=VF} = [\p{Indic_Syllabic_Category=Reordering_Killer} & $VFScripts]
 
-# 15.1: Action item UTC-176-A81: change [[:PCM:]-\u070F] lb=AL->NU
-\p{LB=CM} = [[\u3035] \p{GC=Mn} \p{GC=Me} \p{GC=Mc} \p{GC=Cc} \p{GC=Cf} -[\U00013437\U00013438\U0001343C-\U0001343F] -\p{LB=SA} -\p{LB=WJ} -\p{LB=ZW} -\p{LB=BA} -\p{LB=LF} -\p{LB=BK} -\p{LB=CR} -\p{LB=NL} -\p{LB=GL} -\p{LB=AL} -\p{LB=ZWJ} - \p{LB=VI} - \p{LB=VF} - \p{LB=NU}]
-# Excluded Egyptian controls begin/end segment etc. 13437, 13438 & 1343C..1343F (gc=Cf, lb=OP/CL)
+\p{LB=CM} = [
+      [\u3035] \p{GC=Mn} \p{GC=Me} \p{GC=Mc} \p{GC=Cc} \p{GC=Cf}
+    - [\U00013437\U00013438\U0001343C-\U0001343F]  # Egyptian controls begin/end segment etc. (gc=Cf, lb=OP/CL)
+    - \p{LB=SA} - \p{LB=WJ} - \p{LB=ZW} - \p{LB=BA}
+    - \p{LB=LF} - \p{LB=BK} - \p{LB=CR} - \p{LB=NL}
+    - \p{LB=GL} - \p{LB=AL} - \p{LB=ZWJ}
+    - \p{LB=VI} - \p{LB=VF}
+    - \p{LB=NU}  # 176-A81 changed [[:PCM:]-\u070F] from lb=AL to lb=NU
+]
 
 #  3.0.0: Numeric characters consist of decimal digits (all characters of General_Category Nd),
 #         except those with East_Asian_Width F (Fullwidth)
@@ -725,7 +817,17 @@ Let $QUInclusions = [\u275F-\u2760 \U0001F676-\U0001F678 \u0022 \u0027 \u275B-\u
 # covered by adding them to the exception set $SAScriptExceptions for the test.
 
 # SA are limited to certain scripts:
-Let $SAScripts = [\p{script=ahom} \p{script=thai} \p{script=lao} \p{script=myanmar} \p{script=khmer} \p{script=Tai_Le} \p{script=New_Tai_Lue} \p{script=Tai_Tham} \p{script=Tai_Viet}]
+Let $SAScripts = [
+    \p{script=ahom}
+    \p{script=thai}
+    \p{script=lao}
+    \p{script=myanmar}
+    \p{script=khmer}
+    \p{script=Tai_Le}
+    \p{script=New_Tai_Lue}
+    \p{script=Tai_Tham}
+    \p{script=Tai_Viet}
+]
 $SAScripts ⊇ \p{LineBreak=SA}
 
 # And in $SA scripts, they are all the alphabetic spacing characters, plus some odd Cf & Mn, plus the NEW TAI LUE THAM DIGIT ONE
@@ -845,9 +947,12 @@ Let $PostBaseSpacingMarks_Missed = []
 Let $TwoForgottenMusicalSymbols = \p{Name=/^MUSICAL SYMBOL COMBINING (SPRECHGESANG STEM|AUGMENTATION DOT)$/}
 Let $FourteenSpacingViramas = [\p{U15.1.0:ccc=9}&\p{U15.1.0:gc=Mc}]
 Let $TwoVietnameseReadingMarks = [\p{U15.1.0:ccc=6}]
-[\P{U4.0.0:ccc=0}  - \p{U4.0.0:Grapheme_Extend}] = [$TwoForgottenMusicalSymbols \p{Name=/^MUSICAL SYMBOL COMBINING FLAG-[3-5]$/}]
+[\P{U4.0.0:ccc=0}  - \p{U4.0.0:Grapheme_Extend}] = [$TwoForgottenMusicalSymbols
+                                                    \p{Name=/^MUSICAL SYMBOL COMBINING FLAG-[3-5]$/}]
 [\P{U4.1.0:ccc=0}  - \p{U4.1.0:GCB=Extend}]      = $TwoForgottenMusicalSymbols
-[\P{U15.1.0:ccc=0} - \p{U15.1.0:GCB=Extend}]     = [$TwoForgottenMusicalSymbols $FourteenSpacingViramas $TwoVietnameseReadingMarks]
+[\P{U15.1.0:ccc=0} - \p{U15.1.0:GCB=Extend}]     = [$TwoForgottenMusicalSymbols
+                                                    $FourteenSpacingViramas
+                                                    $TwoVietnameseReadingMarks]
  \P{        ccc=0} ⊆ \p{        GCB=Extend}
 
 # Characters that appear in non-initial position in the canonical decomposition
@@ -1037,7 +1142,17 @@ $NonOtherLetterIdeographs = [\p{Ideographic} - \p{gc=Lo}]
 Let $CommonIdeographs = [〆]
 $CommonIdeographs = [\p{Ideographic} & \p{sc=Common}]
 
-\p{Ideographic} = [ $NonOtherLetterIdeographs $CommonIdeographs [ \p{gc=Lo} & [\p{Script=Han} \p{Script=Tangut} \p{Script=Nushu} \p{Script=Khitan_Small_Script}] ] ]
+\p{Ideographic} = [
+    $NonOtherLetterIdeographs $CommonIdeographs
+    [
+        \p{gc=Lo} & [
+            \p{Script=Han}
+            \p{Script=Tangut}
+            \p{Script=Nushu}
+            \p{Script=Khitan_Small_Script}
+        ]
+    ]
+]
 
 [ [\p{Ideographic}&\p{sc=Han}] - \p{nfkcqc=n} - $NonOtherLetterIdeographs ] = \p{Unified_Ideograph}
 
@@ -1046,7 +1161,19 @@ Let $unihanScope = [\p{Block=/^CJK.(Unified|Compatibility).Ideographs/} - \p{gc=
 $unihanScope = [\p{gc=Lo} & \p{sc=Hani}]
 $unihanScope = \P{kRSUnicode=@none@}
 $unihanScope = \P{kTotalStrokes=@none@}
-$unihanScope = [ \P{kIRG_GSource=@none@} \P{kIRG_HSource=@none@} \P{kIRG_JSource=@none@} \P{kIRG_KPSource=@none@} \P{kIRG_KSource=@none@} \P{kIRG_MSource=@none@} \P{kIRG_SSource=@none@} \P{kIRG_TSource=@none@} \P{kIRG_UKSource=@none@} \P{kIRG_USource=@none@} \P{kIRG_VSource=@none@} ]
+$unihanScope = [
+    \P{kIRG_GSource=@none@}
+    \P{kIRG_HSource=@none@}
+    \P{kIRG_JSource=@none@}
+    \P{kIRG_KPSource=@none@}
+    \P{kIRG_KSource=@none@}
+    \P{kIRG_MSource=@none@}
+    \P{kIRG_SSource=@none@}
+    \P{kIRG_TSource=@none@}
+    \P{kIRG_UKSource=@none@}
+    \P{kIRG_USource=@none@}
+    \P{kIRG_VSource=@none@}
+]
 
 # TODO(eggrobin): Should those two have a kMandarin, or this not actually an invariant?
 # See https://www.unicode.org/review/pri483/feedback.html#ID20240118004124.

From f3a1b4f883cc7e94dd4c521b285cb74b53fa626b Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Wed, 26 Jun 2024 01:57:44 +0200
Subject: [PATCH 3/6] Better error reporting

---
 .../text/UCD/TestUnicodeInvariants.java       | 118 +++++++++++-------
 1 file changed, 76 insertions(+), 42 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
index 8af2ce387..bb706a526 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
@@ -237,11 +237,12 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang
                         };
                 int lastPrintedLine = 0;
                 final ParsePosition pp = new ParsePosition(0);
+                boolean followingParseError = false;
                 for (; ; ) {
                     final int statementStart = pp.getIndex();
                     final int statementLineNumber = getLineNumber.apply(pp);
                     final String nextToken = nextToken(pp, source);
-                    while (statementLineNumber >= lastPrintedLine) {
+                    while (lastPrintedLine < statementLineNumber) {
                         println(lines.get(lastPrintedLine++));
                     }
                     if (nextToken == null) {
@@ -266,20 +267,35 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang
                             pp.setIndex(statementStart);
                             testLine(source, pp, inputFile, getLineNumber);
                         }
+                        followingParseError = false;
                     } catch (final Exception e) {
-                        final int lineNumber = getLineNumber.apply(pp);
-                        while (lineNumber > lastPrintedLine) {
-                            println(lines.get(lastPrintedLine++));
+                        if (!followingParseError) {
+                            final int lineNumber = getLineNumber.apply(pp);
+                            while (lineNumber > lastPrintedLine) {
+                                println(lines.get(lastPrintedLine++));
+                            }
+                            parseErrorCount =
+                                    parseError(
+                                            parseErrorCount,
+                                            source,
+                                            e,
+                                            statementStart,
+                                            inputFile,
+                                            getLineNumber.apply(pp));
+                        }
+                        // Give up on the rest of this line; it is unlikely to contain
+                        // anything we can parse.
+                        // Resume parsing at the next line.  Since that line may be the
+                        // remainder of the statement we failed to parse, do not report
+                        // further errors until we successfully parse *something*.
+                        final int nextLine = source.indexOf("\n", pp.getIndex());
+                        if (nextLine >= 0) {
+                            pp.setIndex(source.indexOf("\n", pp.getIndex()));
+                            followingParseError = true;
+                            continue;
+                        } else {
+                            break;
                         }
-                        parseErrorCount =
-                                parseError(
-                                        parseErrorCount,
-                                        source,
-                                        e,
-                                        statementStart,
-                                        inputFile,
-                                        getLineNumber.apply(pp));
-                        continue;
                     }
                 }
                 println();
@@ -359,7 +375,7 @@ private static void propertywiseLine(
             throws ParseException {
         final UnicodeSet set = parseUnicodeSet(line, pp);
         if (set.hasStrings()) {
-            throw new ParseException(
+            throw new BackwardParseException(
                     "Set should contain only single code points for property comparison",
                     pp.getIndex());
         }
@@ -697,13 +713,9 @@ private static String nextToken(ParsePosition pp, String text) {
 
     private static void expectToken(String token, ParsePosition pp, String text)
             throws ParseException {
-        final var next = new ParsePosition(pp.getIndex());
-        final String actual = nextToken(next, text);
-        if (!token.equals(actual)) {
-            throw new ParseException(
-                    "Expected '" + token + "', got '" + actual + "'", pp.getIndex());
+        if (!token.equals(nextToken(pp, text))) {
+            throw new BackwardParseException("Expected '" + token + "'", pp.getIndex());
         }
-        pp.setIndex(next.getIndex());
     }
 
     private static PropertyPredicate getPropertyPredicate(ParsePosition pp, String line)
@@ -761,24 +773,28 @@ enum Type {
         private final List<FilterOrProp> propOrFilters = new ArrayList<FilterOrProp>();
 
         static UnicodeProperty of(
-                UnicodeProperty.Factory propSource, String line, ParsePosition pp) {
+                UnicodeProperty.Factory propSource, String source, ParsePosition pp)
+                throws ParseException {
             final CompoundProperty result = new CompoundProperty();
             while (true) {
-                scan(PATTERN_WHITE_SPACE, line, pp, true);
-                if (UnicodeSet.resemblesPattern(line, pp.getIndex())) {
+                scan(PATTERN_WHITE_SPACE, source, pp, true);
+                if (UnicodeSet.resemblesPattern(source, pp.getIndex())) {
                     final FilterOrProp propOrFilter = new FilterOrProp();
-                    propOrFilter.filter = parseUnicodeSet(line, pp);
+                    propOrFilter.filter = parseUnicodeSet(source, pp);
                     propOrFilter.type = FilterOrProp.Type.filter;
                     result.propOrFilters.add(propOrFilter);
-                } else if (line.charAt(pp.getIndex()) == '(') {
+                } else if (source.charAt(pp.getIndex()) == '(') {
                     final FilterOrProp propOrFilter = new FilterOrProp();
                     final var matcher =
                             Pattern.compile("(\\( *([^ )]+)(?: +([^)]+))? *\\)).*", Pattern.DOTALL)
-                                    .matcher(line.substring(pp.getIndex()));
+                                    .matcher(source.subSequence(pp.getIndex(), source.length()));
                     if (!matcher.matches()) {
                         throw new IllegalArgumentException(
                                 "Expected (<operation> <args>), got "
-                                        + line.substring(pp.getIndex()));
+                                        + source.substring(
+                                                pp.getIndex(),
+                                                Math.min(pp.getIndex() + 50, source.length()))
+                                        + "…");
                     }
                     propOrFilter.type = FilterOrProp.Type.sequenceTransformation;
                     final String expression = matcher.group(1);
@@ -851,7 +867,7 @@ static UnicodeProperty of(
                     result.propOrFilters.add(propOrFilter);
                     pp.setIndex(pp.getIndex() + expression.length());
                 } else {
-                    final String propName = scan(PROPCHARS, line, pp, true);
+                    final String propName = scan(PROPCHARS, source, pp, true);
                     if (propName.length() > 0) {
                         final FilterOrProp propOrFilter = new FilterOrProp();
                         final VersionedProperty xprop =
@@ -872,12 +888,12 @@ static UnicodeProperty of(
                         break;
                     }
                 }
-                scan(PATTERN_WHITE_SPACE, line, pp, true);
+                scan(PATTERN_WHITE_SPACE, source, pp, true);
                 final int pos = pp.getIndex();
-                if (pos == line.length()) {
+                if (pos == source.length()) {
                     break;
                 }
-                final int cp = line.charAt(pos);
+                final int cp = source.charAt(pos);
                 if (cp != '*') {
                     break;
                 }
@@ -1031,10 +1047,11 @@ private static void letLine(ParsePosition pp, String source) throws ParseExcepti
         showSet(new ParsePosition(0), value);
     }
 
-    private static void showLine(String source, ParsePosition pp) {
+    private static void showLine(String source, ParsePosition pp) throws ParseException {
         final var next = new ParsePosition(pp.getIndex());
-        if (next.equals("Each")) {
+        if (nextToken(next, source).equals("Each")) {
             showLister.setMergeRanges(false);
+            pp.setIndex(next.getIndex());
         }
         showSet(pp, source);
         showLister.setMergeRanges(doRange);
@@ -1344,7 +1361,7 @@ private static void checkExpected(
         nf.setGroupingUsed(true);
     }
 
-    private static void showSet(ParsePosition pp, final String value) {
+    private static void showSet(ParsePosition pp, final String value) throws ParseException {
         UnicodeSet valueSet = parseUnicodeSet(value, pp);
         final int totalSize = valueSet.size();
         int abbreviated = 0;
@@ -1397,7 +1414,7 @@ private static int parseError(
             final int eol = source.indexOf("\n", index);
             source =
                     source.substring(statementStart, index)
-                            + "☞"
+                            + (e instanceof BackwardParseException ? "☜" : "☞")
                             + source.substring(index, eol >= 0 ? eol : source.length());
         } else {
             final int sol = source.lastIndexOf("\n", statementStart);
@@ -1412,7 +1429,7 @@ private static int parseError(
         if (message != null) {
             println("##" + message);
         }
-        reportParseError(file, lineNumber, message);
+        reportParseError(file, lineNumber, message + "\n" + source);
         e.printStackTrace(out);
 
         out.println("</pre>");
@@ -1448,7 +1465,8 @@ private static void printErrorLine(String title, Side side, int testFailureCount
 
     private static final String HTML_RULES_CONTROLS =
             HTML_RULES
-                    + ":: [[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:] - [\\u0020\\u0009]] hex/unicode ; ";
+                    + ":: [[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:] - [\\u0020\\u0009\\u000A]] hex/unicode ; "
+                    + "\\u000A > '<br>'";
 
     public static final Transliterator toHTMLControl =
             Transliterator.createFromRules("any-html", HTML_RULES_CONTROLS, Transliterator.FORWARD);
@@ -1634,10 +1652,26 @@ public boolean applyPropertyAlias(
         }
     }
 
-    public static UnicodeSet parseUnicodeSet(String source, ParsePosition pp) {
-        final var relative = new ParsePosition(0);
-        final var result = new UnicodeSet(source.substring(pp.getIndex()), relative, symbolTable);
-        pp.setIndex(pp.getIndex() + relative.getIndex());
-        return result;
+    // A ParseException whose error offset is *after* the problem.  Our other parse exceptions
+    // are thrown with the offset *before* the problem.  parseError checks for this type to
+    // choose which way its marker points in the reported source: ☜ for this class, ☞ otherwise.
+    public static class BackwardParseException extends ParseException {
+        public BackwardParseException(String s, int errorOffset) {
+            super(s, errorOffset);
+        }
+    }
+
+    public static UnicodeSet parseUnicodeSet(String source, ParsePosition pp)
+            throws ParseException {
+        try {
+            return new UnicodeSet(source, pp, symbolTable);
+        } catch (IllegalArgumentException e) {
+            // ICU messages carry the whole escaped input after ` at "`, which is unreadable
+            // for a large multi-line source; keep only the text before it.  A null message
+            // falls back to e.toString() so the original failure is not masked by an NPE.
+            final String raw = e.getMessage();
+            final String message = raw == null ? e.toString() : raw.split(" at \"", 2)[0];
+            throw new BackwardParseException(message, pp.getIndex());
+        }
+    }
 }

From cde46bd20c2f69c993212364abad09d4ed4208a9 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Wed, 26 Jun 2024 02:14:16 +0200
Subject: [PATCH 4/6] correct references

---
 .../org/unicode/text/UCD/UnicodeInvariantTest.txt         | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
index d712d1b37..9f684144f 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
@@ -696,11 +696,11 @@ In \P{Other_Joining_Type=Deduce_From_General_Category}, Joining_Type = Other_Joi
 
 Let $IDInclusions = [
     [:block=/Ideographs/:]
-    [
-        [\U00020000-\U0003FFFF]  # Planes 2 and 3, lb=ID since 5.2.
-        [\U0001F000-\U0001FFFF]  # lb=ID default since 9.0, 147-C25,
+    [  # Some ranges default to lb=ID even outside of any blocks:
+        [\U00020000-\U0003FFFF]  # Planes 2 and 3, lb=ID since 5.2, 115-C27.
+        [\U0001F000-\U0001FFFF]  # SMP range lb=ID by default since 9.0, 147-C25,
         - [                      # with exceptions:
-              [:block=Symbols for Legacy Computing:]  # since 13.0, 115-C27
+              [:block=Symbols for Legacy Computing:]  # since 13.0, 162-A67;
               [:block=Supplemental Arrows C:]         # since 16.0, 177-C47.
           ]
     ] & [:gc=Cn:] - [:NChar:]

From 8620d050260ef5e04702c4817f6fedb1fd869da5 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Wed, 26 Jun 2024 02:37:50 +0200
Subject: [PATCH 5/6] Probably friendlier to make this one a lookahead

---
 .../java/org/unicode/text/UCD/TestUnicodeInvariants.java    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
index bb706a526..ef7d5a57c 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
@@ -713,9 +713,11 @@ private static String nextToken(ParsePosition pp, String text) {
 
     private static void expectToken(String token, ParsePosition pp, String text)
             throws ParseException {
-        if (!token.equals(nextToken(pp, text))) {
-            throw new BackwardParseException("Expected '" + token + "'", pp.getIndex());
+        ParsePosition next = new ParsePosition(pp.getIndex());
+        if (!token.equals(nextToken(next, text))) {
+            throw new ParseException("Expected '" + token + "'", pp.getIndex());
         }
+        pp.setIndex(next.getIndex());
     }
 
     private static PropertyPredicate getPropertyPredicate(ParsePosition pp, String line)

From e31dbfaaade633128a997bf6abba0157033ecea3 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Wed, 26 Jun 2024 04:27:21 +0200
Subject: [PATCH 6/6] Nicer lookahead

---
 .../text/UCD/TestUnicodeInvariants.java       | 95 ++++++++++++-------
 1 file changed, 63 insertions(+), 32 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
index ef7d5a57c..d809a1659 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
@@ -241,7 +241,7 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang
                 for (; ; ) {
                     final int statementStart = pp.getIndex();
                     final int statementLineNumber = getLineNumber.apply(pp);
-                    final String nextToken = nextToken(pp, source);
+                    final var nextToken = Lookahead.oneToken(pp, source);
                     while (lastPrintedLine < statementLineNumber) {
                         println(lines.get(lastPrintedLine++));
                     }
@@ -249,19 +249,19 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang
                         break;
                     }
                     try {
-                        if (nextToken.equals("Let")) {
+                        if (nextToken.accept("Let")) {
                             letLine(pp, source);
-                        } else if (nextToken.equals("In")) {
+                        } else if (nextToken.accept("In")) {
                             inLine(pp, source, inputFile, getLineNumber);
-                        } else if (nextToken.equals("Propertywise")) {
+                        } else if (nextToken.accept("Propertywise")) {
                             propertywiseLine(pp, source, inputFile, getLineNumber);
-                        } else if (nextToken.equals("Map")) {
+                        } else if (nextToken.accept("Map")) {
                             testMapLine(source, pp, getLineNumber);
-                        } else if (nextToken.equals("ShowMap")) {
+                        } else if (nextToken.accept("ShowMap")) {
                             showMapLine(source, pp);
-                        } else if (nextToken.equals("Show")) {
+                        } else if (nextToken.accept("Show")) {
                             showLine(source, pp);
-                        } else if (nextToken.equals("OnPairsOf")) {
+                        } else if (nextToken.accept("OnPairsOf")) {
                             equivalencesLine(source, pp, inputFile, getLineNumber);
                         } else {
                             pp.setIndex(statementStart);
@@ -380,8 +380,7 @@ private static void propertywiseLine(
                     pp.getIndex());
         }
         expectToken("AreAlike", pp, line);
-        if (",".equals(nextToken(new ParsePosition(pp.getIndex()), line))) {
-            expectToken(",", pp, line);
+        if (Lookahead.oneToken(pp, line).accept(",")) {
             expectToken("Except", pp, line);
             expectToken(":", pp, line);
         }
@@ -691,33 +690,67 @@ private static void inLine(
         }
     }
 
-    private static String nextTokenNoSpace(ParsePosition pp, String text) {
-        if (pp.getIndex() == text.length()) {
-            return null;
+    // A one-token lookahead.
+    // Tokens are defined as runs of [^\p{Pattern_White_Space}\p{Pattern_Syntax}],
+    // or single code points in \p{Pattern_Syntax}.
+    private static class Lookahead {
+        // Advances pp through any pattern white space, then looks ahead one token.
+        public static Lookahead oneToken(ParsePosition pp, String text) {
+            scan(PATTERN_WHITE_SPACE, text, pp, true);
+            return oneTokenNoSpace(pp, text);
         }
-        int start = pp.getIndex();
-        if (PATTERN_SYNTAX.contains(text.codePointAt(start))) {
-            final String result = Character.toString(text.codePointAt(start));
-            pp.setIndex(start + result.length());
-            return result;
-        } else {
-            final String result = scan(PATTERN_SYNTAX_OR_WHITE_SPACE, text, pp, false);
-            return result.isEmpty() ? null : result;
+
+        // Returns null at end of text or before pattern white space; else looks ahead one token.
+        public static Lookahead oneTokenNoSpace(ParsePosition pp, String text) {
+            ParsePosition next = new ParsePosition(pp.getIndex());
+            if (next.getIndex() == text.length()) {
+                return null;
+            }
+            int start = next.getIndex();
+            if (PATTERN_SYNTAX.contains(text.codePointAt(start))) {
+                final String result = Character.toString(text.codePointAt(start));
+                next.setIndex(start + result.length());
+                return new Lookahead(result, pp, next);
+            } else {
+                final String result = scan(PATTERN_SYNTAX_OR_WHITE_SPACE, text, next, false);
+                return result.isEmpty() ? null : new Lookahead(result, pp, next);
+            }
+        }
+
+        private Lookahead(String token, ParsePosition pp, ParsePosition next) {
+            this.token = token;
+            this.pp = pp;
+            this.next = next;
         }
-    }
 
-    private static String nextToken(ParsePosition pp, String text) {
-        scan(PATTERN_WHITE_SPACE, text, pp, true);
-        return nextTokenNoSpace(pp, text);
+        // Advances the ParsePosition passed at construction past the token, and returns the token.
+        public String consume() {
+            pp.setIndex(next.getIndex());
+            return token;
+        }
+
+        // If this token is expected, advances the ParsePosition passed at construction past the
+        // token and returns true.
+        // Otherwise, this function has no effect and returns false.
+        public boolean accept(String expected) {
+            if (expected.equals(token)) {
+                consume();
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+        private final String token;
+        private final ParsePosition pp;
+        private final ParsePosition next;
     }
 
     private static void expectToken(String token, ParsePosition pp, String text)
             throws ParseException {
-        ParsePosition next = new ParsePosition(pp.getIndex());
-        if (!token.equals(nextToken(next, text))) {
+        if (!Lookahead.oneToken(pp, text).accept(token)) {
             throw new ParseException("Expected '" + token + "'", pp.getIndex());
         }
-        pp.setIndex(next.getIndex());
     }
 
     private static PropertyPredicate getPropertyPredicate(ParsePosition pp, String line)
@@ -1035,7 +1068,7 @@ protected String _getVersion() {
 
     private static void letLine(ParsePosition pp, String source) throws ParseException {
         expectToken("$", pp, source);
-        final String variable = nextTokenNoSpace(pp, source);
+        final String variable = Lookahead.oneTokenNoSpace(pp, source).consume();
         expectToken("=", pp, source);
         final int valueStart = pp.getIndex();
         final UnicodeSet valueSet = parseUnicodeSet(source, pp);
@@ -1050,10 +1083,8 @@ private static void letLine(ParsePosition pp, String source) throws ParseExcepti
     }
 
     private static void showLine(String source, ParsePosition pp) throws ParseException {
-        final var next = new ParsePosition(pp.getIndex());
-        if (nextToken(next, source).equals("Each")) {
+        if (Lookahead.oneToken(pp, source).accept("Each")) {
             showLister.setMergeRanges(false);
-            pp.setIndex(next.getIndex());
         }
         showSet(pp, source);
         showLister.setMergeRanges(doRange);