From 0235209330fbe196a68ec23beeb04105e3e8ee30 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 23 Sep 2024 20:19:18 +0100 Subject: [PATCH] Post-16 invariants (#940) * Test MCM stability * Fix line numbers for invariant test failures * Bindestrich Co-authored-by: Markus Scherer --------- Co-authored-by: Markus Scherer --- .../text/UCD/TestUnicodeInvariants.java | 21 +++++++++++++++++-- .../unicode/text/UCD/UnicodeInvariantTest.txt | 7 ++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java index ac507c58e..47fbd56de 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java @@ -184,7 +184,7 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang } final var noComments = new StringBuilder(); final List lines = new ArrayList<>(); - final List lineBeginnings = new ArrayList(); + final List lineBeginnings = new ArrayList<>(); try (final BufferedReader in = getInputReader(inputFile)) { in.lines() .forEach( @@ -234,7 +234,24 @@ public static int testInvariants(String inputFile, String suffix, boolean doRang position -> { for (int i = 0; i < lineBeginnings.size(); ++i) { if (lineBeginnings.get(i) > position.getIndex()) { - return i; // 1-based line number. + // The error is before the beginning of line i (0-based), thus + // on line i (1-based). + return i; + } else if (lineBeginnings.get(i) == position.getIndex()) { + // The position is at the beginning of a line; this happens when a + // statement has been successfully parsed, but then fails for + // non-syntactic reasons. + // The parse position is then the beginning of the next + // statement. + // Backtrack to the last nonempty line (ignoring comments), + // which is the last line of the failing statement. 
+ int indexInTrimmedSource = position.getIndex(); + while (lineBeginnings.get(i) == indexInTrimmedSource + && indexInTrimmedSource > 0) { + --indexInTrimmedSource; + --i; + } + return i + 1; } } return lineBeginnings.size(); diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 79bcdcbb4..fe665e467 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -746,11 +746,8 @@ In \P{Other_Joining_Type=Deduce_From_General_Category}, Joining_Type = Other_Joi [ \p{Other_Joining_Type=Deduce_From_General_Category} & [\p{gc=Mn}\p{gc=Me}\p{gc=Cf}] ] ⊆ \p{Joining_Type=Transparent} [ \p{Other_Joining_Type=Deduce_From_General_Category} - [\p{gc=Mn}\p{gc=Me}\p{gc=Cf}] ] ⊆ \p{Joining_Type=Non_Joining} -## Modifier_Combining_Mark stability. Not yet testable, since MCM does not exist before 16.0. -# NOTICE TO THE MAINTAINER: The following line is intended to fail after 16.0; -# When it does, remove it and uncomment the line after it. -\p{gc=Cn} = \p{U16:gc=Cn} # ☚ REMOVE ME WHEN I FAIL… -# … AND UNCOMMENT ME. ☛ # In \P{U-1:gc=Cn}, Modifier_Combining_Mark = U-1:Modifier_Combining_Mark +## Modifier_Combining_Mark stability. +In \P{U-1:gc=Cn}, Modifier_Combining_Mark = U-1:Modifier_Combining_Mark ########################## # LineBreak property