From e4f3c4df93142eb0e8f66ad19de4dfec8a3a3652 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 24 May 2024 20:40:35 +0200 Subject: [PATCH] Move the security invariants to their own CI check (#834) Co-authored-by: Markus Scherer --- .github/workflows/cli-build-instructions.yml | 57 ++++++++++++++++++- .../text/UCD/TestUnicodeInvariants.java | 45 +++++++++------ .../text/UCD/SecurityInvariantTest.txt | 28 +++++++++ .../unicode/text/UCD/UnicodeInvariantTest.txt | 28 --------- .../text/UCD/TestTestUnicodeInvariants.java | 8 ++- 5 files changed, 117 insertions(+), 49 deletions(-) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 729d31bd8..c818d6948 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -88,7 +88,7 @@ jobs: # change anything, which makes little sense; but that is the job of the # other job. - name: Run invariant tests - run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION + run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -170,7 +170,7 @@ jobs: - name: Run command - Build and Test run: | cd unicodetools/mine/src - MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DEMIT_GITHUB_ERRORS + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -Dtest=!TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DEMIT_GITHUB_ERRORS env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -322,3 +322,56 @@ jobs: compgen -G "../Generated/UCA/*/CheckCollationValidity.html" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # Out-of-source build. + security: + name: Check security data invariants + runs-on: ubuntu-latest + steps: + - name: Checkout Unicode Tools + uses: actions/checkout@v3 + with: + repository: unicode-org/unicodetools + path: unicodetools/mine/src + - name: Get the CLDR_REF from pom.xml + id: cldr_ref + run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} + - name: Verify CLDR checkout ref + run: echo CLDR_REF="${{ steps.cldr_ref.outputs.CLDR_REF }}" && [ "${{ steps.cldr_ref.outputs.CLDR_REF }}x" != "x" ] # fail if empty + - name: Cache CLDR repository + uses: actions/cache@v3 + with: + path: cldr/mine/src + key: cldr-${{ steps.cldr_ref.outputs.CLDR_REF }} + restore-keys: | + cldr + - name: Check out CLDR + uses: actions/checkout@v3 + with: + repository: unicode-org/cldr + path: cldr/mine/src + ref: main + fetch-depth: 0 + - name: Switch CLDR to CLDR_REF + run: cd cldr/mine/src && git fetch && git checkout ${{ steps.cldr_ref.outputs.CLDR_REF }} + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Cache local Maven repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Set up out-of-source output dir + run: | + mkdir -p unicodetools/mine/Generated/BIN + + - name: Run invariant tests + run: | + cd unicodetools/mine/src + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java index 14dc5993c..d122441e5 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java @@ -231,7 +231,7 @@ public static int testInvariants(String inputFile, boolean doRange) throws IOExc } else if (line.startsWith("Let")) { letLine(pp, line); } else if (line.startsWith("In")) { - inLine(pp, line, lineNumber); + inLine(pp, line, inputFile, lineNumber); } else if (line.startsWith("ShowScript")) { showScript = true; } else if (line.startsWith("HideScript")) { @@ -243,12 +243,13 @@ public static int testInvariants(String inputFile, boolean doRange) throws IOExc } else if (line.startsWith("Show")) { showLine(line, pp); } else if (line.startsWith("OnPairsOf")) { - equivalencesLine(line, pp, lineNumber); + equivalencesLine(line, pp, inputFile, lineNumber); } else { - testLine(line, pp, lineNumber); + testLine(line, pp, inputFile, lineNumber); } } catch (final Exception e) { - parseErrorCount = parseError(parseErrorCount, line, e, lineNumber); + parseErrorCount = + parseError(parseErrorCount, line, e, inputFile, lineNumber); continue; } } @@ -323,7 +324,7 @@ protected String getFailure(int codepoint) { } } - private static void equivalencesLine(String line, ParsePosition pp, int lineNumber) + private static void equivalencesLine(String line, ParsePosition pp, String file, int lineNumber) throws ParseException { pp.setIndex("OnPairsOf".length()); final UnicodeSet domain = new UnicodeSet(line, pp, symbolTable); @@ -504,7 +505,8 @@ private static void equivalencesLine(String line, ParsePosition pp, int lineNumb } errorMessageLines.addAll(counterexamples); if (failure) { - reportTestFailure(lineNumber, String.join("\n", errorMessageLines).replace('\t', ' ')); + reportTestFailure( + file, lineNumber, String.join("\n", errorMessageLines).replace('\t', ' ')); } out.println(failure ? "" : "
"); for (String counterexample : counterexamples) { @@ -518,7 +520,7 @@ private static void equivalencesLine(String line, ParsePosition pp, int lineNumb } } - private static void inLine(ParsePosition pp, String line, int lineNumber) + private static void inLine(ParsePosition pp, String line, String file, int lineNumber) throws ParseException { pp.setIndex(2); final PropertyPredicate propertyPredicate = getPropertyPredicate(pp, line); @@ -539,7 +541,7 @@ private static void inLine(ParsePosition pp, String line, int lineNumber) errorLister.setLineSeparator("\n"); errorLister.showSetNames(new PrintWriter(monoTable), failureSet); errorLister.setTabber(htmlTabber); - reportTestFailure(lineNumber, errorMessage + "\n" + monoTable.toString()); + reportTestFailure(file, lineNumber, errorMessage + "\n" + monoTable.toString()); if (doHtml) { out.println("
"); @@ -921,7 +923,7 @@ private static void showMapLine(String line, ParsePosition pp) { showLister.setMergeRanges(doRange); } - private static void testLine(String line, ParsePosition pp, int lineNumber) + private static void testLine(String line, ParsePosition pp, String file, int lineNumber) throws ParseException { if (line.startsWith("Test")) { line = line.substring(4).trim(); @@ -989,6 +991,7 @@ private static void testLine(String line, ParsePosition pp, int lineNumber) rightSide, "But Not In", leftSide, + file, lineNumber); checkExpected( rightAndLeft, @@ -997,6 +1000,7 @@ private static void testLine(String line, ParsePosition pp, int lineNumber) rightSide, "And In", leftSide, + file, lineNumber); checkExpected( left_right, @@ -1005,6 +1009,7 @@ private static void testLine(String line, ParsePosition pp, int lineNumber) leftSide, "But Not In", rightSide, + file, lineNumber); } @@ -1026,6 +1031,7 @@ private static void checkExpected( String rightSide, String leftStatus, String leftSide, + String file, int lineNumber) { switch (expected) { case empty: @@ -1059,7 +1065,9 @@ private static void checkExpected( errorLister.setLineSeparator("\n"); errorLister.showSetNames(new PrintWriter(monoTable), segment); reportTestFailure( - lineNumber, String.join("\n", errorMessageLines) + "\n" + monoTable.toString()); + file, + lineNumber, + String.join("\n", errorMessageLines) + "\n" + monoTable.toString()); errorLister.setTabber(htmlTabber); if (doHtml) { out.println("
"); @@ -1249,7 +1257,8 @@ private static void showSet(ParsePosition pp, final String value) { println(); } - private static int parseError(int parseErrorCount, String line, Exception e, int lineNumber) { + private static int parseError( + int parseErrorCount, String line, Exception e, String file, int lineNumber) { parseErrorCount++; if (e instanceof ParseException) { final int index = ((ParseException) e).getErrorOffset(); @@ -1263,7 +1272,7 @@ private static int parseError(int parseErrorCount, String line, Exception e, int if (message != null) { println("##" + message); } - reportParseError(lineNumber, message); + reportParseError(file, lineNumber, message); e.printStackTrace(out); out.println(""); @@ -1358,19 +1367,19 @@ private static void println() { println(""); } - private static void reportParseError(int lineNumber, String message) { - reportError(lineNumber, "Parse error", message); + private static void reportParseError(String file, int lineNumber, String message) { + reportError(file, lineNumber, "Parse error", message); } - private static void reportTestFailure(int lineNumber, String message) { - reportError(lineNumber, "Invariant test failure", message); + private static void reportTestFailure(String file, int lineNumber, String message) { + reportError(file, lineNumber, "Invariant test failure", message); } - private static void reportError(int lineNumber, String title, String message) { + private static void reportError(String file, int lineNumber, String title, String message) { if (EMIT_GITHUB_ERRORS) { System.err.println( "::error file=unicodetools/src/main/resources/org/unicode/text/UCD/" - + DEFAULT_FILE + + file + ",line=" + lineNumber + ",title=" diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt new file mode 100644 index 000000000..838516704 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt @@ -0,0 +1,28 @@ +# Invariant tests for UTS #39 data. +# These tests are separate from the UCD tests in UnicodeInvariantTest.txt to +# allow for UCD development ahead of UTS #39 development, from recommendation +# for provisional assignment up to and including α. + +# https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type + +# “Unassigned characters, private use characters, surrogates, non-whitespace control characters.” +\p{Identifier_Type=Not_Character} = [\p{gc=Cn}\p{gc=Co}\p{gc=Cs}\p{gc=Cc}-\p{White_Space}] + +# “Multiple values are not assigned to characters with strong restrictions: +# Not_Character, Deprecated, Default_Ignorable, Not_NFKC.” +\p{Identifier_Type=Deprecated} = \p{Deprecated} + +# When a code point fits the criteria for multiple restrictions, then the strongest one wins. +# For example, Default_Ignorable is trumped by unassigned and Deprecated. +\p{Identifier_Type=Default_Ignorable} = [\p{Default_Ignorable_Code_Point}-\p{gc=Cn}-\p{Deprecated}] + +\p{Identifier_Type=Not_NFKC} = [\p{NFKC_QC=No}-\p{Deprecated}-\p{Default_Ignorable_Code_Point}] + +Let $Strongly_Restricted = [\p{Identifier_Type=Not_Character}\p{Identifier_Type=Deprecated}\p{Identifier_Type=Default_Ignorable}\p{Identifier_Type=Not_NFKC}] + +\p{Identifier_Type=Not_XID} = [\P{XID_Continue}-$Strongly_Restricted-\p{Identifier_Type=Inclusion}] + +# By definition, no character can have both Exclusion and Limited_Use. +\p{Identifier_Type=Exclusion} ∥ \p{Identifier_Type=Limited_Use} + +\p{Identifier_Status=Allowed} = [\p{Identifier_Type=Inclusion}\p{Identifier_Type=Recommended}] diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 98032037c..4ccd4a5bb 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -860,34 +860,6 @@ Let $TwoVietnameseReadingMarks = [\p{U15.1.0:ccc=6}] # an LV or V, respectively. [\p{NFC_QC=Maybe}&\p{ccc=0}] ⊆ [\p{GCB=Extend}\p{GCB=T}\p{GCB=V}] -########################## -# Security -########################## - -# https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type - -# “Unassigned characters, private use characters, surrogates, non-whitespace control characters.” -\p{Identifier_Type=Not_Character} = [\p{gc=Cn}\p{gc=Co}\p{gc=Cs}\p{gc=Cc}-\p{White_Space}] - -# “Multiple values are not assigned to characters with strong restrictions: -# Not_Character, Deprecated, Default_Ignorable, Not_NFKC.” -\p{Identifier_Type=Deprecated} = \p{Deprecated} - -# When a code point fits the criteria for multiple restrictions, then the strongest one wins. -# For example, Default_Ignorable is trumped by unassigned and Deprecated. -\p{Identifier_Type=Default_Ignorable} = [\p{Default_Ignorable_Code_Point}-\p{gc=Cn}-\p{Deprecated}] - -\p{Identifier_Type=Not_NFKC} = [\p{NFKC_QC=No}-\p{Deprecated}-\p{Default_Ignorable_Code_Point}] - -Let $Strongly_Restricted = [\p{Identifier_Type=Not_Character}\p{Identifier_Type=Deprecated}\p{Identifier_Type=Default_Ignorable}\p{Identifier_Type=Not_NFKC}] - -\p{Identifier_Type=Not_XID} = [\P{XID_Continue}-$Strongly_Restricted-\p{Identifier_Type=Inclusion}] - -# By definition, no character can have both Exclusion and Limited_Use. -\p{Identifier_Type=Exclusion} ∥ \p{Identifier_Type=Limited_Use} - -\p{Identifier_Status=Allowed} = [\p{Identifier_Type=Inclusion}\p{Identifier_Type=Recommended}] - ########################## # Emoji ########################## diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestTestUnicodeInvariants.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestTestUnicodeInvariants.java index f3ed500b6..f74750a59 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestTestUnicodeInvariants.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestTestUnicodeInvariants.java @@ -32,6 +32,12 @@ void testSRC_UCD_DIR() { @Test void testUnicodeInvariants() throws IOException { int rc = TestUnicodeInvariants.testInvariants(null, true); - assertEquals(0, rc, "TestUnicodeInvariants.testInvariants() failed"); + assertEquals(0, rc, "TestUnicodeInvariants.testInvariants(default) failed"); + } + + @Test + void testSecurityInvariants() throws IOException { + int rc = TestUnicodeInvariants.testInvariants("SecurityInvariantTest.txt", true); + assertEquals(0, rc, "TestUnicodeInvariants.testInvariants(security) failed"); } }