Skip to content

Commit

Permalink
Move the security invariants to their own CI check (#834)
Browse files Browse the repository at this point in the history
Co-authored-by: Markus Scherer <[email protected]>
  • Loading branch information
eggrobin and markusicu authored May 24, 2024
1 parent 7ebae15 commit e4f3c4d
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 49 deletions.
57 changes: 55 additions & 2 deletions .github/workflows/cli-build-instructions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
# change anything, which makes little sense; but that is the job of the
# other job.
- name: Run invariant tests
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand Down Expand Up @@ -170,7 +170,7 @@ jobs:
- name: Run command - Build and Test
run: |
cd unicodetools/mine/src
MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DEMIT_GITHUB_ERRORS
MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -Dtest=!TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DEMIT_GITHUB_ERRORS
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand Down Expand Up @@ -322,3 +322,56 @@ jobs:
compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Out-of-source build.
security:
  # Dedicated job that runs only the testSecurityInvariants method of
  # TestTestUnicodeInvariants, using the out-of-source layout created below
  # (sources in unicodetools/mine/src, output in unicodetools/mine/Generated).
  name: Check security data invariants
  runs-on: ubuntu-latest
  steps:
    - name: Checkout Unicode Tools
      uses: actions/checkout@v3
      with:
        repository: unicode-org/unicodetools
        path: unicodetools/mine/src
    # Evaluates the cldr.version property from pom.xml, keeps the third
    # dash-separated field, and publishes it as the CLDR_REF step output.
    - name: Get the CLDR_REF from pom.xml
      id: cldr_ref
      run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT}
    - name: Verify CLDR checkout ref
      run: echo CLDR_REF="${{ steps.cldr_ref.outputs.CLDR_REF }}" && [ "${{ steps.cldr_ref.outputs.CLDR_REF }}x" != "x" ] # fail if empty
    - name: Cache CLDR repository
      uses: actions/cache@v3
      with:
        path: cldr/mine/src
        key: cldr-${{ steps.cldr_ref.outputs.CLDR_REF }}
        restore-keys: |
          cldr
    # Full-history checkout of main so the next step can switch to CLDR_REF.
    - name: Check out CLDR
      uses: actions/checkout@v3
      with:
        repository: unicode-org/cldr
        path: cldr/mine/src
        ref: main
        fetch-depth: 0
    - name: Switch CLDR to CLDR_REF
      run: cd cldr/mine/src && git fetch && git checkout ${{ steps.cldr_ref.outputs.CLDR_REF }}
    - name: Set up JDK 11
      uses: actions/setup-java@v1
      with:
        java-version: 11
    # Uses the same actions/cache major version as the CLDR cache step above.
    - name: Cache local Maven repository
      uses: actions/cache@v3
      with:
        path: ~/.m2/repository
        key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          ${{ runner.os }}-maven-
    - name: Set up out-of-source output dir
      run: |
        mkdir -p unicodetools/mine/Generated/BIN
    # NOTE(review): CURRENT_UVERSION is not set anywhere in this job; it is
    # presumably a workflow-level env value. Confirm against the top of the file.
    - name: Run invariant tests
      run: |
        cd unicodetools/mine/src
        MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ public static int testInvariants(String inputFile, boolean doRange) throws IOExc
} else if (line.startsWith("Let")) {
letLine(pp, line);
} else if (line.startsWith("In")) {
inLine(pp, line, lineNumber);
inLine(pp, line, inputFile, lineNumber);
} else if (line.startsWith("ShowScript")) {
showScript = true;
} else if (line.startsWith("HideScript")) {
Expand All @@ -243,12 +243,13 @@ public static int testInvariants(String inputFile, boolean doRange) throws IOExc
} else if (line.startsWith("Show")) {
showLine(line, pp);
} else if (line.startsWith("OnPairsOf")) {
equivalencesLine(line, pp, lineNumber);
equivalencesLine(line, pp, inputFile, lineNumber);
} else {
testLine(line, pp, lineNumber);
testLine(line, pp, inputFile, lineNumber);
}
} catch (final Exception e) {
parseErrorCount = parseError(parseErrorCount, line, e, lineNumber);
parseErrorCount =
parseError(parseErrorCount, line, e, inputFile, lineNumber);
continue;
}
}
Expand Down Expand Up @@ -323,7 +324,7 @@ protected String getFailure(int codepoint) {
}
}

private static void equivalencesLine(String line, ParsePosition pp, int lineNumber)
private static void equivalencesLine(String line, ParsePosition pp, String file, int lineNumber)
throws ParseException {
pp.setIndex("OnPairsOf".length());
final UnicodeSet domain = new UnicodeSet(line, pp, symbolTable);
Expand Down Expand Up @@ -504,7 +505,8 @@ private static void equivalencesLine(String line, ParsePosition pp, int lineNumb
}
errorMessageLines.addAll(counterexamples);
if (failure) {
reportTestFailure(lineNumber, String.join("\n", errorMessageLines).replace('\t', ' '));
reportTestFailure(
file, lineNumber, String.join("\n", errorMessageLines).replace('\t', ' '));
}
out.println(failure ? "<table class='f'>" : "<table>");
for (String counterexample : counterexamples) {
Expand All @@ -518,7 +520,7 @@ private static void equivalencesLine(String line, ParsePosition pp, int lineNumb
}
}

private static void inLine(ParsePosition pp, String line, int lineNumber)
private static void inLine(ParsePosition pp, String line, String file, int lineNumber)
throws ParseException {
pp.setIndex(2);
final PropertyPredicate propertyPredicate = getPropertyPredicate(pp, line);
Expand All @@ -539,7 +541,7 @@ private static void inLine(ParsePosition pp, String line, int lineNumber)
errorLister.setLineSeparator("\n");
errorLister.showSetNames(new PrintWriter(monoTable), failureSet);
errorLister.setTabber(htmlTabber);
reportTestFailure(lineNumber, errorMessage + "\n" + monoTable.toString());
reportTestFailure(file, lineNumber, errorMessage + "\n" + monoTable.toString());

if (doHtml) {
out.println("<table class='f'>");
Expand Down Expand Up @@ -921,7 +923,7 @@ private static void showMapLine(String line, ParsePosition pp) {
showLister.setMergeRanges(doRange);
}

private static void testLine(String line, ParsePosition pp, int lineNumber)
private static void testLine(String line, ParsePosition pp, String file, int lineNumber)
throws ParseException {
if (line.startsWith("Test")) {
line = line.substring(4).trim();
Expand Down Expand Up @@ -989,6 +991,7 @@ private static void testLine(String line, ParsePosition pp, int lineNumber)
rightSide,
"But Not In",
leftSide,
file,
lineNumber);
checkExpected(
rightAndLeft,
Expand All @@ -997,6 +1000,7 @@ private static void testLine(String line, ParsePosition pp, int lineNumber)
rightSide,
"And In",
leftSide,
file,
lineNumber);
checkExpected(
left_right,
Expand All @@ -1005,6 +1009,7 @@ private static void testLine(String line, ParsePosition pp, int lineNumber)
leftSide,
"But Not In",
rightSide,
file,
lineNumber);
}

Expand All @@ -1026,6 +1031,7 @@ private static void checkExpected(
String rightSide,
String leftStatus,
String leftSide,
String file,
int lineNumber) {
switch (expected) {
case empty:
Expand Down Expand Up @@ -1059,7 +1065,9 @@ private static void checkExpected(
errorLister.setLineSeparator("\n");
errorLister.showSetNames(new PrintWriter(monoTable), segment);
reportTestFailure(
lineNumber, String.join("\n", errorMessageLines) + "\n" + monoTable.toString());
file,
lineNumber,
String.join("\n", errorMessageLines) + "\n" + monoTable.toString());
errorLister.setTabber(htmlTabber);
if (doHtml) {
out.println("<table class='e'>");
Expand Down Expand Up @@ -1249,7 +1257,8 @@ private static void showSet(ParsePosition pp, final String value) {
println();
}

private static int parseError(int parseErrorCount, String line, Exception e, int lineNumber) {
private static int parseError(
int parseErrorCount, String line, Exception e, String file, int lineNumber) {
parseErrorCount++;
if (e instanceof ParseException) {
final int index = ((ParseException) e).getErrorOffset();
Expand All @@ -1263,7 +1272,7 @@ private static int parseError(int parseErrorCount, String line, Exception e, int
if (message != null) {
println("##" + message);
}
reportParseError(lineNumber, message);
reportParseError(file, lineNumber, message);
e.printStackTrace(out);

out.println("</pre>");
Expand Down Expand Up @@ -1358,19 +1367,19 @@ private static void println() {
println("");
}

private static void reportParseError(int lineNumber, String message) {
reportError(lineNumber, "Parse error", message);
private static void reportParseError(String file, int lineNumber, String message) {
reportError(file, lineNumber, "Parse error", message);
}

private static void reportTestFailure(int lineNumber, String message) {
reportError(lineNumber, "Invariant test failure", message);
private static void reportTestFailure(String file, int lineNumber, String message) {
reportError(file, lineNumber, "Invariant test failure", message);
}

private static void reportError(int lineNumber, String title, String message) {
private static void reportError(String file, int lineNumber, String title, String message) {
if (EMIT_GITHUB_ERRORS) {
System.err.println(
"::error file=unicodetools/src/main/resources/org/unicode/text/UCD/"
+ DEFAULT_FILE
+ file
+ ",line="
+ lineNumber
+ ",title="
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Invariant tests for UTS #39 data.
# These tests are separate from the UCD tests in UnicodeInvariantTest.txt to
# allow for UCD development ahead of UTS #39 development, from recommendation
# for provisional assignment up to and including α.

# https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type

# “Unassigned characters, private use characters, surrogates, non-whitespace control characters.”
\p{Identifier_Type=Not_Character} = [\p{gc=Cn}\p{gc=Co}\p{gc=Cs}\p{gc=Cc}-\p{White_Space}]

# “Multiple values are not assigned to characters with strong restrictions:
# Not_Character, Deprecated, Default_Ignorable, Not_NFKC.”
\p{Identifier_Type=Deprecated} = \p{Deprecated}

# When a code point fits the criteria for multiple restrictions, then the strongest one wins.
# For example, Default_Ignorable is trumped by unassigned and Deprecated.
\p{Identifier_Type=Default_Ignorable} = [\p{Default_Ignorable_Code_Point}-\p{gc=Cn}-\p{Deprecated}]

\p{Identifier_Type=Not_NFKC} = [\p{NFKC_QC=No}-\p{Deprecated}-\p{Default_Ignorable_Code_Point}]

Let $Strongly_Restricted = [\p{Identifier_Type=Not_Character}\p{Identifier_Type=Deprecated}\p{Identifier_Type=Default_Ignorable}\p{Identifier_Type=Not_NFKC}]

\p{Identifier_Type=Not_XID} = [\P{XID_Continue}-$Strongly_Restricted-\p{Identifier_Type=Inclusion}]

# By definition, no character can have both Exclusion and Limited_Use.
\p{Identifier_Type=Exclusion} ∥ \p{Identifier_Type=Limited_Use}

\p{Identifier_Status=Allowed} = [\p{Identifier_Type=Inclusion}\p{Identifier_Type=Recommended}]
Original file line number Diff line number Diff line change
Expand Up @@ -860,34 +860,6 @@ Let $TwoVietnameseReadingMarks = [\p{U15.1.0:ccc=6}]
# an LV or V, respectively.
[\p{NFC_QC=Maybe}&\p{ccc=0}] ⊆ [\p{GCB=Extend}\p{GCB=T}\p{GCB=V}]

##########################
# Security
##########################

# https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type

# “Unassigned characters, private use characters, surrogates, non-whitespace control characters.”
\p{Identifier_Type=Not_Character} = [\p{gc=Cn}\p{gc=Co}\p{gc=Cs}\p{gc=Cc}-\p{White_Space}]

# “Multiple values are not assigned to characters with strong restrictions:
# Not_Character, Deprecated, Default_Ignorable, Not_NFKC.”
\p{Identifier_Type=Deprecated} = \p{Deprecated}

# When a code point fits the criteria for multiple restrictions, then the strongest one wins.
# For example, Default_Ignorable is trumped by unassigned and Deprecated.
\p{Identifier_Type=Default_Ignorable} = [\p{Default_Ignorable_Code_Point}-\p{gc=Cn}-\p{Deprecated}]

\p{Identifier_Type=Not_NFKC} = [\p{NFKC_QC=No}-\p{Deprecated}-\p{Default_Ignorable_Code_Point}]

Let $Strongly_Restricted = [\p{Identifier_Type=Not_Character}\p{Identifier_Type=Deprecated}\p{Identifier_Type=Default_Ignorable}\p{Identifier_Type=Not_NFKC}]

\p{Identifier_Type=Not_XID} = [\P{XID_Continue}-$Strongly_Restricted-\p{Identifier_Type=Inclusion}]

# By definition, no character can have both Exclusion and Limited_Use.
\p{Identifier_Type=Exclusion} ∥ \p{Identifier_Type=Limited_Use}

\p{Identifier_Status=Allowed} = [\p{Identifier_Type=Inclusion}\p{Identifier_Type=Recommended}]

##########################
# Emoji
##########################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ void testSRC_UCD_DIR() {
@Test
void testUnicodeInvariants() throws IOException {
int rc = TestUnicodeInvariants.testInvariants(null, true);
assertEquals(0, rc, "TestUnicodeInvariants.testInvariants() failed");
assertEquals(0, rc, "TestUnicodeInvariants.testInvariants(default) failed");
}

/**
 * Runs the invariants in SecurityInvariantTest.txt through
 * TestUnicodeInvariants.testInvariants and fails unless it returns 0.
 *
 * @throws IOException propagated from testInvariants
 */
@Test
void testSecurityInvariants() throws IOException {
    final String invariantsFile = "SecurityInvariantTest.txt";
    final int returnCode = TestUnicodeInvariants.testInvariants(invariantsFile, true);
    assertEquals(0, returnCode, "TestUnicodeInvariants.testInvariants(security) failed");
}
}

0 comments on commit e4f3c4d

Please sign in to comment.