Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into lb=AS-for-all
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Nov 3, 2023
2 parents f50ef46 + 974e992 commit aacf3a6
Show file tree
Hide file tree
Showing 183 changed files with 2,713 additions and 626 deletions.
118 changes: 94 additions & 24 deletions .github/workflows/cli-build-instructions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,14 @@ jobs:
run: |
mkdir -p Generated/BIN
- name: Run command - Build and Test
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# Since these are just examples to smoke-test the in-source build process,
# let’s not run the whole build and test suite, which is quite slow (6 min
# 26 s as of this writing). Just run the invariant tests and smoke-test
# MakeUnicodeFiles. We don’t even check that MakeUnicodeFiles doesn’t
# change anything, which makes little sense; but that is the job of the
# other job.
- name: Run invariant tests
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand All @@ -91,14 +97,15 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

out-of-source-build:
name: Out-of-source Instructions

# Out-of-source build.
ucd-and-smoke-tests:
name: Check UCD consistency, invariants, smoke-test generators
runs-on: ubuntu-latest
steps:
- name: Checkout Unicode Tools
uses: actions/checkout@v3
with:
repository: unicode-org/unicodetools
path: unicodetools/mine/src
- name: Get the CLDR_REF from pom.xml
id: cldr_ref
Expand Down Expand Up @@ -136,6 +143,30 @@ jobs:
run: |
mkdir -p unicodetools/mine/Generated/BIN
- name: Run command - Make Unicode Files
run: |
cd unicodetools/mine/src
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Check that UCD files are consistent
run: |
cd unicodetools/mine/src
./py/copygenerateducd.py --out-of-source -y
git diff --compact-summary --exit-code || {
git diff --compact-summary |
awk '{
if (previous) {
print "::error file="previous",title=File must be regenerated::Run org.unicode.text.UCD.Main build MakeUnicodeFiles and copy any changed files to unicodetools/data/ucd/dev."
}
previous=$1
}'
exit 1
}
# Only test once we know the UCD is internally consistent.
# MakeUnicodeFiles is much faster than this anyway.
- name: Run command - Build and Test
run: |
cd unicodetools/mine/src
Expand All @@ -151,13 +182,6 @@ jobs:
path: |
unicodetools/mine/Generated/UnicodeTestResults.*
- name: Run command - Make Unicode Files
run: |
cd unicodetools/mine/src
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/emoji/aac.md#aacorderjava
- name: Run command - AAC Order
run: |
Expand All @@ -166,18 +190,6 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests
# Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names
- name: Run command - UCA - collation validity log
run: |
cd unicodetools/mine/src
# invoke main() in class ...UCA.Main
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# check for output file
compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/idna.md
- name: Run command - IDNA
run: |
Expand Down Expand Up @@ -252,3 +264,61 @@ jobs:
mvn -s .github/workflows/mvn-settings.xml -Dexec.mainClass="org.unicode.propstest.CheckProperties" -Dexec.classpathScope=test test-compile -Dexec.args="COMPARE ALL $PREVIOUS_UVERSION" compile exec:java -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Out-of-source build.
uca:
name: Check UCA data
runs-on: ubuntu-latest
steps:
- name: Checkout Unicode Tools
uses: actions/checkout@v3
with:
repository: unicode-org/unicodetools
path: unicodetools/mine/src
- name: Get the CLDR_REF from pom.xml
id: cldr_ref
run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT}
- name: Verify CLDR checkout ref
run: echo CLDR_REF="${{ steps.cldr_ref.outputs.CLDR_REF }}" && [ "${{ steps.cldr_ref.outputs.CLDR_REF }}x" != "x" ] # fail if empty
- name: Cache CLDR repository
uses: actions/cache@v3
with:
path: cldr/mine/src
key: cldr-${{ steps.cldr_ref.outputs.CLDR_REF }}
restore-keys: |
cldr
- name: Check out CLDR
uses: actions/checkout@v3
with:
repository: unicode-org/cldr
path: cldr/mine/src
ref: main
fetch-depth: 0
- name: Switch CLDR to CLDR_REF
run: cd cldr/mine/src && git fetch && git checkout ${{ steps.cldr_ref.outputs.CLDR_REF }}
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- name: Set up out-of-source output dir
run: |
mkdir -p unicodetools/mine/Generated/BIN
# https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests
# Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names
- name: Run command - UCA - collation validity log
run: |
cd unicodetools/mine/src
# invoke main() in class ...UCA.Main
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# check for output file
compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2 changes: 1 addition & 1 deletion UnicodeJsps/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM alpine as cbuild
WORKDIR /build
RUN apk add --update wget make gcc musl-dev
ARG CPATH=https://www.unicode.org/Public/PROGRAMS/BidiReferenceC/
ARG CVERSION=15.0.0
ARG CVERSION=15.1.0
RUN wget -np -nv --reject-regex='.*\.(lib|exe)$' --cut-dirs=4 -nH -r ${CPATH}${CVERSION}/
RUN cd source && gcc -I ../include/ -static -Os -o3 -o bidiref1 bidiref1.c brutils.c brtest.c brtable.c brrule.c
RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true)
Expand Down
2 changes: 2 additions & 0 deletions UnicodeJsps/jetty.d/ROOT/robots.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
User-agent: *
Disallow: /UnicodeJsps
2 changes: 1 addition & 1 deletion UnicodeJsps/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>29.0-jre</version>
<version>32.0.0-jre</version>
</dependency>

<!-- test -->
Expand Down
2 changes: 1 addition & 1 deletion UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import org.unicode.props.UnicodeProperty;

public class CachedProps {
public static final boolean IS_BETA = true;
public static final boolean IS_BETA = false;

public static final Splitter HASH_SPLITTER = Splitter.on('#').trimResults();
public static final Splitter SEMI_SPLITTER = Splitter.on(';').trimResults();
Expand Down
38 changes: 24 additions & 14 deletions UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -637,16 +637,7 @@ private void showString(final String string, String separator, Appendable out)
if (UnicodeUtilities.RTL.containsSome(literal)) {
literal = '\u200E' + literal + '\u200E';
}
String name = UnicodeUtilities.getName(string, separator, false);
if (name == null || name.length() == 0) {
name = "<i>no name</i>";
} else {
boolean special = name.indexOf('<') >= 0;
name = UnicodeUtilities.toHTML.transliterate(name);
if (special) {
name = "<i>" + name + "</i>";
}
}
String name = UnicodeUtilities.getName(string, separator, false, false);
literal = UnicodeSetUtilities.addEmojiVariation(literal);
if (doTable) {
out.append(
Expand Down Expand Up @@ -801,7 +792,8 @@ String getPropString(List<UnicodeProperty> props, String codePoints, boolean sho
// }
}

private static String getName(String string, String separator, boolean andCode) {
private static String getName(
String string, String separator, boolean andCode, boolean plainText) {
StringBuilder result = new StringBuilder();
int cp;
for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) {
Expand All @@ -812,7 +804,25 @@ private static String getName(String string, String separator, boolean andCode)
if (andCode) {
result.append("U+").append(com.ibm.icu.impl.Utility.hex(cp, 4)).append(' ');
}
result.append(CachedProps.NAMES.getValue(cp));
final String name = CachedProps.NAMES.getValue(cp);
if (name != null) {
result.append(name);
} else {
// TODO(egg): We only have Name_Aliasβ during β, which is silly. This will probably
// solve itself as part of https://github.com/unicode-org/unicodetools/issues/432.
String alias =
getFactory()
.getProperty(CachedProps.IS_BETA ? "Name_Aliasβ" : "Name_Alias")
.getValue(cp);
if (alias == null) {
alias = "no name";
}
if (plainText) {
result.append("(" + alias + ")");
} else {
result.append("<i>" + alias + "</i>");
}
}
}
return result.toString();
}
Expand Down Expand Up @@ -1931,7 +1941,7 @@ private static void showBidiLine(
writer.println("</tr><tr><th>Character</th>");
for (int i = 0; i < str.length(); ++i) {
final String s = str.substring(i, i + 1);
String title = toHTML.transform(getName(s, "", true));
String title = toHTML.transform(getName(s, "", true, true));
writer.println(
"<td class='bccell' title='"
+ title
Expand Down Expand Up @@ -1982,7 +1992,7 @@ private static void showBidiLine(
String title =
bidiChar.length() == 0
? "deleted"
: toHTML.transform(getName(bidiChar, "", true));
: toHTML.transform(getName(bidiChar, "", true, true));
String td = bidiChar.length() == 0 ? "bxcell" : "bccell";
writer.println(
"<td class='"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IdentifierStatus.txt
# Date: 2023-05-16, 22:25:15 GMT
# Date: 2023-08-11, 17:46:41 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -582,8 +582,8 @@ FA27..FA29 ; Allowed # 1.1 [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK CO
2B740..2B81D ; Allowed # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Allowed # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Allowed # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE4A ; Allowed # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
2EBF0..2EE5D ; Allowed # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
30000..3134A ; Allowed # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Allowed # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 112759
# Total code points: 112778
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IdentifierType.txt
# Date: 2023-05-16, 22:25:14 GMT
# Date: 2023-08-11, 17:46:40 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -576,11 +576,11 @@ FA27..FA29 ; Recommended # 1.1 [3] CJK COMPATIBILITY ID
2B740..2B81D ; Recommended # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Recommended # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Recommended # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE4A ; Recommended # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
2EBF0..2EE5D ; Recommended # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
30000..3134A ; Recommended # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Recommended # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 112742
# Total code points: 112761

# Identifier_Type: Inclusion

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IdnaMappingTable.txt
# Date: 2023-05-15, 22:37:02 GMT
# Date: 2023-08-10, 22:32:27 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -2036,7 +2036,7 @@
1E9A ; mapped ; 0061 02BE # 1.1 LATIN SMALL LETTER A WITH RIGHT HALF RING
1E9B ; mapped ; 1E61 # 2.0 LATIN SMALL LETTER LONG S WITH DOT ABOVE
1E9C..1E9D ; valid # 5.1 LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE..LATIN SMALL LETTER LONG S WITH HIGH STROKE
1E9E ; mapped ; 0073 0073 # 5.1 LATIN CAPITAL LETTER SHARP S
1E9E ; mapped ; 00DF # 5.1 LATIN CAPITAL LETTER SHARP S
1E9F ; valid # 5.1 LATIN SMALL LETTER DELTA
1EA0 ; mapped ; 1EA1 # 1.1 LATIN CAPITAL LETTER A WITH DOT BELOW
1EA1 ; valid # 1.1 LATIN SMALL LETTER A WITH DOT BELOW
Expand Down Expand Up @@ -3422,7 +3422,7 @@
31C0..31CF ; valid ; ; NV8 # 4.1 CJK STROKE T..CJK STROKE N
31D0..31E3 ; valid ; ; NV8 # 5.1 CJK STROKE H..CJK STROKE Q
31E4..31EE ; disallowed # NA <reserved-31E4>..<reserved-31EE>
31EF ; valid ; ; NV8 # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
31EF ; disallowed # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
31F0..31FF ; valid # 3.2 KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200 ; disallowed_STD3_mapped ; 0028 1100 0029 #1.1 PARENTHESIZED HANGUL KIYEOK
3201 ; disallowed_STD3_mapped ; 0028 1102 0029 #1.1 PARENTHESIZED HANGUL NIEUN
Expand Down Expand Up @@ -8448,8 +8448,8 @@ FFFE..FFFF ; disallowed # 1.1 <noncharacter-FFFE
2CEA2..2CEAF ; disallowed # NA <reserved-2CEA2>..<reserved-2CEAF>
2CEB0..2EBE0 ; valid # 10.0 CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBE1..2EBEF ; disallowed # NA <reserved-2EBE1>..<reserved-2EBEF>
2EBF0..2EE4A ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
2EE4B..2F7FF ; disallowed # NA <reserved-2EE4B>..<reserved-2F7FF>
2EBF0..2EE5D ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2EE5E..2F7FF ; disallowed # NA <reserved-2EE5E>..<reserved-2F7FF>
2F800 ; mapped ; 4E3D # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F800
2F801 ; mapped ; 4E38 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F801
2F802 ; mapped ; 4E41 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F802
Expand Down
Loading

0 comments on commit aacf3a6

Please sign in to comment.