Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into 176-C31
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Nov 2, 2023
2 parents da128cc + ae3a7aa commit 3dd8e9e
Show file tree
Hide file tree
Showing 445 changed files with 3,297,326 additions and 2,571 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build-jsp.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name: Build JSP

env:
CURRENT_UVERSION: 15.1.0
PREVIOUS_UVERSION: 15.0.0 # not used at present
CURRENT_UVERSION: 16.0.0
PREVIOUS_UVERSION: 15.1.0 # not used at present

on:
push:
Expand Down
122 changes: 96 additions & 26 deletions .github/workflows/cli-build-instructions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ on:
- '*'

env:
CURRENT_UVERSION: 15.1.0
PREVIOUS_UVERSION: 15.0.0
CURRENT_UVERSION: 16.0.0
PREVIOUS_UVERSION: 15.1.0

jobs:

Expand Down Expand Up @@ -81,8 +81,14 @@ jobs:
run: |
mkdir -p Generated/BIN
- name: Run command - Build and Test
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# Since these are just examples to smoke-test the in-source build process,
# let’s not run the whole build and test suite, which is quite slow (6 min
# 26 s as of this writing). Just run the invariant tests and smoke-test
# MakeUnicodeFiles. We don’t even check that MakeUnicodeFiles doesn’t
# change anything, which makes little sense; but that is the job of the
# other job.
- name: Run invariant tests
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand All @@ -91,14 +97,15 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

out-of-source-build:
name: Out-of-source Instructions

# Out-of-source build.
ucd-and-smoke-tests:
name: Check UCD consistency, invariants, smoke-test generators
runs-on: ubuntu-latest
steps:
- name: Checkout Unicode Tools
uses: actions/checkout@v3
with:
repository: unicode-org/unicodetools
path: unicodetools/mine/src
- name: Get the CLDR_REF from pom.xml
id: cldr_ref
Expand Down Expand Up @@ -136,6 +143,30 @@ jobs:
run: |
mkdir -p unicodetools/mine/Generated/BIN
- name: Run command - Make Unicode Files
run: |
cd unicodetools/mine/src
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Check that UCD files are consistent
run: |
cd unicodetools/mine/src
./py/copygenerateducd.py --out-of-source -y
git diff --compact-summary --exit-code || {
git diff --compact-summary |
awk '{
if (previous) {
print "::error file="previous",title=File must be regenerated::Run org.unicode.text.UCD.Main build MakeUnicodeFiles and copy any changed files to unicodetools/data/ucd/dev."
}
previous=$1
}'
exit 1
}
# Only test once we know the UCD is internally consistent.
# MakeUnicodeFiles is much faster than this anyway.
- name: Run command - Build and Test
run: |
cd unicodetools/mine/src
Expand All @@ -151,13 +182,6 @@ jobs:
path: |
unicodetools/mine/Generated/UnicodeTestResults.*
- name: Run command - Make Unicode Files
run: |
cd unicodetools/mine/src
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/emoji/aac.md#aacorderjava
- name: Run command - AAC Order
run: |
Expand All @@ -166,18 +190,6 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests
# Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names
- name: Run command - UCA - collation validity log
run: |
cd unicodetools/mine/src
# invoke main() in class ...UCA.Main
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# check for output file
compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/idna.md
- name: Run command - IDNA
run: |
Expand Down Expand Up @@ -252,3 +264,61 @@ jobs:
mvn -s .github/workflows/mvn-settings.xml -Dexec.mainClass="org.unicode.propstest.CheckProperties" -Dexec.classpathScope=test test-compile -Dexec.args="COMPARE ALL $PREVIOUS_UVERSION" compile exec:java -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Out-of-source build.
uca:
name: Check UCA data
runs-on: ubuntu-latest
steps:
- name: Checkout Unicode Tools
uses: actions/checkout@v3
with:
repository: unicode-org/unicodetools
path: unicodetools/mine/src
- name: Get the CLDR_REF from pom.xml
id: cldr_ref
run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT}
- name: Verify CLDR checkout ref
run: echo CLDR_REF="${{ steps.cldr_ref.outputs.CLDR_REF }}" && [ "${{ steps.cldr_ref.outputs.CLDR_REF }}x" != "x" ] # fail if empty
- name: Cache CLDR repository
uses: actions/cache@v3
with:
path: cldr/mine/src
key: cldr-${{ steps.cldr_ref.outputs.CLDR_REF }}
restore-keys: |
cldr
- name: Check out CLDR
uses: actions/checkout@v3
with:
repository: unicode-org/cldr
path: cldr/mine/src
ref: main
fetch-depth: 0
- name: Switch CLDR to CLDR_REF
run: cd cldr/mine/src && git fetch && git checkout ${{ steps.cldr_ref.outputs.CLDR_REF }}
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- name: Set up out-of-source output dir
run: |
mkdir -p unicodetools/mine/Generated/BIN
# https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests
# Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names
- name: Run command - UCA - collation validity log
run: |
cd unicodetools/mine/src
# invoke main() in class ...UCA.Main
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# check for output file
compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,4 @@ rules.mk

.DS_Store
/output
/cldr
71 changes: 32 additions & 39 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,46 +1,39 @@
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
UNICODE LICENSE V3

See Terms of Use for definitions of Unicode Inc.'s
Data Files and Software.
COPYRIGHT AND PERMISSION NOTICE

NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
Copyright © 2001-2023 Unicode, Inc.

COPYRIGHT AND PERMISSION NOTICE
NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.

Copyright © 1991-2020 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.

Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@ use the Unicode Contact Form: https://www.unicode.org/reporting.html
Do not use the GitHub Issues feature in this repo for those.
The tools maintainers use GH issues for issues with the code in this repo.

### Licenses
### Copyright & Licenses

- Data and software is governed by the [Unicode Terms of Use](https://www.unicode.org/copyright.html)
a copy of which is included as [LICENSE](./LICENSE).
Copyright © 2001-2023 Unicode, Inc. Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries.

### Copyright
The project is released under [LICENSE](./LICENSE).

© 1991 and later: Unicode, Inc. and others.
License & terms of use: <https://www.unicode.org/copyright.html>
A CLA is required to contribute to this project - please refer to the [CONTRIBUTING.md](https://github.com/unicode-org/.github/blob/main/.github/CONTRIBUTING.md) file (or start a Pull Request) for more information.
4 changes: 2 additions & 2 deletions UnicodeJsps/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ FROM alpine as cbuild
WORKDIR /build
RUN apk add --update wget make gcc musl-dev
ARG CPATH=https://www.unicode.org/Public/PROGRAMS/BidiReferenceC/
ARG CVERSION=15.0.0
ARG CVERSION=15.1.0
RUN wget -np -nv --reject-regex='.*\.(lib|exe)$' --cut-dirs=4 -nH -r ${CPATH}${CVERSION}/
RUN cd source && gcc -I ../include/ -static -Os -o3 -o bidiref1 bidiref1.c brutils.c brtest.c brtable.c brrule.c
RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true)
# copy and unpack to /tmp/data
ADD ./target/cldr-unicodetools.tgz /build/data/
# move this into place (including unicodetools/unicodetools)
RUN rm -rf /build/data/cldr/.git # unneeded
FROM jetty:9-jre11-slim AS run
FROM jetty:9-jre11-alpine-eclipse-temurin AS run
ADD port-entrypoint.sh /port-entrypoint.sh
ADD ./jetty.d/ROOT /var/lib/jetty/webapps/ROOT/
ENTRYPOINT [ "/port-entrypoint.sh" ]
Expand Down
2 changes: 2 additions & 0 deletions UnicodeJsps/jetty.d/ROOT/robots.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
User-agent: *
Disallow: /UnicodeJsps
2 changes: 1 addition & 1 deletion UnicodeJsps/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>29.0-jre</version>
<version>32.0.0-jre</version>
</dependency>

<!-- test -->
Expand Down
2 changes: 1 addition & 1 deletion UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import org.unicode.props.UnicodeProperty;

public class CachedProps {
public static final boolean IS_BETA = true;
public static final boolean IS_BETA = false;

public static final Splitter HASH_SPLITTER = Splitter.on('#').trimResults();
public static final Splitter SEMI_SPLITTER = Splitter.on(';').trimResults();
Expand Down
38 changes: 24 additions & 14 deletions UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -637,16 +637,7 @@ private void showString(final String string, String separator, Appendable out)
if (UnicodeUtilities.RTL.containsSome(literal)) {
literal = '\u200E' + literal + '\u200E';
}
String name = UnicodeUtilities.getName(string, separator, false);
if (name == null || name.length() == 0) {
name = "<i>no name</i>";
} else {
boolean special = name.indexOf('<') >= 0;
name = UnicodeUtilities.toHTML.transliterate(name);
if (special) {
name = "<i>" + name + "</i>";
}
}
String name = UnicodeUtilities.getName(string, separator, false, false);
literal = UnicodeSetUtilities.addEmojiVariation(literal);
if (doTable) {
out.append(
Expand Down Expand Up @@ -801,7 +792,8 @@ String getPropString(List<UnicodeProperty> props, String codePoints, boolean sho
// }
}

private static String getName(String string, String separator, boolean andCode) {
private static String getName(
String string, String separator, boolean andCode, boolean plainText) {
StringBuilder result = new StringBuilder();
int cp;
for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) {
Expand All @@ -812,7 +804,25 @@ private static String getName(String string, String separator, boolean andCode)
if (andCode) {
result.append("U+").append(com.ibm.icu.impl.Utility.hex(cp, 4)).append(' ');
}
result.append(CachedProps.NAMES.getValue(cp));
final String name = CachedProps.NAMES.getValue(cp);
if (name != null) {
result.append(name);
} else {
// TODO(egg): We only have Name_Aliasβ during β, which is silly. This will probably
// solve itself as part of https://github.com/unicode-org/unicodetools/issues/432.
String alias =
getFactory()
.getProperty(CachedProps.IS_BETA ? "Name_Aliasβ" : "Name_Alias")
.getValue(cp);
if (alias == null) {
alias = "no name";
}
if (plainText) {
result.append("(" + alias + ")");
} else {
result.append("<i>" + alias + "</i>");
}
}
}
return result.toString();
}
Expand Down Expand Up @@ -1931,7 +1941,7 @@ private static void showBidiLine(
writer.println("</tr><tr><th>Character</th>");
for (int i = 0; i < str.length(); ++i) {
final String s = str.substring(i, i + 1);
String title = toHTML.transform(getName(s, "", true));
String title = toHTML.transform(getName(s, "", true, true));
writer.println(
"<td class='bccell' title='"
+ title
Expand Down Expand Up @@ -1982,7 +1992,7 @@ private static void showBidiLine(
String title =
bidiChar.length() == 0
? "deleted"
: toHTML.transform(getName(bidiChar, "", true));
: toHTML.transform(getName(bidiChar, "", true, true));
String td = bidiChar.length() == 0 ? "bxcell" : "bccell";
writer.println(
"<td class='"
Expand Down
Loading

0 comments on commit 3dd8e9e

Please sign in to comment.