Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into 172-C4
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Oct 12, 2023
2 parents 489b8c3 + db60be5 commit 0f7aacb
Show file tree
Hide file tree
Showing 35 changed files with 728 additions and 164 deletions.
118 changes: 94 additions & 24 deletions .github/workflows/cli-build-instructions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,14 @@ jobs:
run: |
mkdir -p Generated/BIN
- name: Run command - Build and Test
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# Since these are just examples to smoke-test the in-source build process,
# let’s not run the whole build and test suite, which is quite slow (6 min
# 26 s as of this writing). Just run the invariant tests and smoke-test
# MakeUnicodeFiles. We don’t even check that MakeUnicodeFiles leaves the
# checked-in files unchanged; running it without that check makes little
# sense in isolation, but performing the check is the job of the other job
# (ucd-and-smoke-tests).
- name: Run invariant tests
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand All @@ -91,14 +97,15 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

out-of-source-build:
name: Out-of-source Instructions

# Out-of-source build.
ucd-and-smoke-tests:
name: Check UCD consistency, invariants, smoke-test generators
runs-on: ubuntu-latest
steps:
- name: Checkout Unicode Tools
uses: actions/checkout@v3
with:
repository: unicode-org/unicodetools
path: unicodetools/mine/src
- name: Get the CLDR_REF from pom.xml
id: cldr_ref
Expand Down Expand Up @@ -136,6 +143,30 @@ jobs:
run: |
mkdir -p unicodetools/mine/Generated/BIN
- name: Run command - Make Unicode Files
run: |
cd unicodetools/mine/src
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Check that UCD files are consistent
run: |
cd unicodetools/mine/src
./py/copygenerateducd.py --out-of-source -y
git diff --compact-summary --exit-code || {
git diff --compact-summary |
awk '{
if (previous) {
print "::error file="previous",title=File must be regenerated::Run org.unicode.text.UCD.Main build MakeUnicodeFiles and copy any changed files to unicodetools/data/ucd/dev."
}
previous=$1
}'
exit 1
}
# Only run the build and tests once we know the UCD is internally consistent.
# MakeUnicodeFiles is much faster than the build-and-test step anyway.
- name: Run command - Build and Test
run: |
cd unicodetools/mine/src
Expand All @@ -151,13 +182,6 @@ jobs:
path: |
unicodetools/mine/Generated/UnicodeTestResults.*
- name: Run command - Make Unicode Files
run: |
cd unicodetools/mine/src
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/emoji/aac.md#aacorderjava
- name: Run command - AAC Order
run: |
Expand All @@ -166,18 +190,6 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests
# Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names
- name: Run command - UCA - collation validity log
run: |
cd unicodetools/mine/src
# invoke main() in class ...UCA.Main
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# check for output file
compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# https://github.com/unicode-org/unicodetools/blob/main/docs/idna.md
- name: Run command - IDNA
run: |
Expand Down Expand Up @@ -252,3 +264,61 @@ jobs:
mvn -s .github/workflows/mvn-settings.xml -Dexec.mainClass="org.unicode.propstest.CheckProperties" -Dexec.classpathScope=test test-compile -Dexec.args="COMPARE ALL $PREVIOUS_UVERSION" compile exec:java -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Out-of-source build.
uca:
name: Check UCA data
runs-on: ubuntu-latest
steps:
- name: Checkout Unicode Tools
uses: actions/checkout@v3
with:
repository: unicode-org/unicodetools
path: unicodetools/mine/src
- name: Get the CLDR_REF from pom.xml
id: cldr_ref
run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT}
- name: Verify CLDR checkout ref
run: echo CLDR_REF="${{ steps.cldr_ref.outputs.CLDR_REF }}" && [ "${{ steps.cldr_ref.outputs.CLDR_REF }}x" != "x" ] # fail if empty
- name: Cache CLDR repository
uses: actions/cache@v3
with:
path: cldr/mine/src
key: cldr-${{ steps.cldr_ref.outputs.CLDR_REF }}
restore-keys: |
cldr
- name: Check out CLDR
uses: actions/checkout@v3
with:
repository: unicode-org/cldr
path: cldr/mine/src
ref: main
fetch-depth: 0
- name: Switch CLDR to CLDR_REF
run: cd cldr/mine/src && git fetch && git checkout ${{ steps.cldr_ref.outputs.CLDR_REF }}
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- name: Set up out-of-source output dir
run: |
mkdir -p unicodetools/mine/Generated/BIN
# https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests
# Note: desuffixucd.py is not run in the UCA jobs because no version numbers were detected in the data file names.
- name: Run command - UCA - collation validity log
run: |
cd unicodetools/mine/src
# invoke main() in class ...UCA.Main
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# check for output file
compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2 changes: 2 additions & 0 deletions UnicodeJsps/jetty.d/ROOT/robots.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
User-agent: *
Disallow: /UnicodeJsps
2 changes: 1 addition & 1 deletion UnicodeJsps/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>29.0-jre</version>
<version>32.0.0-jre</version>
</dependency>

<!-- test -->
Expand Down
27 changes: 20 additions & 7 deletions docs/unicodejsps/index.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
# Building UnicodeJsp

- Note: you can run the latest UnicodeJsp locally with docker using:

```
docker run --rm -p 8080:8080 unicode/unicode-jsp
```

- Note 2: there are some notes on updated processes for using GCP at [gcp-run.md](./gcp-run.md) - at present, automated deployment is TODO.

## Compiling
Expand Down Expand Up @@ -113,7 +107,26 @@ Look at <http://localhost:8080/UnicodeJsps/properties.jsp>, and make sure that
there aren't any Z-Other props at the bottom (you'll need to update via Adding
New Properties if there are).

(:construction: **TODO**: explain how to do a Docker-based build here.)
### Running a Docker-based build

First, compile the Java code:

- `mvn -B package -am -pl UnicodeJsps -DskipTests=true`

Next, make a “backup” copy of CLDR and UnicodeTools. (`~/src/cldr` is an optional existing CLDR checkout; if present, it is used as a reference so that less has to be downloaded.)

- `git clone --reference-if-able ~/src/cldr https://github.com/unicode-org/cldr.git || (cd cldr && git pull)`
- `mkdir -p UnicodeJsps/target && tar -cpz --exclude=.git --exclude=unicodetools/target/ -f UnicodeJsps/target/cldr-unicodetools.tgz ./cldr/ ./unicodetools/`

Now, finally, build the Docker image:

- `docker build -t unicode/unicode-jsp:latest UnicodeJsps/`

Then run it. Press Control-C to stop the container; while it is running, visit <http://127.0.0.1:8080>.

```
docker run --rm -p 8080:8080 unicode/unicode-jsp:latest
```

## Commit/PR

Expand Down
5 changes: 3 additions & 2 deletions py/copygenerateducd.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@


def main():
out_of_source = '--out-of-source' in sys.argv[1:]
cwd = Path().cwd()
uversion = os.getenv("CURRENT_UVERSION")
genucddir = cwd / "Generated" / "UCD" / uversion
genucddir = (cwd / ".." if out_of_source else cwd) / "Generated" / "UCD" / uversion
if not genucddir.exists():
raise Exception(f"Generated directory not found at {genucddir.absolute()}")

Expand All @@ -34,7 +35,7 @@ def main():
print("THE FOLLOWING FILES WILL BE MOVED:\n")
print("\n".join([f"{str(p.name)} --> {devucddir / p.relative_to(genucddir)}" for p in to_move])) # noqa: E501

confirm = bool(sys.argv[-1] == "-y") # enable running this in automation
confirm = bool("-y" in sys.argv[1:]) # enable running this in automation
if not confirm:
confirm = input("\nProceed [y/N]?").lower() == "y"

Expand Down
5 changes: 5 additions & 0 deletions unicodetools/data/ucd/dev/ArabicShaping.txt
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,11 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
10D22; HANIFI ROHINGYA SAKIN; R; No_Joining_Group
10D23; HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE; D; HANIFI ROHINGYA KINNA YA

# Arabic Extended-D Characters
10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL
10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH
10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF

# Sogdian Characters

10F30; SOGDIAN ALEPH; D; No_Joining_Group
Expand Down
6 changes: 4 additions & 2 deletions unicodetools/data/ucd/dev/DerivedAge.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedAge-16.0.0.txt
# Date: 2023-10-03, 19:01:23 GMT
# Date: 2023-10-12, 18:06:06 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -2009,8 +2009,10 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT

# Newly assigned in Unicode 16.0.0 (September, 2024)

0897 ; 16.0 # ARABIC PEPET
1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE
10EC2..10EC4 ; 16.0 # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW

# Total code points: 2
# Total code points: 6

# EOF
Loading

0 comments on commit 0f7aacb

Please sign in to comment.