Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into L2/23-276
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Jun 6, 2024
2 parents 7801e4e + 184d7e5 commit 167840f
Show file tree
Hide file tree
Showing 460 changed files with 458,249 additions and 285,910 deletions.
23 changes: 8 additions & 15 deletions .github/workflows/build-jsp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,15 @@ jobs:
restore-keys: |
${{ runner.os }}-maven-
# TODO: move this DOWN after JSPs once it works
- name: Generate Unicode data
# TODO: hard coded version
# TODO: symlink of security here?
- name: Regenerate the property cache files
run: >
mkdir -pv $(pwd)/output/Generated/ &&
mvn -s .github/workflows/mvn-settings.xml -B compile exec:java -DskipTests=true
-Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version ${CURRENT_UVERSION} build MakeUnicodeFiles"
-am -pl unicodetools -DCLDR_DIR=${GITHUB_WORKSPACE}/cldr
-DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=${CURRENT_UVERSION}
-DUNICODETOOLS_GEN_DIR=$(pwd)/output/Generated
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Run unicodetools tests
run: >
mvn -s .github/workflows/mvn-settings.xml -B test -am -pl unicodetools
-DCLDR_DIR=${GITHUB_WORKSPACE}/cldr -T 1C -Dparallel=classes -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=${CURRENT_UVERSION} -DUNICODETOOLS_GEN_DIR=$(pwd)/output/Generated
mkdir -pv $(pwd)/Generated/ &&
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.jsp.RebuildPropertyCache"
-am -pl unicodetools
-DCLDR_DIR=${GITHUB_WORKSPACE}/cldr
-DUNICODETOOLS_REPO_DIR=$(pwd)
-DUNICODETOOLS_GEN_DIR=$(pwd)/Generated &&
tar -cpz -f UnicodeJsps/target/generated.tgz ./Generated
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Package JSPs
Expand Down
103 changes: 101 additions & 2 deletions .github/workflows/cli-build-instructions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
# change anything, which makes little sense; but that is the job of the
# other job.
- name: Run invariant tests
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand Down Expand Up @@ -170,7 +170,7 @@ jobs:
- name: Run command - Build and Test
run: |
cd unicodetools/mine/src
MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -Dtest=!TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DEMIT_GITHUB_ERRORS
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand Down Expand Up @@ -275,6 +275,22 @@ jobs:
with:
repository: unicode-org/unicodetools
path: unicodetools/mine/src
# Pull requests only: fetch UnicodeData.txt as of the PR's base commit into ./base,
# so that it can be compared with the copy checked out under unicodetools/mine/src.
- name: Checkout base UnicodeData.txt
if: ${{ github.event_name == 'pull_request'}}
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.base.sha }}
path: base
sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt
# Pull requests only: record in the job environment whether the repertoire differs
# from the base. REPERTOIRE_CHANGED receives the exit status of diff, so 0 means
# "identical" and any nonzero value means "differs" (or diff itself failed).
- name: Compare repertoire
if: ${{ github.event_name == 'pull_request'}}
run: |
# Look for changes affecting the first two fields of UnicodeData.txt (code point and name).
# Each sed keeps "field1;field2" and drops the rest of the line.
sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt
sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt
# Disable errexit: a nonzero status from diff is the expected signal, not a failure.
set +e
diff base-repertoire.txt merged-repertoire.txt
# $? is still the status of diff here; nothing may be inserted between these two lines.
echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV"
- name: Get the CLDR_REF from pom.xml
id: cldr_ref
run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT}
Expand Down Expand Up @@ -316,9 +332,92 @@ jobs:
# Runs org.unicode.text.UCA.Main with "writeCollationValidityLog ICU" and then checks
# that a CheckCollationValidity.html report exists under ../Generated/UCA/.
- name: Run command - UCA - collation validity log
run: |
cd unicodetools/mine/src
# Echoed for the job log. Presumably exported by an earlier "Compare repertoire"
# step via $GITHUB_ENV; an unset value is treated as 0 below.
echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED"
# When the repertoire changed, turn off errexit so that a failing mvn run below
# does not abort the script; the remaining commands still execute.
if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]]
then set +e
fi
# invoke main() in class ...UCA.Main
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# check for output file
# compgen -G returns nonzero when the glob matches nothing; as the last command
# of the script, its status is the script's status even after "set +e".
compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Out-of-source build.
security:
  # Runs only TestTestUnicodeInvariants#testSecurityInvariants, using an
  # out-of-source layout: sources in unicodetools/mine/src, CLDR in
  # cldr/mine/src, generated output in unicodetools/mine/Generated.
  #
  # On pull requests whose UnicodeData.txt repertoire (code point and name
  # fields) differs from the base commit, test failures are reported as
  # notices and the job still succeeds; otherwise they are errors.
  name: Check security data invariants
  runs-on: ubuntu-latest
  steps:
    - name: Checkout Unicode Tools
      uses: actions/checkout@v3
      with:
        repository: unicode-org/unicodetools
        path: unicodetools/mine/src
    # Pull requests only: fetch the base commit's UnicodeData.txt into ./base.
    - name: Checkout base UnicodeData.txt
      if: ${{ github.event_name == 'pull_request'}}
      uses: actions/checkout@v3
      with:
        ref: ${{ github.event.pull_request.base.sha }}
        path: base
        sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt
    # Pull requests only: export REPERTOIRE_CHANGED (the exit status of diff;
    # 0 means identical) to the following steps through $GITHUB_ENV.
    - name: Compare repertoire
      if: ${{ github.event_name == 'pull_request'}}
      run: |
        # Look for changes affecting the first two fields of UnicodeData.txt (code point and name).
        sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt
        sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt
        # A nonzero status from diff is the expected signal, not a failure.
        set +e
        diff base-repertoire.txt merged-repertoire.txt
        # $? must still be the status of diff here.
        echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV"
    # Takes the third '-'-separated field of the cldr.version Maven property.
    - name: Get the CLDR_REF from pom.xml
      id: cldr_ref
      run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT}
    - name: Verify CLDR checkout ref
      run: echo CLDR_REF="${{ steps.cldr_ref.outputs.CLDR_REF }}" && [ "${{ steps.cldr_ref.outputs.CLDR_REF }}x" != "x" ] # fail if empty
    - name: Cache CLDR repository
      uses: actions/cache@v3
      with:
        path: cldr/mine/src
        key: cldr-${{ steps.cldr_ref.outputs.CLDR_REF }}
        restore-keys: |
          cldr
    # Check out main with full history first, so that the next step can
    # switch to CLDR_REF.
    - name: Check out CLDR
      uses: actions/checkout@v3
      with:
        repository: unicode-org/cldr
        path: cldr/mine/src
        ref: main
        fetch-depth: 0
    - name: Switch CLDR to CLDR_REF
      run: cd cldr/mine/src && git fetch && git checkout ${{ steps.cldr_ref.outputs.CLDR_REF }}
    - name: Set up JDK 11
      uses: actions/setup-java@v1
      with:
        java-version: '11'
    # actions/cache@v3, matching the CLDR cache step above; the v2 line of
    # this action is retired.
    - name: Cache local Maven repository
      uses: actions/cache@v3
      with:
        path: ~/.m2/repository
        key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          ${{ runner.os }}-maven-
    - name: Set up out-of-source output dir
      run: |
        mkdir -p unicodetools/mine/Generated/BIN
    - name: Run invariant tests
      run: |
        cd unicodetools/mine/src
        echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED"
        # Downgrade "::error" workflow commands to "::notice" when the
        # repertoire changed (an unset variable counts as unchanged).
        if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]]
        then ERROR="::notice"
        else ERROR="::error"
        fi
        MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS 2>&1 | sed "s/^::error/$ERROR/"
        # The pipeline's own status is that of sed; keep the status of mvn.
        STATUS=${PIPESTATUS[0]}
        if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]]
        then exit 0
        else exit $STATUS
        fi
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
87 changes: 87 additions & 0 deletions .github/workflows/pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
name: Pipeline

# Pull-request checks that apply when a PR changes the repertoire, i.e. the
# code point or name fields of unicodetools/data/ucd/dev/UnicodeData.txt.
# The first job detects such a change; the other jobs run Python scripts from
# py/pipeline-workflow only when it reports one.

on:
  pull_request:
    # '**' matches every base branch, including names that contain '/';
    # a lone '*' does not match across '/'.
    branches: ['**']
    types:
      - opened
      - synchronize
      - reopened
      - labeled
      - unlabeled
      - ready_for_review
      - converted_to_draft
      - edited

jobs:
  labels-for-repertoire-changes:
    name: Labels for repertoire changes
    outputs:
      # 'true' or 'false', written by the compare-repertoire step.
      repertoire-changed: ${{ steps.compare-repertoire.outputs.repertoire-changed }}
      # Presumably written to $GITHUB_OUTPUT by the script run in the
      # check-labels step; verify against compare-repertoire.py.
      pipeline-label: ${{ steps.check-labels.outputs.pipeline-label }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout merged UnicodeData.txt
        uses: actions/checkout@v3
        with:
          path: merged
          sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt
      - name: Checkout base UnicodeData.txt
        uses: actions/checkout@v3
        with:
          ref: ${{ github.event.pull_request.base.sha }}
          path: base
          sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt
      - name: Compare repertoire
        id: compare-repertoire
        run: |
          # Look for changes affecting the first two fields of UnicodeData.txt (code point and name).
          sed 's/^\([^;]*;[^;]*\);.*$/\1/' merged/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt
          sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt
          # diff succeeds only when the two extracts are identical.
          if diff base-repertoire.txt merged-repertoire.txt
          then echo "repertoire-changed=false" >> "$GITHUB_OUTPUT"
          else echo "repertoire-changed=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Checkout Python scripts
        uses: actions/checkout@v3
        with:
          sparse-checkout: py/pipeline-workflow
      - name: Check pipeline labels
        id: check-labels
        if: steps.compare-repertoire.outputs.repertoire-changed == 'true'
        run: python3 py/pipeline-workflow/compare-repertoire.py

  l2-document:
    needs: labels-for-repertoire-changes
    # always() lets this job be evaluated even when the job it needs did not
    # succeed; it still runs only if a repertoire change was reported.
    if: ${{ always() && needs.labels-for-repertoire-changes.outputs.repertoire-changed == 'true' }}
    name: Proposal document
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Python scripts
        uses: actions/checkout@v3
        with:
          sparse-checkout: py/pipeline-workflow
      - name: Check L2 document
        run: |
          python3 py/pipeline-workflow/check-l2-document.py

  utc-decision:
    needs: labels-for-repertoire-changes
    # Skipped when the PR carries the pipeline-recommended-to-UTC label.
    if: needs.labels-for-repertoire-changes.outputs.repertoire-changed == 'true' && needs.labels-for-repertoire-changes.outputs.pipeline-label != 'pipeline-recommended-to-UTC'
    name: UTC decision
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Python scripts
        uses: actions/checkout@v3
        with:
          sparse-checkout: py/pipeline-workflow
      - name: Check UTC decision
        run: python3 py/pipeline-workflow/check-utc-decision.py

  draft-unless-approved:
    needs: labels-for-repertoire-changes
    if: needs.labels-for-repertoire-changes.outputs.repertoire-changed == 'true'
    name: Draft unless approved
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Python scripts and DerivedAge.txt
        uses: actions/checkout@v3
        with:
          sparse-checkout: |
            py/pipeline-workflow
            unicodetools/data/ucd/dev/DerivedAge.txt
      - name: Check draft status
        env:
          # Passes the label found by the first job to the script.
          PIPELINE_LABEL: ${{ needs.labels-for-repertoire-changes.outputs.pipeline-label }}
        run: python3 py/pipeline-workflow/check-draft-status.py

4 changes: 3 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 2001-2023 Unicode, Inc.
Copyright © 2001-2024 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
Expand Down Expand Up @@ -37,3 +37,5 @@ Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.

SPDX-License-Identifier: Unicode-3.0
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ The tools maintainers use GH issues for issues with the code in this repo.

### Copyright & Licenses

Copyright © 2001-2023 Unicode, Inc. Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries.
Copyright © 2001-2024 Unicode, Inc. Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries.

The project is released under [LICENSE](./LICENSE).

Expand Down
6 changes: 4 additions & 2 deletions UnicodeJsps/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ RUN cd source && gcc -I ../include/ -static -Os -o3 -o bidiref1 bidiref1.c bruti
RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true)
# copy and unpack to /tmp/data
ADD ./target/cldr-unicodetools.tgz /build/data/
ADD ./target/generated.tgz /build/data/
# move this into place (including unicodetools/unicodetools)
RUN rm -rf /build/data/cldr/.git # unneeded
FROM jetty:9-jre11-alpine-eclipse-temurin AS run
Expand All @@ -24,9 +25,10 @@ ENV BIDIREFHOME /usr/local/share
# copy the bidiref1 bin
ENV BIDIREF1 /usr/local/bin/bidiref1
COPY --from=cbuild /build/source/bidiref1 /usr/local/bin/
RUN mkdir -p /var/lib/jetty/data/unicodetools
RUN mkdir -p /var/lib/jetty/data/unicodetools/Generated
COPY --from=cbuild /build/data/cldr /var/lib/jetty/data/cldr
COPY --from=cbuild /build/data/unicodetools/ /var/lib/jetty/data/unicodetools/unicodetools/
ENV JAVA_OPTIONS -DCLDR_DIR=/var/lib/jetty/data/cldr -DUNICODETOOLS_REPO_DIR=/var/lib/jetty/data/unicodetools
COPY --from=cbuild /build/data/Generated /var/lib/jetty/data/unicodetools/Generated
ENV JAVA_OPTIONS -DCLDR_DIR=/var/lib/jetty/data/cldr -DUNICODETOOLS_REPO_DIR=/var/lib/jetty/data/unicodetools -DUNICODETOOLS_GEN_DIR=/var/lib/jetty/data/unicodetools/Generated -Xmx4g
# This is the default PORT. Override by setting PORT.
EXPOSE 8080
2 changes: 1 addition & 1 deletion UnicodeJsps/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
<!-- icu -->
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j-for-cldr</artifactId>
<artifactId>icu4j</artifactId>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ private int findIdentical(Comparable[][] sortedFlat, int rowIndex, int colIndex)
}
return sortedFlat.length - rowIndex;
}

// to-do: prevent overlap when it would cause information to be lost.
private BitSet breaksSpans = new BitSet();

Expand Down
Loading

0 comments on commit 167840f

Please sign in to comment.