diff --git a/.github/workflows/build-jsp.yml b/.github/workflows/build-jsp.yml index 4ac7a7172..7522567e4 100644 --- a/.github/workflows/build-jsp.yml +++ b/.github/workflows/build-jsp.yml @@ -52,22 +52,15 @@ jobs: restore-keys: | ${{ runner.os }}-maven- # TODO: move this DOWN after JSPs once it works - - name: Generate Unicode data - # TODO: hard coded version - # TODO: symlink of security here? + - name: Regenerate the property cache files run: > - mkdir -pv $(pwd)/output/Generated/ && - mvn -s .github/workflows/mvn-settings.xml -B compile exec:java -DskipTests=true - -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version ${CURRENT_UVERSION} build MakeUnicodeFiles" - -am -pl unicodetools -DCLDR_DIR=${GITHUB_WORKSPACE}/cldr - -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=${CURRENT_UVERSION} - -DUNICODETOOLS_GEN_DIR=$(pwd)/output/Generated - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Run unicodetools tests - run: > - mvn -s .github/workflows/mvn-settings.xml -B test -am -pl unicodetools - -DCLDR_DIR=${GITHUB_WORKSPACE}/cldr -T 1C -Dparallel=classes -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=${CURRENT_UVERSION} -DUNICODETOOLS_GEN_DIR=$(pwd)/output/Generated + mkdir -pv $(pwd)/Generated/ && + mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.jsp.RebuildPropertyCache" + -am -pl unicodetools + -DCLDR_DIR=${GITHUB_WORKSPACE}/cldr + -DUNICODETOOLS_REPO_DIR=$(pwd) + -DUNICODETOOLS_GEN_DIR=$(pwd)/Generated && + tar -cpz -f UnicodeJsps/target/generated.tgz ./Generated env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Package JSPs diff --git a/UnicodeJsps/Dockerfile b/UnicodeJsps/Dockerfile index 6d68d359c..86e90f655 100644 --- a/UnicodeJsps/Dockerfile +++ b/UnicodeJsps/Dockerfile @@ -9,6 +9,7 @@ RUN cd source && gcc -I ../include/ -static -Os -o3 -o bidiref1 bidiref1.c bruti RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true) # copy and unpack to /tmp/data ADD ./target/cldr-unicodetools.tgz 
/build/data/ +ADD ./target/generated.tgz /build/data/ # move this into place (including unicodetools/unicodetools) RUN rm -rf /build/data/cldr/.git # unneeded FROM jetty:9-jre11-alpine-eclipse-temurin AS run @@ -24,9 +25,10 @@ ENV BIDIREFHOME /usr/local/share # copy the bidiref1 bin ENV BIDIREF1 /usr/local/bin/bidiref1 COPY --from=cbuild /build/source/bidiref1 /usr/local/bin/ -RUN mkdir -p /var/lib/jetty/data/unicodetools +RUN mkdir -p /var/lib/jetty/data/unicodetools/Generated COPY --from=cbuild /build/data/cldr /var/lib/jetty/data/cldr COPY --from=cbuild /build/data/unicodetools/ /var/lib/jetty/data/unicodetools/unicodetools/ -ENV JAVA_OPTIONS -DCLDR_DIR=/var/lib/jetty/data/cldr -DUNICODETOOLS_REPO_DIR=/var/lib/jetty/data/unicodetools +COPY --from=cbuild /build/data/Generated /var/lib/jetty/data/unicodetools/Generated +ENV JAVA_OPTIONS -DCLDR_DIR=/var/lib/jetty/data/cldr -DUNICODETOOLS_REPO_DIR=/var/lib/jetty/data/unicodetools -DUNICODETOOLS_GEN_DIR=/var/lib/jetty/data/unicodetools/Generated -Xmx4g # This is the default PORT. Override by setting PORT. 
EXPOSE 8080 diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java index ea6063774..c9002c916 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java @@ -1,6 +1,5 @@ package org.unicode.jsp; -import com.ibm.icu.util.ICUException; import com.ibm.icu.util.VersionInfo; import java.io.IOException; import javax.servlet.ServletConfig; @@ -9,8 +8,6 @@ import javax.servlet.ServletResponse; import javax.servlet.annotation.WebServlet; import org.unicode.props.IndexUnicodeProperties; -import org.unicode.props.UcdProperty; -import org.unicode.props.UcdPropertyValues.Age_Values; import org.unicode.text.utility.Settings; @WebServlet @@ -28,57 +25,6 @@ private static synchronized void setOldestLoadedUcd(VersionInfo v) { oldestLoadedUcd = v; } - private static void loadUcdHistory(VersionInfo earliest) { - System.out.println("Loading back to " + earliest + "..."); - Age_Values[] ages = Age_Values.values(); - final long overallStart = System.currentTimeMillis(); - for (int i = ages.length - 1; i >= 0; --i) { - final var age = ages[i]; - if (age == Age_Values.Unassigned) { - continue; - } - final long ucdStart = System.currentTimeMillis(); - System.out.println("Loading UCD " + age.getShortName() + "..."); - for (boolean unihan : new boolean[] {false, true}) { - final long partStart = System.currentTimeMillis(); - final String name = unihan ? 
"Unihan" : "non-Unihan properties"; - final var properties = IndexUnicodeProperties.make(age.getShortName()); - for (UcdProperty property : UcdProperty.values()) { - if (property.getShortName().startsWith("cjk") == unihan) { - try { - properties.load(property); - } catch (ICUException e) { - e.printStackTrace(); - } - } - } - System.out.println( - "Loaded " - + name - + " for " - + age.getShortName() - + " (" - + (System.currentTimeMillis() - partStart) - + " ms)"); - } - System.out.println( - "Loaded UCD " - + age.getShortName() - + " in " - + (System.currentTimeMillis() - ucdStart) - + " ms"); - var version = VersionInfo.getInstance(age.getShortName()); - setOldestLoadedUcd(version); - if (version == earliest) { - break; - } - } - System.out.println( - "Loaded all UCD history in " - + (System.currentTimeMillis() - overallStart) / 1000 - + " s"); - } - @Override public void destroy() {} @@ -94,12 +40,14 @@ public String getServletInfo() { @Override public void init(ServletConfig config) throws ServletException { - loadUcdHistory(Settings.LAST_VERSION_INFO); + IndexUnicodeProperties.loadUcdHistory( + Settings.LAST_VERSION_INFO, UcdLoader::setOldestLoadedUcd, true); new Thread( new Runnable() { @Override public void run() { - loadUcdHistory(null); + IndexUnicodeProperties.loadUcdHistory( + null, UcdLoader::setOldestLoadedUcd, true); } }) .start(); diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java index 5fad0cd68..2f28ab655 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java @@ -64,7 +64,7 @@ public static XPropertyFactory make() { } public UnicodeProperty getProperty(String propertyAlias) { - var versioned = VersionedProperty.forJSPs().set(propertyAlias); + var versioned = VersionedProperty.forJSPs(UcdLoader::getOldestLoadedUcd).set(propertyAlias); if (versioned != null) { return 
versioned.getProperty(); } diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin deleted file mode 100644 index 0fb19403d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin deleted file mode 100644 index f7bf20fb1..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin deleted file mode 100644 index 8af2cc103..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin deleted file mode 100644 index 4d154dd0c..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin deleted file mode 100644 index 37621bfe1..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin deleted file mode 100644 index 6de05489a..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin deleted file mode 100644 index 934e70c08..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin deleted file mode 100644 index 7ecbe9121..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin deleted file mode 100644 index 137ea206c..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin deleted file mode 100644 index d0089782a..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin deleted file mode 100644 index 88547522d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin deleted file mode 100644 index 5517f1a4a..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin deleted file mode 100644 index eef1a6a23..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin deleted file mode 100644 index 75420c338..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin deleted file mode 100644 index b15729670..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin deleted file mode 100644 index 348a73927..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin deleted file mode 100644 index d4933d2bc..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin deleted file mode 100644 index 384480e97..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin deleted file mode 100644 index 56a0dac25..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin deleted file mode 100644 index f04e5f87d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin deleted file mode 100644 index e74e1a354..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin deleted file mode 100644 index 35aaa0146..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin deleted file mode 100644 index dec36868e..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin deleted file mode 100644 index 29ef054dd..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin deleted file mode 100644 index 286271390..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin deleted file mode 100644 index 18e77589f..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin deleted file mode 100644 index ae503185d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin deleted file mode 100644 index dab88d216..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin deleted file mode 100644 index 8fabd01c8..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin deleted file mode 100644 index 5e5c2ba7f..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin deleted 
file mode 100644 index 729ad90d5..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin deleted file mode 100644 index f62383515..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin deleted file mode 100644 index 7d95f9ed4..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin deleted file mode 100644 index 2d0406425..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin deleted file mode 100644 index d84cdd85e..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin deleted file mode 100644 index caf44d10e..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin deleted file 
mode 100644 index 40ffcc2be..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin deleted file mode 100644 index 17e647008..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin deleted file mode 100644 index 61c514bbe..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin deleted file mode 100644 index 44df46ac0..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin deleted file mode 100644 index b81092202..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin deleted file mode 100644 index ce7b5f11d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin deleted file mode 100644 index 
834d072a3..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin deleted file mode 100644 index a36d5af43..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin deleted file mode 100644 index aa6da3679..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin deleted file mode 100644 index cd83a7281..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin deleted file mode 100644 index f5cef05d6..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin deleted file mode 100644 index 2f07fb0b0..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin deleted file mode 100644 index c56ef07c6..000000000 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin deleted file mode 100644 index 8d0aa06e9..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin deleted file mode 100644 index 0cd1cf695..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin deleted file mode 100644 index 8e70e6047..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin deleted file mode 100644 index 0ef025ca5..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin deleted file mode 100644 index aa7f3b76a..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin deleted file mode 100644 index 2838521f6..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin and /dev/null differ diff --git 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin deleted file mode 100644 index 49ef70a06..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin deleted file mode 100644 index 4ce2cb93c..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin deleted file mode 100644 index 040ae01c2..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin deleted file mode 100644 index dc65d1bc3..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin deleted file mode 100644 index 429d67aea..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin deleted file mode 100644 index 8b5bbe482..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin and /dev/null differ diff --git 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin deleted file mode 100644 index 73536b47b..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin deleted file mode 100644 index 56eef5bbe..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin deleted file mode 100644 index f1955976b..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin deleted file mode 100644 index 69ceca418..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin deleted file mode 100644 index 6225ed7dd..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin deleted file mode 100644 index fc203843d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin deleted file mode 100644 index 9174330a0..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin deleted file mode 100644 index 89b2c843d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin deleted file mode 100644 index 85741e5f3..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin deleted file mode 100644 index 4cb7d0b09..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin deleted file mode 100644 index 935cbeed0..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin deleted file mode 100644 index 73066193e..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin deleted file mode 
100644 index a251ddc66..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin deleted file mode 100644 index 6c063aae0..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin deleted file mode 100644 index 4467e672c..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin deleted file mode 100644 index bbb617664..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin deleted file mode 100644 index 1bd685159..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin deleted file mode 100644 index a9ac908b4..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin deleted file mode 100644 index d0e3d2ebf..000000000 
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin deleted file mode 100644 index a00dd8596..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin deleted file mode 100644 index eeb416a94..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin deleted file mode 100644 index d3e745a71..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin deleted file mode 100644 index bf74bcf58..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin deleted file mode 100644 index 32f046a03..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin deleted file mode 100644 index 3705d0d36..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin deleted file mode 100644 index e3530b6be..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin deleted file mode 100644 index 8d6ae3276..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin deleted file mode 100644 index d40257170..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin deleted file mode 100644 index 6457f9a34..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin deleted file mode 100644 index f9944df5c..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin deleted file mode 100644 index e89e68021..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin deleted file mode 100644 index bd8b5eb6c..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin deleted file mode 100644 index 1bbb982c9..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin deleted file mode 100644 index 01fa821d2..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin deleted file mode 100644 index 0b89554b2..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin deleted file mode 100644 index c36a7bbe0..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin and /dev/null differ diff --git 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin deleted file mode 100644 index a7d8a41e9..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin deleted file mode 100644 index 93c0ea22a..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin deleted file mode 100644 index ccf6347f3..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin deleted file mode 100644 index 0cfba1145..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin deleted file mode 100644 index 07a2e12bd..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin deleted file mode 100644 index 88d7316ad..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin and /dev/null differ diff --git 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin deleted file mode 100644 index ae518e560..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin deleted file mode 100644 index 9f869dff3..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin deleted file mode 100644 index 885c1590e..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin deleted file mode 100644 index c7d8de06d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin deleted file mode 100644 index 26d180a6b..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin deleted file mode 100644 index 6f490e083..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin deleted file mode 100644 index c947c5559..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin deleted file mode 100644 index b0befdbe8..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin deleted file mode 100644 index 5a64aba5d..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin deleted file mode 100644 index f001b53d4..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin deleted file mode 100644 index 650a9a6e9..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin and /dev/null differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin deleted file mode 100644 index 0b17ca5de..000000000 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin and /dev/null differ diff --git a/docs/unicodejsps/index.md b/docs/unicodejsps/index.md index faf1fa84f..b45e36e9d 100644 --- a/docs/unicodejsps/index.md +++ 
b/docs/unicodejsps/index.md @@ -122,6 +122,13 @@ CLDR_REF=$(mvn help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d mkdir -p UnicodeJsps/target && tar -cpz --exclude=.git -f UnicodeJsps/target/cldr-unicodetools.tgz ./cldr/ ./unicodetools/ ``` +- Regenerate the property cache files: + +``` +mvn compile exec:java '-Dexec.mainClass="org.unicode.jsp.RebuildPropertyCache"' -am -pl unicodetools "-DUNICODETOOLS_GEN_DIR=Generated" "-DUNICODETOOLS_REPO_DIR=." "-DCLDR_DIR=" +tar -cpz -f UnicodeJsps/target/generated.tgz ./Generated/ +``` + Now, finally build. - `docker build -t unicode/unicode-jsp:latest UnicodeJsps/` diff --git a/unicodetools/src/main/java/org/unicode/jsp/RebuildPropertyCache.java b/unicodetools/src/main/java/org/unicode/jsp/RebuildPropertyCache.java new file mode 100644 index 000000000..37f3f0fe9 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/jsp/RebuildPropertyCache.java @@ -0,0 +1,54 @@ +package org.unicode.jsp; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.List; +import java.util.Queue; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.text.utility.Settings; + +public class RebuildPropertyCache { + + public static void main(String[] args) throws IOException { + final var binDir = new File(Settings.Output.BIN_DIR); + if (binDir.exists()) { + final Queue directories = new ArrayDeque<>(); + final List directoriesToDelete = new ArrayList<>(); + final List filesToDelete = new ArrayList<>(); + directories.add(binDir); + while (!directories.isEmpty()) { + final File directory = directories.poll(); + for (final var child : directory.listFiles()) { + if (child.isDirectory()) { + directories.add(child); + directoriesToDelete.add(child); + } else { + filesToDelete.add(child); + } + } + } + System.out.println( + "Cleaning " + + filesToDelete.size() + + " existing files in " + + directoriesToDelete.size() + + " existiing directories under 
" + + Settings.Output.BIN_DIR); + for (final var f : filesToDelete) { + if (!f.delete()) { + System.err.println("Failed to delete " + f); + } + } + for (final var f : directoriesToDelete) { + if (!f.delete()) { + System.err.println("Failed to delete " + f); + } + } + } + + IndexUnicodeProperties.loadUcdHistory(null, null, false); + System.out.println("Rebuilt property cache in " + Settings.Output.BIN_DIR); + } +} diff --git a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java index 2aafc819f..c1aba5f04 100644 --- a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java +++ b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java @@ -35,6 +35,7 @@ import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import org.unicode.draft.CldrUtility.VariableReplacer; @@ -56,6 +57,7 @@ * @author markdavis */ public class IndexUnicodeProperties extends UnicodeProperty.Factory { + public static final String UNCHANGED_IN_BASE_VERSION = "👉 SEE OTHER VERSION OF UNICODE"; static final String SET_SEPARATOR = "|"; /** Control file caching */ static final boolean GZIP = true; @@ -122,16 +124,43 @@ static DefaultValueType forString(String string) { static Map version2IndexUnicodeProperties = new ConcurrentHashMap<>(); - private IndexUnicodeProperties(VersionInfo ucdVersion2) { + private IndexUnicodeProperties(VersionInfo ucdVersion2, IndexUnicodeProperties base) { ucdVersion = ucdVersion2; oldVersion = ucdVersion2.compareTo(GenerateEnums.ENUM_VERSION_INFO) < 0; + baseVersionProperties = base; + } + + // TODO(egg): Too much stuff puts its hands in the raw maps to be able to do this by default. + // Remove these static warts once https://github.com/unicode-org/unicodetools/issues/716 is + // fixed. 
+ private static boolean incrementalProperties = false; + + public static synchronized void useIncrementalProperties() { + if (!incrementalProperties && !version2IndexUnicodeProperties.isEmpty()) { + throw new IllegalStateException( + "Cannot switch to incremental storage after making IUPs"); + } + incrementalProperties = true; } public static final synchronized IndexUnicodeProperties make(VersionInfo ucdVersion) { IndexUnicodeProperties newItem = version2IndexUnicodeProperties.get(ucdVersion); if (newItem == null) { + Age_Values nextAge = Age_Values.Unassigned; + for (int i = 0; i < Age_Values.values().length - 1; ++i) { + final var version = VersionInfo.getInstance(Age_Values.values()[i].getShortName()); + if (version.equals(ucdVersion)) { + nextAge = Age_Values.values()[i + 1]; + } + } + IndexUnicodeProperties base = + !incrementalProperties || ucdVersion == Settings.LAST_VERSION_INFO + ? null + : nextAge == Age_Values.Unassigned + ? make(Settings.LAST_VERSION_INFO) + : make(nextAge); version2IndexUnicodeProperties.put( - ucdVersion, newItem = new IndexUnicodeProperties(ucdVersion)); + ucdVersion, newItem = new IndexUnicodeProperties(ucdVersion, base)); } return newItem; } @@ -151,6 +180,7 @@ public static final IndexUnicodeProperties make() { final VersionInfo ucdVersion; final boolean oldVersion; + final IndexUnicodeProperties baseVersionProperties; final EnumMap> property2UnicodeMap = new EnumMap>(UcdProperty.class); private final Set fileNames = new TreeSet(); @@ -420,6 +450,10 @@ public synchronized boolean isLoaded(UcdProperty prop) { } public synchronized UnicodeMap load(UcdProperty prop2) { + return load(prop2, false); + } + + public synchronized UnicodeMap load(UcdProperty prop2, boolean expectCacheHit) { String fullFilename = "?"; try { if (prop2 == CHECK_PROPERTY) { @@ -443,7 +477,12 @@ public synchronized UnicodeMap load(UcdProperty prop2) { } } - PropertyParsingInfo.parseSourceFile(this, fullFilename, fileName); + if (expectCacheHit) { + 
System.err.println("Failed to find cached " + prop2 + ", parsing from source"); + } + + PropertyParsingInfo.parseSourceFile( + this, baseVersionProperties, fullFilename, fileName); return property2UnicodeMap.get(prop2); } catch (Exception e) { throw new ICUException(prop2.toString() + "( from: " + fullFilename + ")", e); @@ -645,6 +684,10 @@ class IndexUnicodeProperty extends UnicodeProperty.BaseProperty { private final UcdProperty prop; private final Map stringToNamedEnum; private final Set enumValueNames; + // The set of code points for which the property value differs from that in + // baseVersionProperties. + // TODO(egg): Really, for which it may differ, but does not in the default case. + private UnicodeSet diffSet; IndexUnicodeProperty(UcdProperty item) { this.prop = item; @@ -723,13 +766,47 @@ protected UnicodeMap _getRawUnicodeMap() { return load(prop); } + private UnicodeSet getDiffSet() { + if (diffSet == null) { + diffSet = + _getRawUnicodeMap().keySet(UNCHANGED_IN_BASE_VERSION).complement().freeze(); + } + return diffSet; + } + @Override protected String _getValue(int codepoint) { final String result = _getRawUnicodeMap().get(codepoint); return resolveValue(result, codepoint); } + @Override + public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) { + if (baseVersionProperties == null) { + return super.getSet(matcher, result); + } + final long start = System.currentTimeMillis(); + final UnicodeSet baseSet = + baseVersionProperties.getProperty(prop).getSet(matcher, result); + final UnicodeSet matchingInThisVersion = + super.getSet(matcher, null).retainAll(getDiffSet()); + result = + baseSet.addAll(matchingInThisVersion) + .removeAll( + getDiffSet().cloneAsThawed().removeAll(matchingInThisVersion)); + final long stop = System.currentTimeMillis(); + final long Δt_in_ms = stop - start; + if (Δt_in_ms > 100) { + System.out.println( + "Long getSet for U" + ucdVersion + ":" + prop + " (" + Δt_in_ms + " ms)"); + } + return result; + } + private 
String resolveValue(String rawValue, int codepoint) { + if (UNCHANGED_IN_BASE_VERSION.equals(rawValue)) { + return baseVersionProperties.getProperty(prop).getValue(codepoint); + } if (DefaultValueType.forString(rawValue) == DefaultValueType.CODE_POINT) { return Character.toString(codepoint); } else if (prop == UcdProperty.Name && rawValue != null && rawValue.endsWith("#")) { @@ -790,4 +867,65 @@ public UnicodeProperty getProperty(UcdProperty ucdProperty) { public UnicodeSet loadBinary(UcdProperty ucdProp) { return load(ucdProp).getSet(Binary.Yes.toString()); } + + public static void loadUcdHistory( + VersionInfo earliest, Consumer notifyLoaded, boolean expectCacheHit) { + useIncrementalProperties(); + System.out.println( + "Loading back to " + (earliest == null ? "the dawn of time" : earliest) + "..."); + Age_Values[] ages = Age_Values.values(); + final long overallStart = System.currentTimeMillis(); + for (int i = ages.length - 2; i >= 0; --i) { + // Load in the order last (released, the base), latest (dev), penultimate, + // antepenultimate, etc. + final var age = + ages[ + i == ages.length - 2 + ? ages.length - 3 + : i == ages.length - 3 ? ages.length - 2 : i]; + final long ucdStart = System.currentTimeMillis(); + System.out.println("Loading UCD " + age.getShortName() + "..."); + for (boolean unihan : new boolean[] {false, true}) { + final long partStart = System.currentTimeMillis(); + final String name = unihan ? 
"Unihan" : "non-Unihan properties"; + final var properties = IndexUnicodeProperties.make(age.getShortName()); + for (UcdProperty property : UcdProperty.values()) { + if (property.getShortName().startsWith("cjk") == unihan) { + try { + properties.load(property, expectCacheHit); + } catch (ICUException e) { + e.printStackTrace(); + } + } + } + System.out.println( + "Loaded " + + name + + " for " + + age.getShortName() + + " (" + + (System.currentTimeMillis() - partStart) + + " ms)"); + } + System.out.println( + "Loaded UCD " + + age.getShortName() + + " in " + + (System.currentTimeMillis() - ucdStart) + + " ms"); + var version = VersionInfo.getInstance(age.getShortName()); + if (notifyLoaded != null) { + notifyLoaded.accept(version); + } + if (version == earliest) { + break; + } + } + System.out.println( + "Loaded " + + (earliest == null ? "all UCD history" : "UCD history back to " + earliest) + + " in " + + (System.currentTimeMillis() - overallStart) / 1000 + + " s"); + } } diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java index 0e8036638..83585c1ad 100644 --- a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java +++ b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java @@ -15,6 +15,7 @@ import java.util.Locale; import java.util.Map; import java.util.MissingResourceException; +import java.util.Objects; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -209,16 +210,21 @@ public boolean useOldFile(VersionInfo ucdVersionRequested) { public static final Normalizer2 NFD = Normalizer2.getNFDInstance(); public static final Normalizer2 NFC = Normalizer2.getNFCInstance(); - public void put(UnicodeMap data, UcdLineParser.IntRange intRange, String string) { - put(data, intRange, string, null); + public void put( + UnicodeMap data, + UcdLineParser.IntRange intRange, + String string, + UnicodeProperty 
nextVersion) { + put(data, intRange, string, null, nextVersion); } public void put( UnicodeMap data, UcdLineParser.IntRange intRange, String string, - Merge merger) { - put(data, null, intRange, string, merger, false); + Merge merger, + UnicodeProperty nextVersion) { + put(data, null, intRange, string, merger, false, nextVersion); } public void put( @@ -227,7 +233,9 @@ public void put( UcdLineParser.IntRange intRange, String value, Merge merger, - boolean hackHangul) { + boolean hackHangul, + UnicodeProperty nextVersion) { + // In the per-code-point loop, values equal to nextVersion's become UNCHANGED_IN_BASE_VERSION. if (value != null && value.isEmpty() && property != UcdProperty.NFKC_Casefold @@ -242,6 +250,11 @@ public void put( PropertyUtilities.putNew(data, intRange.string, value, merger); } else { for (int codepoint = intRange.start; codepoint <= intRange.end; ++codepoint) { + String nextValue = null; + if (nextVersion != null) { + nextValue = nextVersion.getValue(codepoint); + } + String insertedValue; try { if (hackHangul) { // Use ICU for Hangul decomposition. @@ -251,14 +264,17 @@ public void put( NFC.normalize(fullDecomp.substring(0, 2)) + fullDecomp.substring(2); } - PropertyUtilities.putNew(data, missingSet, codepoint, fullDecomp, merger); + insertedValue = fullDecomp; } else if (value == CONSTRUCTED_NAME) { // Use ICU for Hangul Name construction, constant.
- PropertyUtilities.putNew( - data, missingSet, codepoint, UCharacter.getName(codepoint), merger); + insertedValue = UCharacter.getName(codepoint); } else { - PropertyUtilities.putNew(data, missingSet, codepoint, value, merger); + insertedValue = value; } + if (nextVersion != null && Objects.equals(insertedValue, nextValue)) { + insertedValue = IndexUnicodeProperties.UNCHANGED_IN_BASE_VERSION; + } + PropertyUtilities.putNew(data, missingSet, codepoint, insertedValue, merger); } catch (final Exception e) { String msg = String.format( @@ -446,6 +462,7 @@ public void setMultiValued(String multivalued2) { static void parseSourceFile( IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties, final String fullFilename, final String fileName) { FileType fileType = file2Type.get(fileName); @@ -494,26 +511,37 @@ static void parseSourceFile( parseCJKRadicalsFile( parser.withRange(false), propInfo, - indexUnicodeProperties.property2UnicodeMap.get(propInfo.property)); + indexUnicodeProperties.property2UnicodeMap.get(propInfo.property), + nextProperties == null + ? 
null + : nextProperties.getProperty(propInfo.property)); break; case NamedSequences: parseNamedSequencesFile( - parser.withRange(false), indexUnicodeProperties, propInfoSet); + parser.withRange(false), + indexUnicodeProperties, + nextProperties, + propInfoSet); break; case PropertyValue: - parsePropertyValueFile(parser.withMissing(true), indexUnicodeProperties); + parsePropertyValueFile( + parser.withMissing(true), indexUnicodeProperties, nextProperties); break; case Confusables: - parseConfusablesFile(parser, indexUnicodeProperties, propInfoSet); + parseConfusablesFile( + parser, indexUnicodeProperties, nextProperties, propInfoSet); break; case StandardizedVariants: - parseStandardizedVariantsFile(parser, indexUnicodeProperties, propInfoSet); + parseStandardizedVariantsFile( + parser, indexUnicodeProperties, nextProperties, propInfoSet); break; case NameAliases: - parseNameAliasesFile(parser, indexUnicodeProperties, propInfoSet); + parseNameAliasesFile( + parser, indexUnicodeProperties, nextProperties, propInfoSet); break; case HackField: - parseUnicodeDataFile(parser, indexUnicodeProperties, propInfoSet); + parseUnicodeDataFile( + parser, indexUnicodeProperties, nextProperties, propInfoSet); break; case Field: if (propInfoSet.size() == 1 @@ -523,10 +551,16 @@ static void parseSourceFile( parseSimpleFieldFile( parser.withMissing(true), propInfo, - indexUnicodeProperties.property2UnicodeMap.get(propInfo.property)); + indexUnicodeProperties.property2UnicodeMap.get(propInfo.property), + nextProperties == null + ? 
null + : nextProperties.getProperty(propInfo.property)); } else { parseFieldFile( - parser.withMissing(true), indexUnicodeProperties, propInfoSet); + parser.withMissing(true), + indexUnicodeProperties, + nextProperties, + propInfoSet); } break; case List: @@ -536,7 +570,10 @@ static void parseSourceFile( parseListFile( parser, propInfo, - indexUnicodeProperties.property2UnicodeMap.get(propInfo.property)); + indexUnicodeProperties.property2UnicodeMap.get(propInfo.property), + nextProperties == null + ? null + : nextProperties.getProperty(propInfo.property)); } else { throw new UnicodePropertyException( "List files must have only one property, and must be Boolean"); @@ -575,6 +612,7 @@ static void parseSourceFile( // propInfo.defaultValue = ""; // } switch (propInfo.defaultValueType) { + // TODO(egg): Consider also storing only the changed values here. case Script: case Simple_Lowercase_Mapping: case Simple_Titlecase_Mapping: @@ -617,7 +655,10 @@ static void parseSourceFile( } private static void parseCJKRadicalsFile( - UcdLineParser parser, PropertyParsingInfo propInfo, UnicodeMap data) { + UcdLineParser parser, + PropertyParsingInfo propInfo, + UnicodeMap data, + UnicodeProperty nextVersion) { // Note: CJKRadicals.txt cannot be completely represented via a UnicodeMap. // See the comments in RadicalStroke.getCJKRadicals(). 
/* @@ -632,29 +673,38 @@ private static void parseCJKRadicalsFile( String[] parts = line.getParts(); if (!parts[1].isEmpty()) { intRange.set(parts[1]); - propInfo.put(data, intRange, parts[0]); + propInfo.put(data, intRange, parts[0], nextVersion); } intRange.set(parts[2]); - propInfo.put(data, intRange, parts[0]); + propInfo.put(data, intRange, parts[0], nextVersion); } } private static void parseNamedSequencesFile( UcdLineParser parser, IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties, Set propInfoSet) { for (UcdLineParser.UcdLine line : parser) { line.getRange().set(line.getParts()[1]); for (final PropertyParsingInfo propInfo : propInfoSet) { final UnicodeMap data = indexUnicodeProperties.property2UnicodeMap.get(propInfo.property); - propInfo.put(data, line.getRange(), line.getParts()[0]); + propInfo.put( + data, + line.getRange(), + line.getParts()[0], + nextProperties == null + ? null + : nextProperties.getProperty(propInfo.property)); } } } private static void parsePropertyValueFile( - UcdLineParser parser, IndexUnicodeProperties indexUnicodeProperties) { + UcdLineParser parser, + IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties) { for (UcdLineParser.UcdLine line : parser) { String propName = line.getParts()[1]; UcdProperty item = UcdProperty.forString(propName); @@ -726,7 +776,13 @@ private static void parsePropertyValueFile( } catch (Exception e) { throw new IllegalArgumentException(line.getOriginalLine(), e); } - propInfo.put(data, line.getRange(), value); + propInfo.put( + data, + line.getRange(), + value, + nextProperties == null + ? 
null + : nextProperties.getProperty(propInfo.property)); } else { setPropDefault( propInfo.property, @@ -740,6 +796,7 @@ private static void parsePropertyValueFile( private static void parseConfusablesFile( UcdLineParser parser, IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties, Set propInfoSet) { for (UcdLineParser.UcdLine line : parser) { UcdLineParser.IntRange intRange = line.getRange(); @@ -759,14 +816,30 @@ private static void parseConfusablesFile( } final UnicodeMap data = indexUnicodeProperties.property2UnicodeMap.get(propInfo.property); - propInfo.put(data, intRange, parts[1]); + propInfo.put( + data, + intRange, + parts[1], + nextProperties == null ? null : nextProperties.getProperty(propInfo.property)); intRange.set(parts[1]); if (intRange.string == null) { if (!data.containsKey(intRange.start)) { - propInfo.put(data, intRange, parts[1]); + propInfo.put( + data, + intRange, + parts[1], + nextProperties == null + ? null + : nextProperties.getProperty(propInfo.property)); } } else if (!intRange.string.isEmpty() && !data.containsKey(intRange.string)) { - propInfo.put(data, intRange, parts[1]); + propInfo.put( + data, + intRange, + parts[1], + nextProperties == null + ? 
null + : nextProperties.getProperty(propInfo.property)); } } } @@ -774,6 +847,7 @@ private static void parseConfusablesFile( private static void parseStandardizedVariantsFile( UcdLineParser parser, IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties, Set propInfoSet) { for (UcdLineParser.UcdLine line : parser) { String[] parts = line.getParts(); @@ -783,6 +857,7 @@ private static void parseStandardizedVariantsFile( parseFields( line, indexUnicodeProperties, + nextProperties, propInfoSet, IndexUnicodeProperties.ALPHABETIC_JOINER, false); @@ -792,11 +867,13 @@ private static void parseStandardizedVariantsFile( private static void parseNameAliasesFile( UcdLineParser parser, IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties, Set propInfoSet) { for (UcdLineParser.UcdLine line : parser) { parseFields( line, indexUnicodeProperties, + nextProperties, propInfoSet, IndexUnicodeProperties.ALPHABETIC_JOINER, false); @@ -806,6 +883,7 @@ private static void parseNameAliasesFile( private static void parseUnicodeDataFile( UcdLineParser parser, IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties, Set propInfoSet) { int lastCodepoint = 0; for (UcdLineParser.UcdLine line : parser) { @@ -852,22 +930,25 @@ private static void parseUnicodeDataFile( // Decomposition_Mapping: Remove the decomposition type. 
parts[5] = DECOMP_REMOVE.matcher(parts[5]).replaceAll("").trim(); } - parseFields(line, indexUnicodeProperties, propInfoSet, null, hackHangul); + parseFields( + line, indexUnicodeProperties, nextProperties, propInfoSet, null, hackHangul); } } private static void parseFieldFile( UcdLineParser parser, IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties, Set propInfoSet) { for (UcdLineParser.UcdLine line : parser) { - parseFields(line, indexUnicodeProperties, propInfoSet, null, false); + parseFields(line, indexUnicodeProperties, nextProperties, propInfoSet, null, false); } } private static void parseFields( UcdLineParser.UcdLine line, IndexUnicodeProperties indexUnicodeProperties, + IndexUnicodeProperties nextProperties, Set propInfoSet, Merge merger, boolean hackHangul) { @@ -917,7 +998,10 @@ private static void parseFields( line.getRange(), value, merger, - hackHangul && propInfo.property == UcdProperty.Decomposition_Mapping); + hackHangul && propInfo.property == UcdProperty.Decomposition_Mapping, + nextProperties == null + ? 
null + : nextProperties.getProperty(propInfo.property)); } } else { for (final PropertyParsingInfo propInfo : propInfoSet) { @@ -933,7 +1017,10 @@ private static void parseFields( } private static void parseSimpleFieldFile( - UcdLineParser parser, PropertyParsingInfo propInfo, UnicodeMap data) { + UcdLineParser parser, + PropertyParsingInfo propInfo, + UnicodeMap data, + UnicodeProperty nextVersion) { for (UcdLineParser.UcdLine line : parser) { if (line.getType() == UcdLineParser.UcdLine.Contents.DATA) { if (propInfo.getDefaultValue() == null) { @@ -963,7 +1050,8 @@ private static void parseSimpleFieldFile( line.getRange(), line.getParts()[1], null, - false); + false, + nextVersion); } else { setPropDefault( propInfo.property, @@ -975,9 +1063,12 @@ private static void parseSimpleFieldFile( } private static void parseListFile( - UcdLineParser parser, PropertyParsingInfo propInfo, UnicodeMap data) { + UcdLineParser parser, + PropertyParsingInfo propInfo, + UnicodeMap data, + UnicodeProperty nextVersion) { for (UcdLineParser.UcdLine line : parser) { - propInfo.put(data, line.getRange(), "Yes"); + propInfo.put(data, line.getRange(), "Yes", nextVersion); } } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java index 1615491b4..bf49c77cf 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java @@ -1221,7 +1221,11 @@ public int getMaxWidth(boolean isShort) { if (compositeVersion >= 0x040000) { // Word_Break - auxiliary/WordBreakProperty.txt final UnicodeMap unicodeMap = new UnicodeMap(); - unicodeMap.setErrorOnReset(true); // disallow multiple values for code point + // Disallow multiple values for code point, but only if we are using this class to + // derive the current properties; the derivation is incorrect for earlier versions + // 
anyway. + unicodeMap.setErrorOnReset( + compositeVersion == UCD.makeLatestVersion().getCompositeVersion()); final UnicodeProperty cat = getProperty("General_Category"); final UnicodeProperty script = getProperty("Script"); diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index bf6b688e3..f3e3a4fda 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -30,6 +30,7 @@ import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.props.UcdPropertyValues.Bidi_Class_Values; +import org.unicode.props.UnicodeProperty; import org.unicode.text.utility.ChainException; import org.unicode.text.utility.Settings; import org.unicode.text.utility.UTF32; @@ -520,22 +521,23 @@ static class HanException { byte numericType; } - private void populateHanExceptions(UnicodeMap numeric) { - for (UnicodeMap.EntryRange range : numeric.entryRanges()) { - if (range.value == null || range.value.equals("NaN")) { + private void populateHanExceptions(UnicodeProperty numeric) { + for (String value : numeric.getAvailableValues()) { + if (value == null || value.equals("NaN")) { continue; } - String propertyValue = Utility.replace(range.value, ",", ""); + String propertyValue = Utility.replace(value, ",", ""); final int hack = propertyValue.indexOf(' '); if (hack >= 0) { Utility.fixDot(); if (SHOW_LOADING) { - System.out.println("BAD NUMBER: " + range); + System.out.println("BAD NUMBER: " + value); } propertyValue = propertyValue.substring(0, hack); } - for (int code = range.codepoint; code <= range.codepointEnd; ++code) { + for (String s : numeric.getSet(value)) { + final int code = s.codePointAt(0); // Unicode 15.1: // This code had these two exceptions, but now U+4EAC actually has value // 10000000000000000 @@ -562,9 +564,9 @@ private void populateHanExceptions() { IndexUnicodeProperties iup = 
IndexUnicodeProperties.make(versionInfo); hanExceptions = new IntMap(); try { - populateHanExceptions(iup.load(UcdProperty.kPrimaryNumeric)); - populateHanExceptions(iup.load(UcdProperty.kAccountingNumeric)); - populateHanExceptions(iup.load(UcdProperty.kOtherNumeric)); + populateHanExceptions(iup.getProperty(UcdProperty.kPrimaryNumeric)); + populateHanExceptions(iup.getProperty(UcdProperty.kAccountingNumeric)); + populateHanExceptions(iup.getProperty(UcdProperty.kOtherNumeric)); } catch (final Exception e) { throw new ChainException("Han File Processing Exception", null, e); } finally { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java index bc5375ee6..25164d073 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java @@ -2,10 +2,12 @@ import com.ibm.icu.text.SymbolTable; import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.VersionInfo; import java.text.ParsePosition; import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.function.Supplier; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UnicodeProperty; import org.unicode.props.UnicodeProperty.Factory; @@ -18,6 +20,7 @@ public class VersionedProperty { private UnicodeProperty.Factory propSource; private UnicodeProperty property; private final transient PatternMatcher matcher = new UnicodeProperty.RegexMatcher(); + private Supplier oldestLoadedUcd; private boolean throwOnUnknownProperty; // The version used in the absence of a version prefix. 
@@ -43,11 +46,12 @@ public static VersionedProperty forInvariantTesting() { return result; } - public static VersionedProperty forJSPs() { + public static VersionedProperty forJSPs(Supplier oldestLoadedUcd) { var result = new VersionedProperty(); result.throwOnUnknownProperty = false; result.defaultVersion = Settings.lastVersion; result.versionAliases.put("dev", Settings.latestVersion); + result.oldestLoadedUcd = oldestLoadedUcd; for (String latest = Settings.latestVersion; ; latest = latest.substring(0, latest.length() - 2)) { @@ -101,6 +105,18 @@ public VersionedProperty set(String xPropertyName) { version = defaultVersion; } propertyName = xPropertyName; + final VersionInfo versionInfo = VersionInfo.getInstance(version); + if (oldestLoadedUcd != null) { + final VersionInfo oldestLoaded = oldestLoadedUcd.get(); + if (versionInfo.compareTo(oldestLoaded) < 0) { + throw new IllegalStateException( + "Requested version " + + versionInfo + + " is older than the oldest loaded version " + + oldestLoaded + + ". 
Try again later."); + } + } propSource = getIndexedProperties(version); property = propSource.getProperty(xPropertyName); if ((property == null && TOOL_ONLY_PROPERTIES.contains(xPropertyName)) diff --git a/unicodetools/src/main/java/org/unicode/tools/CopyPropsToUnicodeJsp.java b/unicodetools/src/main/java/org/unicode/tools/CopyPropsToUnicodeJsp.java deleted file mode 100644 index 9aaa0d900..000000000 --- a/unicodetools/src/main/java/org/unicode/tools/CopyPropsToUnicodeJsp.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.unicode.tools; - -import com.google.common.collect.ImmutableSet; -import com.ibm.icu.util.VersionInfo; -import java.io.File; -import java.io.IOException; -import java.nio.file.CopyOption; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.StandardCopyOption; -import java.util.Collections; -import java.util.EnumSet; -import java.util.Set; -import org.unicode.props.IndexUnicodeProperties; -import org.unicode.props.PropertyStatus; -import org.unicode.props.UcdProperty; -import org.unicode.text.utility.Settings; - -public class CopyPropsToUnicodeJsp { - public static void main(String[] args) throws IOException { - IndexUnicodeProperties latest = IndexUnicodeProperties.make(); - VersionInfo ucdVersion = latest.getUcdVersion(); - System.out.println("Copying Props for " + ucdVersion + " into JSP"); - String fromDir = Settings.Output.BIN_DIR + ucdVersion + "/"; - String toDir = - Settings.UnicodeTools.UNICODEJSPS_DIR + "src/main/resources/org/unicode/jsp/props/"; - // overwrite existing file, if exists - CopyOption[] options = - new CopyOption[] { - StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.COPY_ATTRIBUTES - }; - Set kExceptions = - ImmutableSet.of( - "kAccountingNumeric.bin", - "kOtherNumeric.bin", - "kPrimaryNumeric.bin", - "kSimplifiedVariant.bin", - "kTraditionalVariant.bin"); - - for (String name : new File(fromDir).list()) { - if (!name.endsWith(".bin")) { - 
System.out.println("Skipping1 " + name); - continue; - } - if (name.startsWith("k")) { - if (!kExceptions.contains(name)) { - System.out.println("Skipping2 " + name); - continue; - } else { - System.out.println("Retaining2 " + name); - } - } - String pname = name.substring(0, name.length() - 4); - UcdProperty prop = UcdProperty.forString(pname); - EnumSet status = PropertyStatus.getPropertyStatusSet(prop); - - if (!Collections.disjoint(status, ListProps.SKIP_JSP_STATUS)) { - System.out.println("Skipping3 " + prop); - continue; - } - Path FROM = Paths.get(fromDir + name); - Path TO = Paths.get(toDir + name); - Files.copy(FROM, TO, options); - } - } -} diff --git a/unicodetools/src/main/java/org/unicode/tools/UpdateJspFiles.java b/unicodetools/src/main/java/org/unicode/tools/UpdateJspFiles.java index 352aa4616..a0a5bfc59 100644 --- a/unicodetools/src/main/java/org/unicode/tools/UpdateJspFiles.java +++ b/unicodetools/src/main/java/org/unicode/tools/UpdateJspFiles.java @@ -50,10 +50,6 @@ public static void main(String args[]) throws IOException { System.out.println("Sublaunching ListProps.."); ListProps.main(args); - // Sublaunch CopyPropsToUnicodeJsp - System.out.println("Sublaunching CopyPropsToUnicodeJsp"); - CopyPropsToUnicodeJsp.main(args); - System.out.println( "DONE! Now go run 'mvn org.eclipse.jetty:jetty-maven-plugin:run' to fire up the JSP"); }