Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into incb-derivation-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Apr 25, 2024
2 parents 53bac40 + 5cadb1c commit 68cbc46
Show file tree
Hide file tree
Showing 250 changed files with 318,068 additions and 174,627 deletions.
23 changes: 8 additions & 15 deletions .github/workflows/build-jsp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,15 @@ jobs:
restore-keys: |
${{ runner.os }}-maven-
# TODO: move this DOWN after JSPs once it works
- name: Generate Unicode data
# TODO: hard coded version
# TODO: symlink of security here?
- name: Regenerate the property cache files
run: >
mkdir -pv $(pwd)/output/Generated/ &&
mvn -s .github/workflows/mvn-settings.xml -B compile exec:java -DskipTests=true
-Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version ${CURRENT_UVERSION} build MakeUnicodeFiles"
-am -pl unicodetools -DCLDR_DIR=${GITHUB_WORKSPACE}/cldr
-DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=${CURRENT_UVERSION}
-DUNICODETOOLS_GEN_DIR=$(pwd)/output/Generated
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Run unicodetools tests
run: >
mvn -s .github/workflows/mvn-settings.xml -B test -am -pl unicodetools
-DCLDR_DIR=${GITHUB_WORKSPACE}/cldr -T 1C -Dparallel=classes -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=${CURRENT_UVERSION} -DUNICODETOOLS_GEN_DIR=$(pwd)/output/Generated
mkdir -pv $(pwd)/Generated/ &&
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.jsp.RebuildPropertyCache"
-am -pl unicodetools
-DCLDR_DIR=${GITHUB_WORKSPACE}/cldr
-DUNICODETOOLS_REPO_DIR=$(pwd)
-DUNICODETOOLS_GEN_DIR=$(pwd)/Generated &&
tar -cpz -f UnicodeJsps/target/generated.tgz ./Generated
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Package JSPs
Expand Down
4 changes: 3 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 2001-2023 Unicode, Inc.
Copyright © 2001-2024 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
Expand Down Expand Up @@ -37,3 +37,5 @@ Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.

SPDX-License-Identifier: Unicode-3.0
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ The tools maintainers use GH issues for issues with the code in this repo.

### Copyright & Licenses

Copyright © 2001-2023 Unicode, Inc. Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries.
Copyright © 2001-2024 Unicode, Inc. Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries.

The project is released under [LICENSE](./LICENSE).

Expand Down
6 changes: 4 additions & 2 deletions UnicodeJsps/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ RUN cd source && gcc -I ../include/ -static -Os -o3 -o bidiref1 bidiref1.c bruti
RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true)
# copy and unpack to /tmp/data
ADD ./target/cldr-unicodetools.tgz /build/data/
ADD ./target/generated.tgz /build/data/
# move this into place (including unicodetools/unicodetools)
RUN rm -rf /build/data/cldr/.git # unneeded
FROM jetty:9-jre11-alpine-eclipse-temurin AS run
Expand All @@ -24,9 +25,10 @@ ENV BIDIREFHOME /usr/local/share
# copy the bidiref1 bin
ENV BIDIREF1 /usr/local/bin/bidiref1
COPY --from=cbuild /build/source/bidiref1 /usr/local/bin/
RUN mkdir -p /var/lib/jetty/data/unicodetools
RUN mkdir -p /var/lib/jetty/data/unicodetools/Generated
COPY --from=cbuild /build/data/cldr /var/lib/jetty/data/cldr
COPY --from=cbuild /build/data/unicodetools/ /var/lib/jetty/data/unicodetools/unicodetools/
ENV JAVA_OPTIONS -DCLDR_DIR=/var/lib/jetty/data/cldr -DUNICODETOOLS_REPO_DIR=/var/lib/jetty/data/unicodetools
COPY --from=cbuild /build/data/Generated /var/lib/jetty/data/unicodetools/Generated
ENV JAVA_OPTIONS -DCLDR_DIR=/var/lib/jetty/data/cldr -DUNICODETOOLS_REPO_DIR=/var/lib/jetty/data/unicodetools -DUNICODETOOLS_GEN_DIR=/var/lib/jetty/data/unicodetools/Generated -Xmx4g
# This is the default PORT. Override by setting PORT.
EXPOSE 8080
62 changes: 6 additions & 56 deletions UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.unicode.jsp;

import com.ibm.icu.util.ICUException;
import com.ibm.icu.util.VersionInfo;
import java.io.IOException;
import javax.servlet.ServletConfig;
Expand All @@ -9,8 +8,6 @@
import javax.servlet.ServletResponse;
import javax.servlet.annotation.WebServlet;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.UcdProperty;
import org.unicode.props.UcdPropertyValues.Age_Values;
import org.unicode.text.utility.Settings;

@WebServlet
Expand All @@ -25,58 +22,9 @@ public static synchronized VersionInfo getOldestLoadedUcd() {
}

private static synchronized void setOldestLoadedUcd(VersionInfo v) {
oldestLoadedUcd = v;
}

private static void loadUcdHistory(VersionInfo earliest) {
System.out.println("Loading back to " + earliest + "...");
Age_Values[] ages = Age_Values.values();
final long overallStart = System.currentTimeMillis();
for (int i = ages.length - 1; i >= 0; --i) {
final var age = ages[i];
if (age == Age_Values.Unassigned) {
continue;
}
final long ucdStart = System.currentTimeMillis();
System.out.println("Loading UCD " + age.getShortName() + "...");
for (boolean unihan : new boolean[] {false, true}) {
final long partStart = System.currentTimeMillis();
final String name = unihan ? "Unihan" : "non-Unihan properties";
final var properties = IndexUnicodeProperties.make(age.getShortName());
for (UcdProperty property : UcdProperty.values()) {
if (property.getShortName().startsWith("cjk") == unihan) {
try {
properties.load(property);
} catch (ICUException e) {
e.printStackTrace();
}
}
}
System.out.println(
"Loaded "
+ name
+ " for "
+ age.getShortName()
+ " ("
+ (System.currentTimeMillis() - partStart)
+ " ms)");
}
System.out.println(
"Loaded UCD "
+ age.getShortName()
+ " in "
+ (System.currentTimeMillis() - ucdStart)
+ " ms");
var version = VersionInfo.getInstance(age.getShortName());
setOldestLoadedUcd(version);
if (version == earliest) {
break;
}
if (v.compareTo(oldestLoadedUcd) < 0) {
oldestLoadedUcd = v;
}
System.out.println(
"Loaded all UCD history in "
+ (System.currentTimeMillis() - overallStart) / 1000
+ " s");
}

@Override
Expand All @@ -94,12 +42,14 @@ public String getServletInfo() {

@Override
public void init(ServletConfig config) throws ServletException {
loadUcdHistory(Settings.LAST_VERSION_INFO);
IndexUnicodeProperties.loadUcdHistory(
Settings.LATEST_VERSION_INFO, UcdLoader::setOldestLoadedUcd, true);
new Thread(
new Runnable() {
@Override
public void run() {
loadUcdHistory(null);
IndexUnicodeProperties.loadUcdHistory(
null, UcdLoader::setOldestLoadedUcd, true);
}
})
.start();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ public ComparisonMatcher(String pattern, Relation comparator) {

@Override
public boolean test(String value) {
int comp = comparator.compare(pattern, value.toString());
int comp = comparator.compare(pattern, value);
switch (relation) {
case less:
return comp < 0;
Expand Down
12 changes: 5 additions & 7 deletions UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,11 @@ public class UnicodeUtilities {

HTML_RULES_CONTROLS =
HTML_RULES
+ "[[:di:]-[:cc:]-[:cs:]-[\\u200c-\\u200F]] > ; " // remove, should ignore
// in rendering (but may
// not be in browser)
+ "[[:nchar:][:cn:][:cs:][:co:][:cc:]-[:whitespace:]-[\\u200c-\\u200F]] > \\uFFFD ; "; // should be missing glyph (but may not be in browser)
// + "([[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]-[\\u0020]]) >
// &hex/xml($1) ; "; // [\\u0080-\\U0010FFFF]

// + "\\u0000 > \uFFFD ; "
+ "[\\uD800-\\uDB7F] > '<span class=\"high-surrogate\"><span>'\uFFFD'</span></span>' ; "
+ "[\\uDB80-\\uDBFF] > '<span class=\"private-surrogate\"><span>'\uFFFD'</span></span>' ; "
+ "[\\uDC00-\\uDFFF] > '<span class=\"low-surrogate\"><span>'\uFFFD'</span></span>' ; "
+ "([[:cn:][:co:][:cc:]-[:White_Space:]]) > '<span class=\"control\">'$1'</span>' ; ";
toHTML =
Transliterator.createFromRules(
"any-xml", HTML_RULES_CONTROLS, Transliterator.FORWARD);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public static XPropertyFactory make() {
}

public UnicodeProperty getProperty(String propertyAlias) {
var versioned = VersionedProperty.forJSPs().set(propertyAlias);
var versioned = VersionedProperty.forJSPs(UcdLoader::getOldestLoadedUcd).set(propertyAlias);
if (versioned != null) {
return versioned.getProperty();
}
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions UnicodeJsps/src/main/webapp/images/highsurrogate.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions UnicodeJsps/src/main/webapp/images/lowsurrogate.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 68cbc46

Please sign in to comment.