Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into 170-C15
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Oct 2, 2023
2 parents 0d113a1 + 4ddfe22 commit c845fb4
Show file tree
Hide file tree
Showing 577 changed files with 3,899,908 additions and 604,360 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build-jsp.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name: Build JSP

env:
CURRENT_UVERSION: 15.1.0
PREVIOUS_UVERSION: 15.0.0 # not used at present
CURRENT_UVERSION: 16.0.0
PREVIOUS_UVERSION: 15.1.0 # not used at present

on:
push:
Expand Down
25 changes: 4 additions & 21 deletions .github/workflows/cli-build-instructions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ on:
- '*'

env:
CURRENT_UVERSION: 15.1.0
PREVIOUS_UVERSION: 15.0.0
CURRENT_UVERSION: 16.0.0
PREVIOUS_UVERSION: 15.1.0

jobs:

Expand Down Expand Up @@ -207,34 +207,17 @@ jobs:
# run GenerateEnums
mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.props.GenerateEnums" -Dexec.args="" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
# apply formatting because generated file will not pass Java formatter
mvn spotless:apply
# TODO(#100) Get rid of need for fake version 13.1 of Unicode enum val
# Fail if we haven't committed changes from Generate Enums, but make exception for fake Unicode version hack (#100)
mvn spotless:apply '-DspotlessFiles=.*[\\/]org[\\/]unicode[\\/]props[\\/]UcdProperty(Values)?.java'
# Fail if we haven't committed changes from GenerateEnums.
echo "Check if GenerateEnums output has been committed to repo"
echo "git diff --compact-summary"
if git diff --compact-summary --exit-code; then
echo "Success: Any output from GenerateEnums has already been committed."
exit 0
else
echo "git diff --name-status | awk '{print $2;}'"
names=`git diff --name-status | awk '{print $2;}'`
if [[ "$names" == "unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java" ]]; then
gitdiffstat=`git diff --stat | tail -n 1`
if [[ "$gitdiffstat" = " 1 file changed, 1 insertion(+), 2 deletions(-)" ]]; then
git diff | grep 'V13_1("13.1")'
if [ $? -eq 0 ]; then
echo "Inferred: Only non-committed change after GenerateEnums is temporary V13_1 hack (issue #100)"
echo "Check passed."
exit 0
fi
fi
fi
echo "Failure: GenerateEnums has changes that need to be committed."
exit 1
fi
# TODO(#100): Note: when running locally on a clean checkout, this command
# leaves a change in UcdPropertyValues.java by not preserving the comment
# about the fake Unicode version 13.1.
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/push-jsp-on-tag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
- name: Get the version
id: get_tag_name
run: echo ::set-output name=GIT_TAG_NAME::${GITHUB_REF/refs\/tags\//}
- uses: RafikFarhad/push-to-gcr-github-action@v3
- uses: RafikFarhad/push-to-gcr-github-action@241707854fb71f655ec4e2a98bb16505f218bcc2
with:
gcloud_service_key: ${{ secrets.GCLOUD_SERVICE_KEY }}
registry: us.gcr.io
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,4 @@ rules.mk

.DS_Store
/output
/cldr
4 changes: 2 additions & 2 deletions .project
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
</natures>
<filteredResources>
<filter>
<id>1625608051965</id>
<id>1683027586342</id>
<name></name>
<type>30</type>
<matcher>
<id>org.eclipse.core.resources.regexFilterMatcher</id>
<arguments>node_modules|.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__</arguments>
<arguments>node_modules|\.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__</arguments>
</matcher>
</filter>
</filteredResources>
Expand Down
71 changes: 32 additions & 39 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,46 +1,39 @@
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
UNICODE LICENSE V3

See Terms of Use for definitions of Unicode Inc.'s
Data Files and Software.
COPYRIGHT AND PERMISSION NOTICE

NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
Copyright © 2001-2023 Unicode, Inc.

COPYRIGHT AND PERMISSION NOTICE
NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.

Copyright © 1991-2020 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.

Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ maintainers for the production and vetting of data files for the UCD and
other Unicode specs such as UCA, emoji, idna, and security.

Do not use the Unicode data files in this repo for production.
Do use the data files posted publicly on unicode.org.
Do use the data files [posted publicly on unicode.org](https://www.unicode.org/releases/).

There is some documentation for these tools in this repo, in the [docs folder](./docs/).

Expand All @@ -21,12 +21,10 @@ use the Unicode Contact Form: https://www.unicode.org/reporting.html
Do not use the GitHub Issues feature in this repo for those.
The tools maintainers use GH issues for issues with the code in this repo.

### Licenses
### Copyright & Licenses

- Data and software is governed by the [Unicode Terms of Use](https://www.unicode.org/copyright.html)
a copy of which is included as [LICENSE](./LICENSE).
Copyright © 2001-2023 Unicode, Inc. Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries.

### Copyright
The project is released under [LICENSE](./LICENSE).

© 1991 and later: Unicode, Inc. and others.
License & terms of use: <https://www.unicode.org/copyright.html>
A CLA is required to contribute to this project - please refer to the [CONTRIBUTING.md](https://github.com/unicode-org/.github/blob/main/.github/CONTRIBUTING.md) file (or start a Pull Request) for more information.
2 changes: 1 addition & 1 deletion UnicodeJsps/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true)
ADD ./target/cldr-unicodetools.tgz /build/data/
# move this into place (including unicodetools/unicodetools)
RUN rm -rf /build/data/cldr/.git # unneeded
FROM jetty:9-jre11-slim AS run
FROM jetty:9-jre11-alpine-eclipse-temurin AS run
ADD port-entrypoint.sh /port-entrypoint.sh
ADD ./jetty.d/ROOT /var/lib/jetty/webapps/ROOT/
ENTRYPOINT [ "/port-entrypoint.sh" ]
Expand Down
14 changes: 13 additions & 1 deletion UnicodeJsps/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,19 @@
<artifactId>unicodetools-testutils</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<dependency>
<groupId>org.owasp.encoder</groupId>
<artifactId>encoder</artifactId>
<version>1.2.3</version>
</dependency>

<dependency>
<groupId>org.owasp.encoder</groupId>
<artifactId>encoder-jsp</artifactId>
<version>1.2.3</version>
</dependency>
</dependencies>
<build>
<finalName>${project.artifactId}</finalName>
<plugins>
Expand Down
4 changes: 3 additions & 1 deletion UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Multimap;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.VersionInfo;
import java.io.DataInputStream;
Expand Down Expand Up @@ -43,7 +44,8 @@ public class CachedProps {
final BiMultimap<String, String> nameToAliases = new BiMultimap<String, String>(null, null);
final Map<String, BiMultimap<String, String>> nameToValueToAliases = new LinkedHashMap();

static CachedProps CACHED_PROPS = getInstance(VersionInfo.getInstance(14));
static VersionInfo jspVersionInfo = UCharacter.getUnicodeVersion();
static CachedProps CACHED_PROPS = getInstance(jspVersionInfo);

static UnicodeProperty NAMES = CachedProps.CACHED_PROPS.getProperty("Name");

Expand Down
3 changes: 2 additions & 1 deletion UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeJsp.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.unicode.idna.Idna2008;
import org.unicode.idna.Uts46;
import org.unicode.jsp.UnicodeUtilities.CodePointShower;
import org.unicode.text.utility.Settings;

public class UnicodeJsp {

Expand Down Expand Up @@ -420,7 +421,7 @@ public static String getVersions() {
: "<p style='border: 1pt solid red;'>Properties use ICU for Unicode V"
+ UCharacter.getUnicodeVersion().getVersionString(2, 2)
+ "; the beta properties support Unicode V"
+ CachedProps.CACHED_PROPS.version.getVersionString(2, 2)
+ VersionInfo.getInstance(Settings.latestVersion).getVersionString(2, 2)
+ "&beta;. "
+ "For more information, see <a target='help' href='https://unicode-org.github.io/unicodetools/help/changes'>Unicode Utilities Beta</a>.</p>";

Expand Down
38 changes: 24 additions & 14 deletions UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -637,16 +637,7 @@ private void showString(final String string, String separator, Appendable out)
if (UnicodeUtilities.RTL.containsSome(literal)) {
literal = '\u200E' + literal + '\u200E';
}
String name = UnicodeUtilities.getName(string, separator, false);
if (name == null || name.length() == 0) {
name = "<i>no name</i>";
} else {
boolean special = name.indexOf('<') >= 0;
name = UnicodeUtilities.toHTML.transliterate(name);
if (special) {
name = "<i>" + name + "</i>";
}
}
String name = UnicodeUtilities.getName(string, separator, false, false);
literal = UnicodeSetUtilities.addEmojiVariation(literal);
if (doTable) {
out.append(
Expand Down Expand Up @@ -801,7 +792,8 @@ String getPropString(List<UnicodeProperty> props, String codePoints, boolean sho
// }
}

private static String getName(String string, String separator, boolean andCode) {
private static String getName(
String string, String separator, boolean andCode, boolean plainText) {
StringBuilder result = new StringBuilder();
int cp;
for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) {
Expand All @@ -812,7 +804,25 @@ private static String getName(String string, String separator, boolean andCode)
if (andCode) {
result.append("U+").append(com.ibm.icu.impl.Utility.hex(cp, 4)).append(' ');
}
result.append(CachedProps.NAMES.getValue(cp));
final String name = CachedProps.NAMES.getValue(cp);
if (name != null) {
result.append(name);
} else {
// TODO(egg): We only have Name_Aliasβ during β, which is silly. This will probably
// solve itself as part of https://github.com/unicode-org/unicodetools/issues/432.
String alias =
getFactory()
.getProperty(CachedProps.IS_BETA ? "Name_Aliasβ" : "Name_Alias")
.getValue(cp);
if (alias == null) {
alias = "no name";
}
if (plainText) {
result.append("(" + alias + ")");
} else {
result.append("<i>" + alias + "</i>");
}
}
}
return result.toString();
}
Expand Down Expand Up @@ -1931,7 +1941,7 @@ private static void showBidiLine(
writer.println("</tr><tr><th>Character</th>");
for (int i = 0; i < str.length(); ++i) {
final String s = str.substring(i, i + 1);
String title = toHTML.transform(getName(s, "", true));
String title = toHTML.transform(getName(s, "", true, true));
writer.println(
"<td class='bccell' title='"
+ title
Expand Down Expand Up @@ -1982,7 +1992,7 @@ private static void showBidiLine(
String title =
bidiChar.length() == 0
? "deleted"
: toHTML.transform(getName(bidiChar, "", true));
: toHTML.transform(getName(bidiChar, "", true, true));
String td = bidiChar.length() == 0 ? "bxcell" : "bccell";
writer.println(
"<td class='"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# IdentifierStatus.txt
# Date: 2022-08-26, 16:49:09 GMT
# © 2022 Unicode®, Inc.
# Date: 2023-05-16, 22:25:15 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Security Mechanisms for UTS #39
# Version: 15.0.0
# Version: 15.1.0
#
# For documentation and usage, see https://www.unicode.org/reports/tr39
#
Expand Down Expand Up @@ -582,7 +582,8 @@ FA27..FA29 ; Allowed # 1.1 [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK CO
2B740..2B81D ; Allowed # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Allowed # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Allowed # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE4A ; Allowed # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
30000..3134A ; Allowed # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Allowed # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 112156
# Total code points: 112759
Loading

0 comments on commit c845fb4

Please sign in to comment.