diff --git a/docs/help/changes.md b/docs/help/changes.md
index 32a91caa6..c4cf6cafe 100644
--- a/docs/help/changes.md
+++ b/docs/help/changes.md
@@ -3,42 +3,38 @@
The Unicode Utilities have been modified to support both properties from the
released version of Unicode (via ICU) and from the new Unicode beta.
-To get the beta version of the property, insert β *after* the property name.
+To get the beta version of the property, insert `Uβ:` *before* the property name.
+An explicit version number may be included in the prefix (for example, `U16β:`);
+the resulting property is then valid only while that specific beta is current.
Examples:
-| `\p{Word_Break=ALetter}` | Released version of Unicode |
-| `\p{Word_Breakβ=ALetter}` | Beta version of Unicode |
+| Query | Result |
+|---|---|
+| `\p{Word_Break=ALetter}` | Released version of Unicode. |
+| `\p{Uβ:Word_Break=ALetter}` | Beta version of Unicode; error outside of beta review. |
+| `\p{U16β:Word_Break=ALetter}` | Beta version of Unicode 16.0; error during the beta review of any other version. |
For example, to see additions to that property value in the beta version, use:
-[`\p{Word_Breakβ=ALetter}-\\p{Word_Break=ALetter}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BWord_Break%CE%B2%3DALetter%7D-%5Cp%7BWord_Break%3DALetter%7D&g=&i=)
+[`\p{Uβ:Word_Break=ALetter}-\p{Word_Break=ALetter}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BU%CE%B2%3AWord_Break%3DALetter%7D-%5Cp%7BWord_Break%3DALetter%7D&g=&i=)
## Caveats
-The support is not complete done, and there are some known problems.
-
-1. Some properties are not supported in beta versions. See
-
- for the list.
-2. When characters are listed, the new blocks and subheads don't show up.
-3. If you use a property that has a β version but no ICU version, you get no
- error: just an empty listing.
-4. The beta properties don't yet have the "shorthands" for cases like \\p{Lu}.
- So make sure the property is listed, eg \\p{gcβ=Lu}
- 1. Example:
- [`\p{gcβ=Lu}-\\p{gc=Lu}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7Bgc%CE%B2%3DLu%7D-%5Cp%7Bgc%3DLu%7D&g=&i=)
-5. Tools for segmentation, etc. use the release properties; there isn't a way
+Support for beta properties is not yet complete, and there are some known problems.
+
+1. The General_Category groupings, such as `\p{Uβ:L}`, are not correctly implemented.
+ Only individual values, such as `\p{Uβ:Lu}`, work.
+2. Tools for segmentation, etc. use the release properties; there isn't a way
to have them use the beta properties.
-6. There are probably others...
+3. There are probably others...
If you find a problem, please file a ticket at
-: make sure to start the summary with
-"Unicode Utilities: "
+https://github.com/unicode-org/unicodetools/issues.
[Back to Unicode Utilities Help Home](index)
\ No newline at end of file
diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt
index f6e645639..1dd05365d 100644
--- a/unicodetools/data/ucd/dev/DerivedAge.txt
+++ b/unicodetools/data/ucd/dev/DerivedAge.txt
@@ -1,5 +1,5 @@
# DerivedAge-16.0.0.txt
-# Date: 2024-06-06, 10:07:23 GMT
+# Date: 2024-06-07, 16:34:38 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt
index c6bac003e..f837cb8fa 100644
--- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt
+++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt
@@ -1,5 +1,5 @@
# DerivedCoreProperties-16.0.0.txt
-# Date: 2024-06-06, 10:07:42 GMT
+# Date: 2024-06-07, 16:34:58 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/PropertyAliases.txt b/unicodetools/data/ucd/dev/PropertyAliases.txt
index addbb0253..69dbb8d2e 100644
--- a/unicodetools/data/ucd/dev/PropertyAliases.txt
+++ b/unicodetools/data/ucd/dev/PropertyAliases.txt
@@ -1,5 +1,5 @@
# PropertyAliases-16.0.0.txt
-# Date: 2024-04-30, 21:48:30 GMT
+# Date: 2024-06-06, 21:52:48 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -99,6 +99,11 @@ cjkIRG_VSource ; kIRG_VSource
cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS
isc ; ISO_Comment
JSN ; Jamo_Short_Name
+kEH_Cat ; kEH_Cat
+kEH_Desc ; kEH_Desc
+kEH_HG ; kEH_HG
+kEH_IFAO ; kEH_IFAO
+kEH_JSesh ; kEH_JSesh
na ; Name
na1 ; Unicode_1_Name
Name_Alias ; Name_Alias
@@ -179,6 +184,8 @@ IDSB ; IDS_Binary_Operator
IDST ; IDS_Trinary_Operator
IDSU ; IDS_Unary_Operator
Join_C ; Join_Control
+kEH_NoMirror ; kEH_NoMirror
+kEH_NoRotate ; kEH_NoRotate
LOE ; Logical_Order_Exception
Lower ; Lowercase
Math ; Math
@@ -213,6 +220,6 @@ XO_NFKC ; Expands_On_NFKC
XO_NFKD ; Expands_On_NFKD
# ================================================
-# Total: 135
+# Total: 142
# EOF
diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt
index 8b62a2a42..8ca25c640 100644
--- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt
+++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt
@@ -1,5 +1,5 @@
# PropertyValueAliases-16.0.0.txt
-# Date: 2024-06-06, 10:08:00 GMT
+# Date: 2024-06-07, 16:35:15 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -1676,4 +1676,34 @@ XIDS; Y ; Yes ; T
# @missing: 0000..10FFFF; cjkRSUnicode;
+# kEH_Cat (kEH_Cat)
+
+# @missing: 0000..10FFFF; kEH_Cat;
+
+# kEH_Desc (kEH_Desc)
+
+# @missing: 0000..10FFFF; kEH_Desc;
+
+# kEH_HG (kEH_HG)
+
+# @missing: 0000..10FFFF; kEH_HG;
+
+# kEH_IFAO (kEH_IFAO)
+
+# @missing: 0000..10FFFF; kEH_IFAO;
+
+# kEH_JSesh (kEH_JSesh)
+
+# @missing: 0000..10FFFF; kEH_JSesh;
+
+# kEH_NoMirror (kEH_NoMirror)
+
+kEH_NoMirror; N ; No ; F ; False
+kEH_NoMirror; Y ; Yes ; T ; True
+
+# kEH_NoRotate (kEH_NoRotate)
+
+kEH_NoRotate; N ; No ; F ; False
+kEH_NoRotate; Y ; Yes ; T ; True
+
# EOF
diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt
index c68f5c8fb..ca5eff1c3 100644
--- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt
+++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt
@@ -1,5 +1,5 @@
# GraphemeBreakProperty-16.0.0.txt
-# Date: 2024-06-06, 10:07:48 GMT
+# Date: 2024-06-07, 16:35:03 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt
index ffbccd2e5..57a9a58a6 100644
--- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt
+++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt
@@ -1,5 +1,5 @@
# SentenceBreakProperty-16.0.0.txt
-# Date: 2024-06-06, 10:08:13 GMT
+# Date: 2024-06-07, 16:35:29 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt
index 6444a8e65..a961840b1 100644
--- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt
+++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt
@@ -1,5 +1,5 @@
# WordBreakProperty-16.0.0.txt
-# Date: 2024-06-06, 10:08:15 GMT
+# Date: 2024-06-07, 16:35:31 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt
index 34ef68d83..82b7b5e93 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt
@@ -1,5 +1,5 @@
# DerivedBidiClass-16.0.0.txt
-# Date: 2024-06-06, 10:07:40 GMT
+# Date: 2024-06-07, 16:34:55 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt
index bf6958b9d..051ea0e7f 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt
@@ -1,5 +1,5 @@
# DerivedCombiningClass-16.0.0.txt
-# Date: 2024-06-06, 10:07:41 GMT
+# Date: 2024-06-07, 16:34:57 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt
index 1f4593cc5..052ecaf0c 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt
@@ -1,5 +1,5 @@
# DerivedEastAsianWidth-16.0.0.txt
-# Date: 2024-06-06, 10:07:44 GMT
+# Date: 2024-06-07, 16:34:59 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt
index 0485f9373..cf87aa6d1 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt
@@ -1,5 +1,5 @@
# DerivedGeneralCategory-16.0.0.txt
-# Date: 2024-06-06, 10:07:44 GMT
+# Date: 2024-06-07, 16:34:59 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt
index 0c3a8afaf..cc5888b2e 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt
@@ -1,5 +1,5 @@
# DerivedJoiningType-16.0.0.txt
-# Date: 2024-06-06, 10:07:45 GMT
+# Date: 2024-06-07, 16:35:00 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt
index 0e22de905..95408aa24 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt
@@ -1,5 +1,5 @@
# DerivedLineBreak-16.0.0.txt
-# Date: 2024-06-06, 10:07:45 GMT
+# Date: 2024-06-07, 16:35:01 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt
index ee3be48a3..b7e94719c 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt
@@ -1,5 +1,5 @@
# DerivedName-16.0.0.txt
-# Date: 2024-06-06, 10:07:45 GMT
+# Date: 2024-06-07, 16:35:01 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
index 68ff0d963..6c794380e 100644
--- a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
+++ b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
@@ -516,7 +516,10 @@ static void parseSourceFile(
} else {
indexUnicodeProperties.getFileNames().add(fullFilename);
UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename));
- if (fileName.startsWith("Unihan") || fileName.startsWith("k")) {
+ if (fileName.startsWith("Unihan")
+ || fileName.startsWith("Unikemet")
+ || (fileName.endsWith("Sources") && !fileName.startsWith("Emoji"))
+ || fileName.startsWith("k")) {
parser.withTabs(true);
}
PropertyParsingInfo propInfo;
diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyStatus.java b/unicodetools/src/main/java/org/unicode/props/PropertyStatus.java
index da2e3c175..3bd97f746 100644
--- a/unicodetools/src/main/java/org/unicode/props/PropertyStatus.java
+++ b/unicodetools/src/main/java/org/unicode/props/PropertyStatus.java
@@ -131,6 +131,7 @@ public enum PropertyScope {
UcdProperty.Emoji_KDDI,
UcdProperty.Emoji_SB);
+ // TODO(egg): These lists are not up to date!
private static final EnumSet CONTRIBUTORY_PROPERTY =
EnumSet.of(
UcdProperty.Jamo_Short_Name,
@@ -230,7 +231,10 @@ public enum PropertyScope {
UcdProperty.Named_Sequences_Prov,
UcdProperty.Regional_Indicator,
UcdProperty.Standardized_Variant,
- UcdProperty.Vertical_Orientation);
+ UcdProperty.Vertical_Orientation,
+ // Unikemet
+ UcdProperty.kEH_Cat,
+ UcdProperty.kEH_Desc);
private static final EnumSet NORMATIVE_PROPERTY =
EnumSet.of(
@@ -290,7 +294,13 @@ public enum PropertyScope {
UcdProperty.kIRG_MSource,
UcdProperty.kIRG_TSource,
UcdProperty.kIRG_USource,
- UcdProperty.kIRG_VSource);
+ UcdProperty.kIRG_VSource,
+ // Unikemet
+ UcdProperty.kEH_HG,
+ UcdProperty.kEH_IFAO,
+ UcdProperty.kEH_JSesh,
+ UcdProperty.kEH_NoMirror,
+ UcdProperty.kEH_NoRotate);
private static final EnumSet IMMUTABLE_PROPERTY =
EnumSet.of(
UcdProperty.Name,
diff --git a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java
index 0e18f8867..fd9e5b7a3 100644
--- a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java
@@ -105,6 +105,14 @@ public enum UcdProperty {
kDaeJaweon(PropertyType.Miscellaneous, "cjkDaeJaweon"),
kDefinition(PropertyType.Miscellaneous, "cjkDefinition"),
kEACC(PropertyType.Miscellaneous, "cjkEACC"),
+ kEH_Cat(PropertyType.Miscellaneous, "kEH_Cat"),
+ kEH_Desc(PropertyType.Miscellaneous, "kEH_Desc"),
+ kEH_FVal(PropertyType.Miscellaneous, "kEH_FVal"),
+ kEH_Func(PropertyType.Miscellaneous, "kEH_Func"),
+ kEH_HG(PropertyType.Miscellaneous, "kEH_HG"),
+ kEH_IFAO(PropertyType.Miscellaneous, "kEH_IFAO"),
+ kEH_JSesh(PropertyType.Miscellaneous, "kEH_JSesh"),
+ kEH_UniK(PropertyType.Miscellaneous, "kEH_UniK"),
kFanqie(PropertyType.Miscellaneous, "cjkFanqie"),
kFenn(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkFenn"),
kFennIndex(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkFennIndex"),
@@ -182,6 +190,7 @@ public enum UcdProperty {
kRSKanWa(PropertyType.Miscellaneous, "cjkRSKanWa"),
kRSKangXi(PropertyType.Miscellaneous, "cjkRSKangXi"),
kRSKorean(PropertyType.Miscellaneous, "cjkRSKorean"),
+ kRSTUnicode(PropertyType.Miscellaneous, "kRSTUnicode"),
kRSUnicode(
PropertyType.Miscellaneous,
null,
@@ -189,6 +198,7 @@ public enum UcdProperty {
"cjkRSUnicode",
"Unicode_Radical_Stroke",
"URS"),
+ kReading(PropertyType.Miscellaneous, "kReading"),
kSBGY(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkSBGY"),
kSMSZD2003Index(PropertyType.Miscellaneous, "cjkSMSZD2003Index"),
kSMSZD2003Readings(PropertyType.Miscellaneous, "cjkSMSZD2003Readings"),
@@ -200,9 +210,11 @@ public enum UcdProperty {
ValueCardinality.Unordered,
"cjkSpecializedSemanticVariant"),
kSpoofingVariant(PropertyType.Miscellaneous, "cjkSpoofingVariant"),
+ kSrc_NushuDuben(PropertyType.Miscellaneous, "kSrc_NushuDuben"),
kStrange(PropertyType.Miscellaneous, "cjkStrange"),
kTGH(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTGH"),
kTGHZ2013(PropertyType.Miscellaneous, "cjkTGHZ2013"),
+ kTGT_MergedSrc(PropertyType.Miscellaneous, "kTGT_MergedSrc"),
kTaiwanTelegraph(PropertyType.Miscellaneous, "cjkTaiwanTelegraph"),
kTang(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTang"),
kTotalStrokes(PropertyType.Miscellaneous, null, ValueCardinality.Ordered, "cjkTotalStrokes"),
@@ -341,6 +353,9 @@ public enum UcdProperty {
White_Space(PropertyType.Binary, Binary.class, null, "WSpace", "space"),
XID_Continue(PropertyType.Binary, Binary.class, null, "XIDC"),
XID_Start(PropertyType.Binary, Binary.class, null, "XIDS"),
+ kEH_Core(PropertyType.Binary, Binary.class, null, "kEH_Core"),
+ kEH_NoMirror(PropertyType.Binary, Binary.class, null, "kEH_NoMirror"),
+ kEH_NoRotate(PropertyType.Binary, Binary.class, null, "kEH_NoRotate"),
// Unknown
;
diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java
index 347e442f6..94dc3f14a 100644
--- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java
+++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java
@@ -1434,6 +1434,14 @@ public static Joining_Type_Values forName(String name) {
// kDaeJaweon
// kDefinition
// kEACC
+ // kEH_Cat
+ // kEH_Desc
+ // kEH_Func
+ // kEH_FVal
+ // kEH_HG
+ // kEH_IFAO
+ // kEH_JSesh
+ // kEH_UniK
// kFanqie
// kFenn
// kFennIndex
@@ -1501,11 +1509,13 @@ public static Joining_Type_Values forName(String name) {
// kPhonetic
// kPrimaryNumeric
// kPseudoGB1
+ // kReading
// kRSAdobe_Japan1_6
// kRSJapanese
// kRSKangXi
// kRSKanWa
// kRSKorean
+ // kRSTUnicode
// kRSUnicode
// kSBGY
// kSemanticVariant
@@ -1514,11 +1524,13 @@ public static Joining_Type_Values forName(String name) {
// kSMSZD2003Readings
// kSpecializedSemanticVariant
// kSpoofingVariant
+ // kSrc_NushuDuben
// kStrange
// kTaiwanTelegraph
// kTang
// kTGH
// kTGHZ2013
+ // kTGT_MergedSrc
// kTotalStrokes
// kTraditionalVariant
// kUnihanCore2020
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java
index 582d42eec..ef3e215a6 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java
@@ -309,6 +309,13 @@ public String _getValue(int codepoint) {
"cjkIRG_VSource",
"cjkIRG_VSource",
"kIRG_VSource");
+ add(iup.getProperty("kEH_Cat"));
+ add(iup.getProperty("kEH_Desc"));
+ add(iup.getProperty("kEH_HG"));
+ add(iup.getProperty("kEH_IFAO"));
+ add(iup.getProperty("kEH_JSesh"));
+ add(iup.getProperty("kEH_NoMirror"));
+ add(iup.getProperty("kEH_NoRotate"));
add(iup.getProperty("Emoji"));
add(iup.getProperty("Emoji_Presentation"));
add(iup.getProperty("Emoji_Modifier"));
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java
index 25164d073..1aedb2410 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java
@@ -8,6 +8,7 @@
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Supplier;
+import java.util.stream.Collectors;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.UnicodeProperty;
import org.unicode.props.UnicodeProperty.Factory;
@@ -51,6 +52,7 @@ public static VersionedProperty forJSPs(Supplier oldestLoadedUcd) {
result.throwOnUnknownProperty = false;
result.defaultVersion = Settings.lastVersion;
result.versionAliases.put("dev", Settings.latestVersion);
+ result.versionAliases.put(Settings.latestVersionPhase.toString(), Settings.latestVersion);
result.oldestLoadedUcd = oldestLoadedUcd;
for (String latest = Settings.latestVersion;
;
@@ -96,8 +98,15 @@ public VersionedProperty set(String xPropertyName) {
version = aliased;
} else {
version = names[0].substring(1);
- if (versionAliases.containsValue(version)) {
- throw new IllegalArgumentException("Invalid version " + version);
+ if (versionAliases.containsValue(
+ VersionInfo.getInstance(version).getVersionString(3, 3))) {
+ throw new IllegalArgumentException(
+ "Unreleased version "
+ + version
+ + "; use prefix: " // aliases go before the property name, e.g. Uβ:Word_Break
+ + versionAliases.keySet().stream()
+ .map(v -> "U" + v)
+ .collect(Collectors.joining(", ")));
}
}
xPropertyName = names[1];
diff --git a/unicodetools/src/main/java/org/unicode/text/utility/Settings.java b/unicodetools/src/main/java/org/unicode/text/utility/Settings.java
index 8729fbe06..d32ace265 100644
--- a/unicodetools/src/main/java/org/unicode/text/utility/Settings.java
+++ b/unicodetools/src/main/java/org/unicode/text/utility/Settings.java
@@ -41,7 +41,7 @@ public String toString() {
}
};
- public static final ReleasePhase latestVersionPhase = ReleasePhase.ALPHA;
+ public static final ReleasePhase latestVersionPhase = ReleasePhase.BETA;
public static final String lastVersion = "15.1.0"; // last released version
diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt
index db657d34b..80faee3c7 100644
--- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt
+++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt
@@ -17,6 +17,10 @@ RETS ; RGI_Emoji_Tag_Sequence ; Emoji_Tag_Sequence
REZS ; RGI_Emoji_Zwj_Sequence ; Emoji_Zwj_Sequence
# RE ; RGI_Emoji
+kEH_Core ; kEH_Core
+kEH_NoMirror ; kEH_NoMirror
+kEH_NoRotate ; kEH_NoRotate
+
# ================================================
# Enumerated Properties
# ================================================
@@ -162,3 +166,13 @@ cjkVietnameseNumeric ; kVietnameseNumeric
cjkZhuangNumeric ; kZhuangNumeric
# 16.0
cjkFanqie ; kFanqie
+
+kTGT_MergedSrc ; kTGT_MergedSrc
+kRSTUnicode ; kRSTUnicode
+
+kSrc_NushuDuben ; kSrc_NushuDuben
+kReading ; kReading
+
+kEH_Func ; kEH_Func
+kEH_FVal ; kEH_FVal
+kEH_UniK ; kEH_UniK
\ No newline at end of file
diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt
index 98613a31c..8d659c98f 100644
--- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt
+++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt
@@ -85,6 +85,10 @@
# @missing: 0000..10FFFF; Emoji_Component ; No
# @missing: 0000..10FFFF; Extended_Pictographic ; No
+# @missing: 0000..10FFFF; kEH_Core ; No
+# @missing: 0000..10FFFF; kEH_NoMirror ; No
+# @missing: 0000..10FFFF; kEH_NoRotate ; No
+
# End of binary properties.
# @missing: 0000..10FFFF; Canonical_Combining_Class; Not_Reordered
@@ -194,3 +198,13 @@ Do_Not_Emit_Type ; Precomposed_Form ; Precomposed_Form
Do_Not_Emit_Type ; Deprecated ; Deprecated
Do_Not_Emit_Type ; Discouraged ; Discouraged
Do_Not_Emit_Type ; Preferred_Spelling ; Preferred_Spelling
+
+# @missing: 0000..10FFFF; kTGT_MergedSrc ;
+# @missing: 0000..10FFFF; kRSTUnicode ;
+
+# @missing: 0000..10FFFF; kSrc_NushuDuben ;
+# @missing: 0000..10FFFF; kReading ;
+
+# @missing: 0000..10FFFF; kEH_Func ;
+# @missing: 0000..10FFFF; kEH_FVal ;
+# @missing: 0000..10FFFF; kEH_UniK ;
diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt
index 71f5ffe73..018f9614d 100644
--- a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt
+++ b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt
@@ -368,3 +368,25 @@ emoji/*/emoji-sequences; RGI_Emoji_Tag_Sequence
emoji/*/emoji-zwj-sequences; RGI_Emoji_Zwj_Sequence
#emoji/*/emoji-test ; Emoji_Short_Name
+
+
+FileType ; TangutSources ; PropertyValue
+TangutSources ; kTGT_MergedSrc
+TangutSources ; kRSTUnicode
+
+FileType ; NushuSources ; PropertyValue
+NushuSources ; kSrc_NushuDuben
+NushuSources ; kReading
+
+FileType ; Unikemet ; PropertyValue
+Unikemet ; kEH_Cat
+Unikemet ; kEH_Core
+Unikemet ; kEH_Desc
+Unikemet ; kEH_Func
+Unikemet ; kEH_FVal
+Unikemet ; kEH_JSesh
+Unikemet ; kEH_HG
+Unikemet ; kEH_IFAO
+Unikemet ; kEH_NoMirror
+Unikemet ; kEH_NoRotate
+Unikemet ; kEH_UniK
\ No newline at end of file
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
index 3b3f3c35a..65d0004eb 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
@@ -1090,6 +1090,29 @@ Let $japaneseSimplifiedRadicals = \p{Name=/CJK RADICAL J-SIMPLIFIED/}
In $chineseSimplifiedRadicals, Equivalent_Unified_Ideograph ∈ [\p{kRSUnicode=/^[0-9]+'\.0$/} $radicalsWithUnifiableSimplifications]
In $japaneseSimplifiedRadicals, Equivalent_Unified_Ideograph ∈ \p{kRSUnicode=/^[0-9]+''\.0$/}
+# Tangut invariants
+
+Let $tangutSourcesScope = [\p{Block=/^Tangut(.Supplement)?$/} - \p{gc=Cn}]
+$tangutSourcesScope = [ [\p{gc=Lo} & \p{sc=Tangut}] - \p{name=/^TANGUT COMPONENT-/} ]
+$tangutSourcesScope = \P{kTGT_MergedSrc=@none@}
+$tangutSourcesScope = \P{kRSTUnicode=@none@}
+
+# Nüshu invariants
+
+Let $nüshuSourcesScope = [\p{Block=Nushu} - \p{gc=Cn}]
+$nüshuSourcesScope = [\p{gc=Lo} & \p{sc=Nushu}]
+$nüshuSourcesScope = \P{kSrc_NushuDuben=@none@}
+$nüshuSourcesScope = \P{kReading=@none@}
+
+# Egyptian hieroglyph invariants
+
+Let $unikemetScope = [\p{Block=/^Egyptian.Hieroglyphs/} - \p{gc=Cn}]
+$unikemetScope = [ [\p{gc=Lo} & \p{sc=Egyp}] - \p{Name=/^EGYPTIAN HIEROGLYPH (FULL |HALF |TALL |WIDE )?(BLANK|LOST SIGN)$/} ]
+$unikemetScope = \P{kEH_Cat=@none@}
+$unikemetScope = \P{kEH_UniK=@none@}
+\p{kEH_NoMirror} ⊂ $unikemetScope
+\p{kEH_NoRotate} ⊂ $unikemetScope
+
# InPC-InSC-gc invariants
# See https://www.unicode.org/L2/L2023/23200-category-invariants.pdf.
\p{InPC=/(Left|Right)/} ⊆ [\p{gc=Mc}\p{gc=Lo}\p{gc=Lm}]