diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java index cdae87a0e..6c794380e 100644 --- a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java +++ b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java @@ -518,6 +518,7 @@ static void parseSourceFile( UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); if (fileName.startsWith("Unihan") || fileName.startsWith("Unikemet") + || (fileName.endsWith("Sources") && !fileName.startsWith("Emoji")) || fileName.startsWith("k")) { parser.withTabs(true); } diff --git a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java index 03aebe1e7..fd9e5b7a3 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java @@ -190,6 +190,7 @@ public enum UcdProperty { kRSKanWa(PropertyType.Miscellaneous, "cjkRSKanWa"), kRSKangXi(PropertyType.Miscellaneous, "cjkRSKangXi"), kRSKorean(PropertyType.Miscellaneous, "cjkRSKorean"), + kRSTUnicode(PropertyType.Miscellaneous, "kRSTUnicode"), kRSUnicode( PropertyType.Miscellaneous, null, @@ -197,6 +198,7 @@ public enum UcdProperty { "cjkRSUnicode", "Unicode_Radical_Stroke", "URS"), + kReading(PropertyType.Miscellaneous, "kReading"), kSBGY(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkSBGY"), kSMSZD2003Index(PropertyType.Miscellaneous, "cjkSMSZD2003Index"), kSMSZD2003Readings(PropertyType.Miscellaneous, "cjkSMSZD2003Readings"), @@ -208,9 +210,11 @@ public enum UcdProperty { ValueCardinality.Unordered, "cjkSpecializedSemanticVariant"), kSpoofingVariant(PropertyType.Miscellaneous, "cjkSpoofingVariant"), + kSrc_NushuDuben(PropertyType.Miscellaneous, "kSrc_NushuDuben"), kStrange(PropertyType.Miscellaneous, "cjkStrange"), kTGH(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTGH"), kTGHZ2013(PropertyType.Miscellaneous, "cjkTGHZ2013"), + kTGT_MergedSrc(PropertyType.Miscellaneous, "kTGT_MergedSrc"), kTaiwanTelegraph(PropertyType.Miscellaneous, "cjkTaiwanTelegraph"), kTang(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTang"), kTotalStrokes(PropertyType.Miscellaneous, null, ValueCardinality.Ordered, "cjkTotalStrokes"), diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index c5414864f..37020e727 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -1508,11 +1508,13 @@ public static Joining_Type_Values forName(String name) { // kPhonetic // kPrimaryNumeric // kPseudoGB1 + // kReading // kRSAdobe_Japan1_6 // kRSJapanese // kRSKangXi // kRSKanWa // kRSKorean + // kRSTUnicode // kRSUnicode // kSBGY // kSemanticVariant @@ -1521,11 +1523,13 @@ public static Joining_Type_Values forName(String name) { // kSMSZD2003Readings // kSpecializedSemanticVariant // kSpoofingVariant + // kSrc_NushuDuben // kStrange // kTaiwanTelegraph // kTang // kTGH // kTGHZ2013 + // kTGT_MergedSrc // kTotalStrokes // kTraditionalVariant // kUnihanCore2020 diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt index 32f5964cc..80faee3c7 100644 --- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt +++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt @@ -167,6 +167,12 @@ cjkZhuangNumeric ; kZhuangNumeric # 16.0 cjkFanqie ; kFanqie +kTGT_MergedSrc ; kTGT_MergedSrc +kRSTUnicode ; kRSTUnicode + +kSrc_NushuDuben ; kSrc_NushuDuben +kReading ; kReading + kEH_Func ; kEH_Func kEH_FVal ; kEH_FVal kEH_UniK ; kEH_UniK \ No newline at end of file diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt index ec9b190fb..8d659c98f 100644 --- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt +++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt @@ -199,6 +199,11 @@ Do_Not_Emit_Type ; Deprecated ; Deprecated Do_Not_Emit_Type ; Discouraged ; Discouraged Do_Not_Emit_Type ; Preferred_Spelling ; Preferred_Spelling +# @missing: 0000..10FFFF; kTGT_MergedSrc ; +# @missing: 0000..10FFFF; kRSTUnicode ; + +# @missing: 0000..10FFFF; kSrc_NushuDuben ; +# @missing: 0000..10FFFF; kReading ; # @missing: 0000..10FFFF; kEH_Func ; # @missing: 0000..10FFFF; kEH_FVal ; diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt index d66101597..018f9614d 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt @@ -369,6 +369,15 @@ emoji/*/emoji-zwj-sequences; RGI_Emoji_Zwj_Sequence #emoji/*/emoji-test ; Emoji_Short_Name + +FileType ; TangutSources ; PropertyValue +TangutSources ; kTGT_MergedSrc +TangutSources ; kRSTUnicode + +FileType ; NushuSources ; PropertyValue +NushuSources ; kSrc_NushuDuben +NushuSources ; kReading + FileType ; Unikemet ; PropertyValue Unikemet ; kEH_Cat Unikemet ; kEH_Core diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 9f6ba19f5..65d0004eb 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -1090,6 +1090,20 @@ Let $japaneseSimplifiedRadicals = \p{Name=/CJK RADICAL J-SIMPLIFIED/} In $chineseSimplifiedRadicals, Equivalent_Unified_Ideograph ∈ [\p{kRSUnicode=/^[0-9]+'\.0$/} $radicalsWithUnifiableSimplifications] In $japaneseSimplifiedRadicals, Equivalent_Unified_Ideograph ∈ \p{kRSUnicode=/^[0-9]+''\.0$/} +# Tangut invariants + +Let $tangutSourcesScope = [\p{Block=/^Tangut(.Supplement)?$/} - \p{gc=Cn}] +$tangutSourcesScope = [ [\p{gc=Lo} & \p{sc=Tangut}] - \p{name=/^TANGUT COMPONENT-/} ] +$tangutSourcesScope = \P{kTGT_MergedSrc=@none@} +$tangutSourcesScope = \P{kRSTUnicode=@none@} + +# Nüshu invariants + +Let $nüshuSourcesScope = [\p{Block=Nushu} - \p{gc=Cn}] +$nüshuSourcesScope = [\p{gc=Lo} & \p{sc=Nushu}] +$nüshuSourcesScope = \P{kSrc_NushuDuben=@none@} +$nüshuSourcesScope = \P{kReading=@none@} + # Egyptian hieroglyph invariants Let $unikemetScope = [\p{Block=/^Egyptian.Hieroglyphs/} - \p{gc=Cn}]