Skip to content

Commit

Permalink
Parse Tangut and Nüshu sources (#856)
Browse files: browse the repository at this point in the history
  • Loading branch information
eggrobin authored Jun 7, 2024
1 parent d51c407 commit 5a4870e
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,7 @@ static void parseSourceFile(
UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename));
if (fileName.startsWith("Unihan")
|| fileName.startsWith("Unikemet")
|| (fileName.endsWith("Sources") && !fileName.startsWith("Emoji"))
|| fileName.startsWith("k")) {
parser.withTabs(true);
}
Expand Down
4 changes: 4 additions & 0 deletions unicodetools/src/main/java/org/unicode/props/UcdProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,15 @@ public enum UcdProperty {
kRSKanWa(PropertyType.Miscellaneous, "cjkRSKanWa"),
kRSKangXi(PropertyType.Miscellaneous, "cjkRSKangXi"),
kRSKorean(PropertyType.Miscellaneous, "cjkRSKorean"),
kRSTUnicode(PropertyType.Miscellaneous, "kRSTUnicode"),
kRSUnicode(
PropertyType.Miscellaneous,
null,
ValueCardinality.Ordered,
"cjkRSUnicode",
"Unicode_Radical_Stroke",
"URS"),
kReading(PropertyType.Miscellaneous, "kReading"),
kSBGY(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkSBGY"),
kSMSZD2003Index(PropertyType.Miscellaneous, "cjkSMSZD2003Index"),
kSMSZD2003Readings(PropertyType.Miscellaneous, "cjkSMSZD2003Readings"),
Expand All @@ -208,9 +210,11 @@ public enum UcdProperty {
ValueCardinality.Unordered,
"cjkSpecializedSemanticVariant"),
kSpoofingVariant(PropertyType.Miscellaneous, "cjkSpoofingVariant"),
kSrc_NushuDuben(PropertyType.Miscellaneous, "kSrc_NushuDuben"),
kStrange(PropertyType.Miscellaneous, "cjkStrange"),
kTGH(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTGH"),
kTGHZ2013(PropertyType.Miscellaneous, "cjkTGHZ2013"),
kTGT_MergedSrc(PropertyType.Miscellaneous, "kTGT_MergedSrc"),
kTaiwanTelegraph(PropertyType.Miscellaneous, "cjkTaiwanTelegraph"),
kTang(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTang"),
kTotalStrokes(PropertyType.Miscellaneous, null, ValueCardinality.Ordered, "cjkTotalStrokes"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1508,11 +1508,13 @@ public static Joining_Type_Values forName(String name) {
// kPhonetic
// kPrimaryNumeric
// kPseudoGB1
// kReading
// kRSAdobe_Japan1_6
// kRSJapanese
// kRSKangXi
// kRSKanWa
// kRSKorean
// kRSTUnicode
// kRSUnicode
// kSBGY
// kSemanticVariant
Expand All @@ -1521,11 +1523,13 @@ public static Joining_Type_Values forName(String name) {
// kSMSZD2003Readings
// kSpecializedSemanticVariant
// kSpoofingVariant
// kSrc_NushuDuben
// kStrange
// kTaiwanTelegraph
// kTang
// kTGH
// kTGHZ2013
// kTGT_MergedSrc
// kTotalStrokes
// kTraditionalVariant
// kUnihanCore2020
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ cjkZhuangNumeric ; kZhuangNumeric
# 16.0
cjkFanqie ; kFanqie

kTGT_MergedSrc ; kTGT_MergedSrc
kRSTUnicode ; kRSTUnicode

kSrc_NushuDuben ; kSrc_NushuDuben
kReading ; kReading

kEH_Func ; kEH_Func
kEH_FVal ; kEH_FVal
kEH_UniK ; kEH_UniK
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ Do_Not_Emit_Type ; Deprecated ; Deprecated
Do_Not_Emit_Type ; Discouraged ; Discouraged
Do_Not_Emit_Type ; Preferred_Spelling ; Preferred_Spelling

# @missing: 0000..10FFFF; kTGT_MergedSrc ; <none>
# @missing: 0000..10FFFF; kRSTUnicode ; <none>

# @missing: 0000..10FFFF; kSrc_NushuDuben ; <none>
# @missing: 0000..10FFFF; kReading ; <none>

# @missing: 0000..10FFFF; kEH_Func ; <none>
# @missing: 0000..10FFFF; kEH_FVal ; <none>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,15 @@ emoji/*/emoji-zwj-sequences; RGI_Emoji_Zwj_Sequence

#emoji/*/emoji-test ; Emoji_Short_Name


FileType ; TangutSources ; PropertyValue
TangutSources ; kTGT_MergedSrc
TangutSources ; kRSTUnicode

FileType ; NushuSources ; PropertyValue
NushuSources ; kSrc_NushuDuben
NushuSources ; kReading

FileType ; Unikemet ; PropertyValue
Unikemet ; kEH_Cat
Unikemet ; kEH_Core
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,20 @@ Let $japaneseSimplifiedRadicals = \p{Name=/CJK RADICAL J-SIMPLIFIED/}
In $chineseSimplifiedRadicals, Equivalent_Unified_Ideograph ∈ [\p{kRSUnicode=/^[0-9]+'\.0$/} $radicalsWithUnifiableSimplifications]
In $japaneseSimplifiedRadicals, Equivalent_Unified_Ideograph ∈ \p{kRSUnicode=/^[0-9]+''\.0$/}

# Tangut invariants
#
# The scope is every assigned (non-Cn) code point in a block whose name matches
# /^Tangut(.Supplement)?$/. Each of the three lines after the definition relates
# that scope to another set:
#   - the gc=Lo characters of script Tangut, minus those whose name starts with
#     "TANGUT COMPONENT-";
#   - the complement of \p{kTGT_MergedSrc=@none@};
#   - the complement of \p{kRSTUnicode=@none@}.
# NOTE(review): presumably a bare "A = B" line asserts set equality and @none@
# stands for "no value assigned", so the last two lines would require both
# properties on every character in scope and nowhere else; confirm against the
# invariant test parser.

Let $tangutSourcesScope = [\p{Block=/^Tangut(.Supplement)?$/} - \p{gc=Cn}]
$tangutSourcesScope = [ [\p{gc=Lo} & \p{sc=Tangut}] - \p{name=/^TANGUT COMPONENT-/} ]
$tangutSourcesScope = \P{kTGT_MergedSrc=@none@}
$tangutSourcesScope = \P{kRSTUnicode=@none@}

# Nüshu invariants
#
# The scope is every assigned (non-Cn) code point in the Nushu block. Each of
# the three lines after the definition relates that scope to another set:
#   - the gc=Lo characters of script Nushu;
#   - the complement of \p{kSrc_NushuDuben=@none@};
#   - the complement of \p{kReading=@none@}.
# NOTE(review): presumably a bare "A = B" line asserts set equality and @none@
# stands for "no value assigned", so the last two lines would require both
# properties on every character in scope and nowhere else; confirm against the
# invariant test parser.

Let $nüshuSourcesScope = [\p{Block=Nushu} - \p{gc=Cn}]
$nüshuSourcesScope = [\p{gc=Lo} & \p{sc=Nushu}]
$nüshuSourcesScope = \P{kSrc_NushuDuben=@none@}
$nüshuSourcesScope = \P{kReading=@none@}

# Egyptian hieroglyph invariants

Let $unikemetScope = [\p{Block=/^Egyptian.Hieroglyphs/} - \p{gc=Cn}]
Expand Down

0 comments on commit 5a4870e

Please sign in to comment.