From 1f791fe19c7b88e190c03aee67de3f20e6a9564b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 9 Nov 2023 20:26:45 -0800 Subject: [PATCH 1/3] Kawi sign nukta (#567) --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- .../data/ucd/dev/DerivedCoreProperties.txt | 14 +++++++++----- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- .../data/ucd/dev/IndicPositionalCategory.txt | 3 ++- .../data/ucd/dev/IndicSyllabicCategory.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- unicodetools/data/ucd/dev/PropList.txt | 5 +++-- unicodetools/data/ucd/dev/Scripts.txt | 5 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + unicodetools/data/ucd/dev/VerticalOrientation.txt | 3 ++- .../ucd/dev/auxiliary/GraphemeBreakProperty.txt | 5 +++-- .../ucd/dev/auxiliary/SentenceBreakProperty.txt | 5 +++-- .../data/ucd/dev/auxiliary/WordBreakProperty.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 9 +++++---- .../ucd/dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../ucd/dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../ucd/dev/extracted/DerivedGeneralCategory.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedJoiningType.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 5 +++-- .../org/unicode/text/UCD/UnicodeInvariantTest.txt | 6 ++++++ 21 files changed, 71 insertions(+), 42 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 8a96c5734..d33496325 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-11-06, 03:48:21 GMT +# Date: 2023-11-10, 01:47:31 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2026,6 +2026,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 116D0..116E3 ; 16.0 # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO 11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +11F5A ; 16.0 # KAWI SIGN NUKTA 16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE @@ -2043,6 +2044,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 1050 +# Total code points: 1051 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index bc819fc33..6f0672daa 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-06, 03:48:52 GMT +# Date: 2023-11-10, 01:48:01 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3424,6 +3424,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11F36..11F3A ; Case_Ignorable # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Case_Ignorable # Mn KAWI VOWEL SIGN EU 11F42 ; Case_Ignorable # Mn KAWI CONJOINER +11F5A ; Case_Ignorable # Mn KAWI SIGN NUKTA 13430..1343F ; Case_Ignorable # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13440 ; Case_Ignorable # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Case_Ignorable # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED @@ -3479,7 +3480,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2722 +# Total code points: 2723 # ================================================ @@ -8124,6 +8125,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11F41 ; ID_Continue # Mc KAWI SIGN KILLER 11F42 ; ID_Continue # Mn KAWI CONJOINER 11F50..11F59 ; ID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; ID_Continue # Mn KAWI SIGN NUKTA 11FB0 ; ID_Continue # Lo LISU LETTER YHA 12000..12399 ; ID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; ID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM @@ -8310,7 +8312,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140413 +# Total code points: 140414 # ================================================ @@ -10277,6 +10279,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11F41 ; XID_Continue # Mc KAWI SIGN KILLER 11F42 ; XID_Continue # Mn KAWI CONJOINER 11F50..11F59 ; XID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; XID_Continue # Mn KAWI SIGN NUKTA 11FB0 ; XID_Continue # Lo LISU LETTER YHA 12000..12399 ; XID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; XID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM @@ -10463,7 +10466,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140394 +# Total code points: 140395 # ================================================ @@ -10864,6 +10867,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 11F36..11F3A ; Grapheme_Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Grapheme_Extend # Mn KAWI VOWEL SIGN EU 11F42 ; Grapheme_Extend # Mn KAWI CONJOINER +11F5A ; Grapheme_Extend # Mn KAWI SIGN NUKTA 13440 ; Grapheme_Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Grapheme_Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Grapheme_Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -10903,7 +10907,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2133 +# Total code points: 2134 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 384e6e62c..c53d2b8ac 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-06, 03:49:00 GMT +# Date: 2023-11-10, 01:48:10 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2271,6 +2271,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 11F42 ; N # Mn KAWI CONJOINER 11F43..11F4F ; N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; N # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; N # Mn KAWI SIGN NUKTA 11FB0 ; N # Lo LISU LETTER YHA 11FC0..11FD4 ; N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index bad2697b9..8afb4936d 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ # IndicPositionalCategory-16.0.0.txt -# Date: 2023-11-09, 18:48:36 GMT +# Date: 2023-11-10, 01:48:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -592,6 +592,7 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11F02 ; Top # Lo KAWI SIGN REPHA 11F36..11F37 ; Top # Mn [2] KAWI VOWEL SIGN I..KAWI VOWEL SIGN II 11F40 ; Top # Mn KAWI VOWEL SIGN EU +11F5A ; Top # Mn KAWI SIGN NUKTA # Indic_Positional_Category=Bottom diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index 0bc45f818..455b67ffa 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-16.0.0.txt -# Date: 2023-11-02, 22:55:33 GMT +# Date: 2023-11-10, 01:48:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -251,6 +251,7 @@ A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU 1183A ; Nukta # Mn DOGRA SIGN NUKTA 11943 ; Nukta # Mn DIVES AKURU SIGN NUKTA 11D42 ; Nukta # Mn MASARAM GONDI SIGN NUKTA +11F5A ; Nukta # Mn KAWI SIGN NUKTA # ================================================ diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3dbae2171..835e639c8 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-06, 03:49:01 GMT +# Date: 2023-11-10, 01:48:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3150,6 +3150,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 11F43..11F44 ; BA # Po [2] KAWI DANDA..KAWI DOUBLE DANDA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; AS # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; CM # Mn KAWI SIGN NUKTA 11FB0 ; AL # Lo LISU LETTER YHA 11FC0..11FD4 ; AL # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; AL # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 6ed3e075b..ce1232123 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2023-11-06, 03:49:06 GMT +# Date: 2023-11-10, 01:58:50 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1088,6 +1088,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA 11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +11F5A ; Diacritic # Mn KAWI SIGN NUKTA 13447..13455 ; Diacritic # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM @@ -1113,7 +1114,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1152 +# Total code points: 1153 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index d41e316d4..bcecfd3d0 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-11-06, 03:49:30 GMT +# Date: 2023-11-10, 01:48:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3026,8 +3026,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 11F42 ; Kawi # Mn KAWI CONJOINER 11F43..11F4F ; Kawi # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; Kawi # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; Kawi # Mn KAWI SIGN NUKTA -# Total code points: 86 +# Total code points: 87 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f65dbdfdb..ac5a58311 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -21898,6 +21898,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11F57;KAWI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 11F58;KAWI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 11F59;KAWI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11F5A;KAWI SIGN NUKTA;Mn;0;NSM;;;;;N;;;;; 11FB0;LISU LETTER YHA;Lo;0;L;;;;;N;;;;; 11FC0;TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH;No;0;L;;;;1/320;N;;;;; 11FC1;TAMIL FRACTION ONE ONE-HUNDRED-AND-SIXTIETH;No;0;L;;;;1/160;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 602c20492..805cfd6a4 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-11-06, 03:49:34 GMT +# Date: 2023-11-10, 01:48:44 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2099,6 +2099,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 11F42 ; R # Mn KAWI CONJOINER 11F43..11F4F ; R # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; R # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; R # Mn KAWI SIGN NUKTA 11FB0 ; R # Lo LISU LETTER YHA 11FC0..11FD4 ; R # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; R # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index f2ee0f839..c65c8497c 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2023-11-02, 22:55:32 GMT +# Date: 2023-11-10, 01:48:10 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -422,6 +422,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F36..11F3A ; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Extend # Mn KAWI VOWEL SIGN EU 11F42 ; Extend # Mn KAWI CONJOINER +11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -462,7 +463,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2138 +# Total code points: 2139 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 151c99c5e..90517db87 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-11-06, 03:49:31 GMT +# Date: 2023-11-10, 01:48:41 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -530,6 +530,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F40 ; Extend # Mn KAWI VOWEL SIGN EU 11F41 ; Extend # Mc KAWI SIGN KILLER 11F42 ; Extend # Mn KAWI CONJOINER +11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -571,7 +572,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2559 +# Total code points: 2560 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 09cb979e4..32297919e 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-11-06, 03:49:34 GMT +# Date: 2023-11-10, 01:48:44 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -566,6 +566,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F40 ; Extend # Mn KAWI VOWEL SIGN EU 11F41 ; Extend # Mc KAWI SIGN KILLER 11F42 ; Extend # Mn KAWI CONJOINER +11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -608,7 +609,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2563 +# Total code points: 2564 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 145c5b131..503a90d7a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-11-06, 03:48:48 GMT +# Date: 2023-11-10, 01:47:58 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1199,8 +1199,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 819486 code points not listed here. -# Total code points: 1095545 +# The above property value applies to 819485 code points not listed here. +# Total code points: 1095544 # ================================================ @@ -2344,6 +2344,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11F36..11F3A ; NSM # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; NSM # Mn KAWI VOWEL SIGN EU 11F42 ; NSM # Mn KAWI CONJOINER +11F5A ; NSM # Mn KAWI SIGN NUKTA 13440 ; NSM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; NSM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -2380,7 +2381,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2001 +# Total code points: 2002 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 0f4b855b3..ea14d2d3d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-11-06, 03:48:51 GMT +# Date: 2023-11-10, 01:48:01 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1760,6 +1760,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 11F40 ; 0 # Mn KAWI VOWEL SIGN EU 11F43..11F4F ; 0 # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; 0 # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; 0 # Mn KAWI SIGN NUKTA 11FB0 ; 0 # Lo LISU LETTER YHA 11FC0..11FD4 ; 0 # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; 0 # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -2035,7 +2036,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 825716 code points not listed here. +# The above property value applies to 825715 code points not listed here. # Total code points: 1113182 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 66b62c669..86508516a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-06, 03:48:54 GMT +# Date: 2023-11-10, 01:48:04 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1793,6 +1793,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 11F42 ; N # Mn KAWI CONJOINER 11F43..11F4F ; N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; N # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; N # Mn KAWI SIGN NUKTA 11FB0 ; N # Lo LISU LETTER YHA 11FC0..11FD4 ; N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -2075,7 +2076,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765234 code points not listed here. +# The above property value applies to 765233 code points not listed here. # Total code points: 792608 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 35d22b5e1..97180eda6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-11-06, 03:48:55 GMT +# Date: 2023-11-10, 01:48:04 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -538,7 +538,7 @@ FFFE..FFFF ; Cn # [2] .. 11EF9..11EFF ; Cn # [7] .. 11F11 ; Cn # 11F3B..11F3D ; Cn # [3] .. -11F5A..11FAF ; Cn # [86] .. +11F5B..11FAF ; Cn # [85] .. 11FB1..11FBF ; Cn # [15] .. 11FF2..11FFE ; Cn # [13] .. 1239A..123FF ; Cn # [102] .. @@ -735,7 +735,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 823668 +# Total code points: 823667 # ================================================ @@ -3007,6 +3007,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11F36..11F3A ; Mn # [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Mn # KAWI VOWEL SIGN EU 11F42 ; Mn # KAWI CONJOINER +11F5A ; Mn # KAWI SIGN NUKTA 13440 ; Mn # EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Mn # [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Mn # [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -3043,7 +3044,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1993 +# Total code points: 1994 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 1df4e026a..72e3386c8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2023-11-02, 22:55:27 GMT +# Date: 2023-11-10, 01:48:06 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -531,6 +531,7 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 11F36..11F3A ; T # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; T # Mn KAWI VOWEL SIGN EU 11F42 ; T # Mn KAWI CONJOINER +11F5A ; T # Mn KAWI SIGN NUKTA 13430..1343F ; T # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13440 ; T # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; T # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED @@ -573,6 +574,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2158 +# Total code points: 2159 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index bcd0d769c..d3bb1d2a2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-06, 03:51:48 GMT +# Date: 2023-11-10, 01:48:06 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 761788 code points not listed here. -# Total code points: 899256 +# The above property value applies to 761787 code points not listed here. +# Total code points: 899255 # ================================================ @@ -2320,6 +2320,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 11F3E..11F3F ; CM # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F40 ; CM # Mn KAWI VOWEL SIGN EU 11F41 ; CM # Mc KAWI SIGN KILLER +11F5A ; CM # Mn KAWI SIGN NUKTA 13440 ; CM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; CM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; CM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -2363,7 +2364,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2438 +# Total code points: 2439 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 8434fcab9..cd80a4355 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-11-06, 03:48:57 GMT +# Date: 2023-11-10, 01:48:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -32563,6 +32563,7 @@ FFFD ; REPLACEMENT CHARACTER 11F57 ; KAWI DIGIT SEVEN 11F58 ; KAWI DIGIT EIGHT 11F59 ; KAWI DIGIT NINE +11F5A ; KAWI SIGN NUKTA 11FB0 ; LISU LETTER YHA 11FC0 ; TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH 11FC1 ; TAMIL FRACTION ONE ONE-HUNDRED-AND-SIXTIETH @@ -45226,6 +45227,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150863 +# Total code points: 150864 # EOF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 3c077270c..14d3e096d 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -540,6 +540,12 @@ Let $nonAlphabeticDependentVowels = [\N{ORIYA SIGN OVERLINE}\N{THAI CHARACTER MA # See 177-CXX. \p{name=/COMBINING .* LETTER/} ⊆ [\p{Alphabetic}\p{Diacritic}] +# Nuktas should probably be diacritic, but as of 15.1 this is only the case of +# those that have NUKTA in their name. +# See https://github.com/unicode-org/properties/issues/195#issuecomment-1804962555. +Let $nonDiacriticNuktas = [\u1BE6\U00010A38\U00010A39\U00010A3A\U0001133B] +[\p{InSc=Nukta} - \p{Diacritic}] = $nonDiacriticNuktas + ########################## # LineBreak property ########################## From 926ddff895e58cdd7eea117c23fecef5f5605e76 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 10 Nov 2023 12:54:21 -0800 Subject: [PATCH 2/3] Tulu-Tigalari (#524) --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 16 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 105 ++++++++++++++++-- .../ucd/dev/DerivedNormalizationProps.txt | 46 ++++++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 20 +++- .../data/ucd/dev/IndicPositionalCategory.txt | 20 +++- .../data/ucd/dev/IndicSyllabicCategory.txt | 23 +++- unicodetools/data/ucd/dev/LineBreak.txt | 21 +++- .../data/ucd/dev/NormalizationTest.txt | 22 +++- unicodetools/data/ucd/dev/PropList.txt | 15 ++- .../data/ucd/dev/PropertyValueAliases.txt | 4 +- unicodetools/data/ucd/dev/Scripts.txt | 25 ++++- unicodetools/data/ucd/dev/UnicodeData.txt | 78 +++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 20 +++- .../dev/auxiliary/GraphemeBreakProperty.txt | 19 +++- .../dev/auxiliary/SentenceBreakProperty.txt | 22 +++- .../ucd/dev/auxiliary/WordBreakProperty.txt | 22 +++- .../ucd/dev/extracted/DerivedBidiClass.txt | 26 ++++- .../dev/extracted/DerivedCombiningClass.txt | 26 ++++- .../extracted/DerivedDecompositionType.txt | 10 +- .../dev/extracted/DerivedEastAsianWidth.txt | 22 +++- .../dev/extracted/DerivedGeneralCategory.txt | 44 ++++++-- .../ucd/dev/extracted/DerivedJoiningType.txt | 8 +- .../ucd/dev/extracted/DerivedLineBreak.txt | 37 ++++-- .../data/ucd/dev/extracted/DerivedName.txt | 82 +++++++++++++- .../org/unicode/props/UcdPropertyValues.java | 2 + .../org/unicode/text/UCD/MakeUnicodeFiles.txt | 7 +- .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + .../unicode/text/UCD/UnicodeInvariantTest.txt | 2 +- 29 files changed, 668 insertions(+), 78 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index c9a431d86..703455d43 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -260,6 +260,7 @@ FFF0..FFFF; Specials 11280..112AF; Multani 112B0..112FF; Khudawadi 11300..1137F; Grantha +11380..113FF; Tulu-Tigalari 11400..1147F; Newa 11480..114DF; Tirhuta 11580..115FF; Siddham diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index d33496325..52ede83d6 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-11-10, 01:47:31 GMT +# Date: 2023-11-10, 04:27:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2023,6 +2023,18 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 10D8E..10D8F ; 16.0 # [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10EC2..10EC4 ; 16.0 # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EFC ; 16.0 # ARABIC COMBINING ALEF OVERLAY +11380..11389 ; 16.0 # [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; 16.0 # TULU-TIGALARI LETTER EE +1138E ; 16.0 # TULU-TIGALARI LETTER AI +11390..113B5 ; 16.0 # [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7..113C0 ; 16.0 # [10] TULU-TIGALARI SIGN AVAGRAHA..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; 16.0 # TULU-TIGALARI VOWEL SIGN EE +113C5 ; 16.0 # TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; 16.0 # [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113D1 ; 16.0 # [6] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI REPHA +113D4..113D5 ; 16.0 # [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; 16.0 # [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1..113E2 ; 16.0 # [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 116D0..116E3 ; 16.0 # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO 11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -2044,6 +2056,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 1051 +# Total code points: 1129 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 6f0672daa..b91ba8116 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-10, 01:48:01 GMT +# Date: 2023-11-10, 04:28:08 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1134,6 +1134,18 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11357 ; Alphabetic # Mc GRANTHA AU LENGTH MARK 1135D..11361 ; Alphabetic # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL 11362..11363 ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; Alphabetic # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Alphabetic # Lo TULU-TIGALARI LETTER EE +1138E ; Alphabetic # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; Alphabetic # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Alphabetic # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; Alphabetic # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Alphabetic # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Alphabetic # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Alphabetic # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Alphabetic # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Alphabetic # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113D1 ; Alphabetic # Lo TULU-TIGALARI REPHA 11400..11434 ; Alphabetic # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI @@ -1423,7 +1435,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138649 +# Total code points: 138718 # ================================================ @@ -3365,6 +3377,10 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11340 ; Case_Ignorable # Mn GRANTHA VOWEL SIGN II 11366..1136C ; Case_Ignorable # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Case_Ignorable # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; Case_Ignorable # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; Case_Ignorable # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; Case_Ignorable # Mn TULU-TIGALARI CONJOINER +113E1..113E2 ; Case_Ignorable # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; Case_Ignorable # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; Case_Ignorable # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; Case_Ignorable # Mn NEWA SIGN NUKTA @@ -3480,7 +3496,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2723 +# Total code points: 2733 # ================================================ @@ -6742,6 +6758,12 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1133D ; ID_Start # Lo GRANTHA SIGN AVAGRAHA 11350 ; ID_Start # Lo GRANTHA OM 1135D..11361 ; ID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; ID_Start # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; ID_Start # Lo TULU-TIGALARI LETTER EE +1138E ; ID_Start # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; ID_Start # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; ID_Start # Lo TULU-TIGALARI SIGN AVAGRAHA +113D1 ; ID_Start # Lo TULU-TIGALARI REPHA 11400..11434 ; ID_Start # Lo [53] NEWA LETTER A..NEWA LETTER HA 11447..1144A ; ID_Start # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 1145F..11461 ; ID_Start # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA @@ -6928,7 +6950,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137193 +# Total code points: 137245 # ================================================ @@ -7957,6 +7979,22 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11362..11363 ; ID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; ID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; ID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; ID_Continue # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; ID_Continue # Lo TULU-TIGALARI LETTER EE +1138E ; ID_Continue # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; ID_Continue # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; ID_Continue # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; ID_Continue # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; ID_Continue # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; ID_Continue # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; ID_Continue # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; ID_Continue # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; ID_Continue # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; ID_Continue # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; ID_Continue # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; ID_Continue # Mn TULU-TIGALARI CONJOINER +113D1 ; ID_Continue # Lo TULU-TIGALARI REPHA +113E1..113E2 ; ID_Continue # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; ID_Continue # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; ID_Continue # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; ID_Continue # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI @@ -8312,7 +8350,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140414 +# Total code points: 140488 # ================================================ @@ -8895,6 +8933,12 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1133D ; XID_Start # Lo GRANTHA SIGN AVAGRAHA 11350 ; XID_Start # Lo GRANTHA OM 1135D..11361 ; XID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; XID_Start # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; XID_Start # Lo TULU-TIGALARI LETTER EE +1138E ; XID_Start # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; XID_Start # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; XID_Start # Lo TULU-TIGALARI SIGN AVAGRAHA +113D1 ; XID_Start # Lo TULU-TIGALARI REPHA 11400..11434 ; XID_Start # Lo [53] NEWA LETTER A..NEWA LETTER HA 11447..1144A ; XID_Start # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 1145F..11461 ; XID_Start # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA @@ -9081,7 +9125,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137170 +# Total code points: 137222 # ================================================ @@ -10111,6 +10155,22 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11362..11363 ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; XID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; XID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; XID_Continue # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; XID_Continue # Lo TULU-TIGALARI LETTER EE +1138E ; XID_Continue # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; XID_Continue # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; XID_Continue # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; XID_Continue # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; XID_Continue # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; XID_Continue # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; XID_Continue # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; XID_Continue # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; XID_Continue # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; XID_Continue # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; XID_Continue # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; XID_Continue # Mn TULU-TIGALARI CONJOINER +113D1 ; XID_Continue # Lo TULU-TIGALARI REPHA +113E1..113E2 ; XID_Continue # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; XID_Continue # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; XID_Continue # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; XID_Continue # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI @@ -10466,7 +10526,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140395 +# Total code points: 140469 # ================================================ @@ -10804,6 +10864,10 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 11357 ; Grapheme_Extend # Mc GRANTHA AU LENGTH MARK 11366..1136C ; Grapheme_Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Grapheme_Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; Grapheme_Extend # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; Grapheme_Extend # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; Grapheme_Extend # Mn TULU-TIGALARI CONJOINER +113E1..113E2 ; Grapheme_Extend # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; Grapheme_Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; Grapheme_Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; Grapheme_Extend # Mn NEWA SIGN NUKTA @@ -10907,7 +10971,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2134 +# Total code points: 2144 # ================================================ @@ -12297,6 +12361,20 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 11350 ; Grapheme_Base # Lo GRANTHA OM 1135D..11361 ; Grapheme_Base # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL 11362..11363 ; Grapheme_Base # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; Grapheme_Base # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Grapheme_Base # Lo TULU-TIGALARI LETTER EE +1138E ; Grapheme_Base # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; Grapheme_Base # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Grapheme_Base # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; Grapheme_Base # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113C2 ; Grapheme_Base # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Grapheme_Base # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Grapheme_Base # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Grapheme_Base # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; Grapheme_Base # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D1 ; Grapheme_Base # Lo TULU-TIGALARI REPHA +113D4..113D5 ; Grapheme_Base # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; Grapheme_Base # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11400..11434 ; Grapheme_Base # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; Grapheme_Base # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11440..11441 ; Grapheme_Base # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -12688,7 +12766,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 148655 +# Total code points: 148723 # ================================================ @@ -12738,6 +12816,9 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK 11235 ; Grapheme_Link # Mc KHOJKI SIGN VIRAMA 112EA ; Grapheme_Link # Mn KHUDAWADI SIGN VIRAMA 1134D ; Grapheme_Link # Mc GRANTHA SIGN VIRAMA +113CE ; Grapheme_Link # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Grapheme_Link # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Grapheme_Link # Mn TULU-TIGALARI CONJOINER 11442 ; Grapheme_Link # Mn NEWA SIGN VIRAMA 114C2 ; Grapheme_Link # Mn TIRHUTA SIGN VIRAMA 115BF ; Grapheme_Link # Mn SIDDHAM SIGN VIRAMA @@ -12757,7 +12838,7 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK 11F41 ; Grapheme_Link # Mc KAWI SIGN KILLER 11F42 ; Grapheme_Link # Mn KAWI CONJOINER -# Total code points: 65 +# Total code points: 68 # ================================================ @@ -12954,6 +13035,8 @@ FE20..FE2F ; InCB; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING 1133B..1133C ; InCB; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA 11366..1136C ; InCB; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; InCB; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113CE ; InCB; Extend # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; InCB; Extend # Mn TULU-TIGALARI CONJOINER 11446 ; InCB; Extend # Mn NEWA SIGN NUKTA 1145E ; InCB; Extend # Mn NEWA SANDHI MARK 114C3 ; InCB; Extend # Mn TIRHUTA SIGN NUKTA @@ -12994,6 +13077,6 @@ FE20..FE2F ; InCB; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING 1E8D0..1E8D6 ; InCB; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; InCB; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA -# Total code points: 892 +# Total code points: 894 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index 8a98465e0..47ddba511 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-16.0.0.txt -# Date: 2023-11-06, 03:48:57 GMT +# Date: 2023-11-10, 04:28:13 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1014,6 +1014,12 @@ FB46..FB4E ; NFD_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 110AB ; NFD_QC; N # Lo KAITHI LETTER VA 1112E..1112F ; NFD_QC; N # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; NFD_QC; N # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; NFD_QC; N # Lo TULU-TIGALARI LETTER II +11385 ; NFD_QC; N # Lo TULU-TIGALARI LETTER UU +1138E ; NFD_QC; N # Lo TULU-TIGALARI LETTER AI +11391 ; NFD_QC; N # Lo TULU-TIGALARI LETTER AU +113C5 ; NFD_QC; N # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; NFD_QC; N # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; NFD_QC; N # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; NFD_QC; N # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; NFD_QC; N # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU @@ -1023,7 +1029,7 @@ FB46..FB4E ; NFD_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1D1BB..1D1C0 ; NFD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; NFD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 13238 +# Total code points: 13245 # ================================================ @@ -1157,6 +1163,10 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 11127 ; NFC_QC; M # Mn CHAKMA VOWEL SIGN A 1133E ; NFC_QC; M # Mc GRANTHA VOWEL SIGN AA 11357 ; NFC_QC; M # Mc GRANTHA AU LENGTH MARK +113B8 ; NFC_QC; M # Mc TULU-TIGALARI VOWEL SIGN AA +113BB ; NFC_QC; M # Mn TULU-TIGALARI VOWEL SIGN U +113C2 ; NFC_QC; M # Mc TULU-TIGALARI VOWEL SIGN EE +113C9 ; NFC_QC; M # Mc TULU-TIGALARI AU LENGTH MARK 114B0 ; NFC_QC; M # Mc TIRHUTA VOWEL SIGN AA 114BA ; NFC_QC; M # Mn TIRHUTA VOWEL SIGN SHORT E 114BD ; NFC_QC; M # Mc TIRHUTA VOWEL SIGN SHORT O @@ -1164,7 +1174,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 11930 ; NFC_QC; M # Mc DIVES AKURU VOWEL SIGN AA 16D67 ; NFC_QC; M # Lo KIRAT RAI VOWEL SIGN E -# Total code points: 112 +# Total code points: 116 # ================================================ @@ -1637,6 +1647,12 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 110AB ; NFKD_QC; N # Lo KAITHI LETTER VA 1112E..1112F ; NFKD_QC; N # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; NFKD_QC; N # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; NFKD_QC; N # Lo TULU-TIGALARI LETTER II +11385 ; NFKD_QC; N # Lo TULU-TIGALARI LETTER UU +1138E ; NFKD_QC; N # Lo TULU-TIGALARI LETTER AI +11391 ; NFKD_QC; N # Lo TULU-TIGALARI LETTER AU +113C5 ; NFKD_QC; N # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; NFKD_QC; N # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; NFKD_QC; N # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; NFKD_QC; N # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; NFKD_QC; N # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU @@ -1733,7 +1749,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 17070 +# Total code points: 17077 # ================================================ @@ -2188,6 +2204,10 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 11127 ; NFKC_QC; M # Mn CHAKMA VOWEL SIGN A 1133E ; NFKC_QC; M # Mc GRANTHA VOWEL SIGN AA 11357 ; NFKC_QC; M # Mc GRANTHA AU LENGTH MARK +113B8 ; NFKC_QC; M # Mc TULU-TIGALARI VOWEL SIGN AA +113BB ; NFKC_QC; M # Mn TULU-TIGALARI VOWEL SIGN U +113C2 ; NFKC_QC; M # Mc TULU-TIGALARI VOWEL SIGN EE +113C9 ; NFKC_QC; M # Mc TULU-TIGALARI AU LENGTH MARK 114B0 ; NFKC_QC; M # Mc TIRHUTA VOWEL SIGN AA 114BA ; NFKC_QC; M # Mn TIRHUTA VOWEL SIGN SHORT E 114BD ; NFKC_QC; M # Mc TIRHUTA VOWEL SIGN SHORT O @@ -2195,7 +2215,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 11930 ; NFKC_QC; M # Mc DIVES AKURU VOWEL SIGN AA 16D67 ; NFKC_QC; M # Lo KIRAT RAI VOWEL SIGN E -# Total code points: 112 +# Total code points: 116 # ================================================ @@ -2424,6 +2444,12 @@ FB46..FB4E ; Expands_On_NFD # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBRE 110AB ; Expands_On_NFD # Lo KAITHI LETTER VA 1112E..1112F ; Expands_On_NFD # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; Expands_On_NFD # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; Expands_On_NFD # Lo TULU-TIGALARI LETTER II +11385 ; Expands_On_NFD # Lo TULU-TIGALARI LETTER UU +1138E ; Expands_On_NFD # Lo TULU-TIGALARI LETTER AI +11391 ; Expands_On_NFD # Lo TULU-TIGALARI LETTER AU +113C5 ; Expands_On_NFD # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; Expands_On_NFD # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; Expands_On_NFD # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; Expands_On_NFD # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; Expands_On_NFD # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU @@ -2432,7 +2458,7 @@ FB46..FB4E ; Expands_On_NFD # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBRE 1D15E..1D164 ; Expands_On_NFD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK -# Total code points: 12221 +# Total code points: 12228 # ================================================ @@ -2781,6 +2807,12 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON 110AB ; Expands_On_NFKD # Lo KAITHI LETTER VA 1112E..1112F ; Expands_On_NFKD # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; Expands_On_NFKD # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; Expands_On_NFKD # Lo TULU-TIGALARI LETTER II +11385 ; Expands_On_NFKD # Lo TULU-TIGALARI LETTER UU +1138E ; Expands_On_NFKD # Lo TULU-TIGALARI LETTER AI +11391 ; Expands_On_NFKD # Lo TULU-TIGALARI LETTER AU +113C5 ; Expands_On_NFKD # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; Expands_On_NFKD # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; Expands_On_NFKD # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; Expands_On_NFKD # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; Expands_On_NFKD # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU @@ -2798,7 +2830,7 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON 1F213 ; Expands_On_NFKD # So SQUARED KATAKANA DE 1F240..1F248 ; Expands_On_NFKD # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -# Total code points: 13395 +# Total code points: 13402 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index c53d2b8ac..2b5135b0c 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-10, 01:48:10 GMT +# Date: 2023-11-10, 04:28:16 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2076,6 +2076,24 @@ FFFD ; A # So REPLACEMENT CHARACTER 11362..11363 ; N # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; N # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; N # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; N # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; N # Lo TULU-TIGALARI LETTER EE +1138E ; N # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; N # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; N # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; N # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; N # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; N # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; N # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; N # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; N # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; N # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; N # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; N # Mn TULU-TIGALARI CONJOINER +113D1 ; N # Lo TULU-TIGALARI REPHA +113D4..113D5 ; N # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; N # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1..113E2 ; N # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; N # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; N # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; N # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index 8afb4936d..7f37a3bea 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ # IndicPositionalCategory-16.0.0.txt -# Date: 2023-11-10, 01:48:11 GMT +# Date: 2023-11-10, 20:38:16 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -74,7 +74,7 @@ # Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, Rejang, Saurashtra, # Sharada, Siddham, Sinhala, Soyombo, Sundanese, Syloti Nagri, # Tagalog, Tagbanwa, Tai Tham, Tai Viet, Takri, Tamil, Telugu, Thai, -# Tibetan, Tirhuta, and Zanabazar Square. +# Tibetan, Tirhuta, Tulu-Tigalari, and Zanabazar Square. # # All characters for all other scripts not in that list # take the default value for this property. @@ -109,6 +109,9 @@ # contextually variable placement in Myanmar. # * U+1A69 TAI THAM VOWEL SIGN U and U+1A6A TAI THAM VOWEL SIGN UU have # contextually variable placement in Tai Tham. +# * U+113BB TULU-TIGALARI VOWEL SIGN U and +# U+113BC TULU-TIGALARI VOWEL SIGN UU form complex ligatures with +# consonants. # # 4. The following character is assigned the positional category Left, but # may have different positions in different styles: @@ -257,6 +260,10 @@ ABEC ; Right # Mc MEETEI MAYEK LUM IYEK 1134D ; Right # Mc GRANTHA SIGN VIRAMA 11357 ; Right # Mc GRANTHA AU LENGTH MARK 11362..11363 ; Right # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8 ; Right # Mc TULU-TIGALARI VOWEL SIGN AA +113C9..113CA ; Right # Mc [2] TULU-TIGALARI AU LENGTH MARK..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Right # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; Right # Mc TULU-TIGALARI SIGN LOOPED VIRAMA 11435 ; Right # Mc NEWA VOWEL SIGN AA 11437 ; Right # Mc NEWA VOWEL SIGN II 11440..11441 ; Right # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -338,6 +345,8 @@ AAEE ; Left # Mc MEETEI MAYEK VOWEL SIGN AU 111CE ; Left # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E 112E1 ; Left # Mc KHUDAWADI VOWEL SIGN I 11347..11348 ; Left # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +113C2 ; Left # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Left # Mc TULU-TIGALARI VOWEL SIGN AI 11436 ; Left # Mc NEWA VOWEL SIGN I 114B1 ; Left # Mc TIRHUTA VOWEL SIGN I 114B9 ; Left # Mc TIRHUTA VOWEL SIGN E @@ -384,6 +393,7 @@ AABB..AABC ; Visual_Order_Left # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL 17C4..17C5 ; Left_And_Right # Mc [2] KHMER VOWEL SIGN OO..KHMER VOWEL SIGN AU 1B40..1B41 ; Left_And_Right # Mc [2] BALINESE VOWEL SIGN TALING TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG 1134B..1134C ; Left_And_Right # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +113C7..113C8 ; Left_And_Right # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BC ; Left_And_Right # Mc TIRHUTA VOWEL SIGN O 114BE ; Left_And_Right # Mc TIRHUTA VOWEL SIGN AU 115BA ; Left_And_Right # Mc SIDDHAM VOWEL SIGN O @@ -546,6 +556,9 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11340 ; Top # Mn GRANTHA VOWEL SIGN II 11366..1136C ; Top # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Top # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113CE ; Top # Mn TULU-TIGALARI SIGN VIRAMA +113D1 ; Top # Lo TULU-TIGALARI REPHA +113E1 ; Top # Mn TULU-TIGALARI VEDIC TONE SVARITA 1143E..1143F ; Top # Mn [2] NEWA VOWEL SIGN E..NEWA VOWEL SIGN AI 11443..11444 ; Top # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA 1145E ; Top # Mn NEWA SANDHI MARK @@ -705,6 +718,8 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 112E3..112E4 ; Bottom # Mn [2] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN UU 112E9..112EA ; Bottom # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA 1133B..1133C ; Bottom # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +113BB..113C0 ; Bottom # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113E2 ; Bottom # Mn TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143D ; Bottom # Mn [6] NEWA VOWEL SIGN U..NEWA VOWEL SIGN VOCALIC LL 11442 ; Bottom # Mn NEWA SIGN VIRAMA 11446 ; Bottom # Mn NEWA SIGN NUKTA @@ -764,6 +779,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 1B43 ; Top_And_Right # Mc BALINESE VOWEL SIGN PEPET TEDUNG 111BF ; Top_And_Right # Mc SHARADA VOWEL SIGN AU 11232..11233 ; Top_And_Right # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +113B9..113BA ; Top_And_Right # Mc [2] TULU-TIGALARI VOWEL SIGN I..TULU-TIGALARI VOWEL SIGN II # Indic_Positional_Category=Top_And_Left diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index 455b67ffa..5e33cafd5 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-16.0.0.txt -# Date: 2023-11-10, 01:48:11 GMT +# Date: 2023-11-10, 20:38:16 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -43,7 +43,7 @@ # Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, # Phags-pa, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Soyombo, # Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, -# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, and +# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, Tulu-Tigalari, and # Zanabazar Square. # # All characters for all other scripts not in that list @@ -119,6 +119,8 @@ A980..A981 ; Bindu # Mn [2] JAVANESE SIGN PANYANGGA..JAVANESE SIGN CECAK 11300..11301 ; Bindu # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU 11302 ; Bindu # Mc GRANTHA SIGN ANUSVARA 1135E..1135F ; Bindu # Lo [2] GRANTHA LETTER VEDIC ANUSVARA..GRANTHA LETTER VEDIC DOUBLE ANUSVARA +113CA ; Bindu # Mc TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC ; Bindu # Mc TULU-TIGALARI SIGN ANUSVARA 11443..11444 ; Bindu # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA 1145F ; Bindu # Lo NEWA LETTER VEDIC ANUSVARA 114BF..114C0 ; Bindu # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA @@ -170,6 +172,7 @@ AAF5 ; Visarga # Mc MEETEI MAYEK VOWEL SIGN VISARGA 11102 ; Visarga # Mn CHAKMA SIGN VISARGA 11182 ; Visarga # Mc SHARADA SIGN VISARGA 11303 ; Visarga # Mc GRANTHA SIGN VISARGA +113CD ; Visarga # Mc TULU-TIGALARI SIGN VISARGA 11445 ; Visarga # Mc NEWA SIGN VISARGA 114C1 ; Visarga # Mc TIRHUTA SIGN VISARGA 115BE ; Visarga # Mc SIDDHAM SIGN VISARGA @@ -205,6 +208,7 @@ AAF5 ; Visarga # Mc MEETEI MAYEK VOWEL SIGN VISARGA 1BBA ; Avagraha # Lo SUNDANESE AVAGRAHA 111C1 ; Avagraha # Lo SHARADA SIGN AVAGRAHA 1133D ; Avagraha # Lo GRANTHA SIGN AVAGRAHA +113B7 ; Avagraha # Lo TULU-TIGALARI SIGN AVAGRAHA 11447 ; Avagraha # Lo NEWA SIGN AVAGRAHA 114C4 ; Avagraha # Lo TIRHUTA SIGN AVAGRAHA 119E1 ; Avagraha # Lo NANDINAGARI SIGN AVAGRAHA @@ -322,6 +326,8 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK 11070 ; Pure_Killer # Mn BRAHMI SIGN OLD TAMIL VIRAMA 11134 ; Pure_Killer # Mn CHAKMA MAAYYAA 112EA ; Pure_Killer # Mn KHUDAWADI SIGN VIRAMA +113CE ; Pure_Killer # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Pure_Killer # Mc TULU-TIGALARI SIGN LOOPED VIRAMA 1172B ; Pure_Killer # Mn AHOM SIGN KILLER 1193D ; Pure_Killer # Mc DIVES AKURU SIGN HALANTA 11A34 ; Pure_Killer # Mn ZANABAZAR SQUARE SIGN VIRAMA @@ -349,6 +355,7 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK AAF6 ; Invisible_Stacker # Mn MEETEI MAYEK VIRAMA 10A3F ; Invisible_Stacker # Mn KHAROSHTHI VIRAMA 11133 ; Invisible_Stacker # Mn CHAKMA VIRAMA +113D0 ; Invisible_Stacker # Mn TULU-TIGALARI CONJOINER 1193E ; Invisible_Stacker # Mn DIVES AKURU VIRAMA 11A47 ; Invisible_Stacker # Mn ZANABAZAR SQUARE SUBJOINER 11A99 ; Invisible_Stacker # Mn SOYOMBO SUBJOINER @@ -432,6 +439,10 @@ ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 1130F..11310 ; Vowel_Independent # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI 11313..11314 ; Vowel_Independent # Lo [2] GRANTHA LETTER OO..GRANTHA LETTER AU 11360..11361 ; Vowel_Independent # Lo [2] GRANTHA LETTER VOCALIC RR..GRANTHA LETTER VOCALIC LL +11380..11389 ; Vowel_Independent # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Vowel_Independent # Lo TULU-TIGALARI LETTER EE +1138E ; Vowel_Independent # Lo TULU-TIGALARI LETTER AI +11390..11391 ; Vowel_Independent # Lo [2] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER AU 11400..1140D ; Vowel_Independent # Lo [14] NEWA LETTER A..NEWA LETTER AU 11481..1148E ; Vowel_Independent # Lo [14] TIRHUTA LETTER A..TIRHUTA LETTER AU 11580..1158D ; Vowel_Independent # Lo [14] SIDDHAM LETTER A..SIDDHAM LETTER AU @@ -659,6 +670,11 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 1134B..1134C ; Vowel_Dependent # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU 11357 ; Vowel_Dependent # Mc GRANTHA AU LENGTH MARK 11362..11363 ; Vowel_Dependent # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Vowel_Dependent # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Vowel_Dependent # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Vowel_Dependent # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Vowel_Dependent # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; Vowel_Dependent # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK 11435..11437 ; Vowel_Dependent # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Vowel_Dependent # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; Vowel_Dependent # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -906,6 +922,7 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 1132A..11330 ; Consonant # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA 11332..11333 ; Consonant # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA 11335..11339 ; Consonant # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +11392..113B5 ; Consonant # Lo [36] TULU-TIGALARI LETTER KA..TULU-TIGALARI LETTER LLLA 1140E..11434 ; Consonant # Lo [39] NEWA LETTER KA..NEWA LETTER HA 1148F..114AF ; Consonant # Lo [33] TIRHUTA LETTER KA..TIRHUTA LETTER HA 1158E..115AE ; Consonant # Lo [33] SIDDHAM LETTER KA..SIDDHAM LETTER HA @@ -981,6 +998,7 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE # [Not derivable] 0D4E ; Consonant_Preceding_Repha # Lo MALAYALAM LETTER DOT REPH +113D1 ; Consonant_Preceding_Repha # Lo TULU-TIGALARI REPHA 11941 ; Consonant_Preceding_Repha # Lo DIVES AKURU INITIAL RA 11D46 ; Consonant_Preceding_Repha # Lo MASARAM GONDI REPHA 11F02 ; Consonant_Preceding_Repha # Lo KAWI SIGN REPHA @@ -1189,6 +1207,7 @@ A8E0..A8F1 ; Cantillation_Mark # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..CO 1123E ; Cantillation_Mark # Mn KHOJKI SIGN SUKUN 11366..1136C ; Cantillation_Mark # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Cantillation_Mark # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113E1..113E2 ; Cantillation_Mark # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA # ================================================ diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 835e639c8..2918fcf88 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-10, 01:48:11 GMT +# Date: 2023-11-10, 04:28:17 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2941,6 +2941,25 @@ FFFD ; AI # So REPLACEMENT CHARACTER 11362..11363 ; CM # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; CM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; CM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; AS # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; AS # Lo TULU-TIGALARI LETTER EE +1138E ; AS # Lo TULU-TIGALARI LETTER AI +11390..11391 ; AS # Lo [2] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER AU +11392..113B5 ; AK # Lo [36] TULU-TIGALARI LETTER KA..TULU-TIGALARI LETTER LLLA +113B7 ; ID # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; CM # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; CM # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; CM # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; CM # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; CM # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; CM # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; CM # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; CM # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; VI # Mn TULU-TIGALARI CONJOINER +113D1 ; AP # Lo TULU-TIGALARI REPHA +113D4..113D5 ; ID # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1..113E2 ; CM # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; AL # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; CM # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; CM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 80d9a56b2..e3e738408 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2023-10-26, 22:42:58 GMT +# Date: 2023-11-10, 04:28:20 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -15195,6 +15195,13 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 1112F;1112F;11132 11127;1112F;11132 11127; # (◌𑄯; ◌𑄯; ◌𑄲◌𑄧; ◌𑄯; ◌𑄲◌𑄧; ) CHAKMA VOWEL SIGN AU 1134B;1134B;11347 1133E;1134B;11347 1133E; # (𑍋; 𑍋; 𑍋; 𑍋; 𑍋; ) GRANTHA VOWEL SIGN OO 1134C;1134C;11347 11357;1134C;11347 11357; # (𑍌; 𑍌; 𑍌; 𑍌; 𑍌; ) GRANTHA VOWEL SIGN AU +11383;11383;11382 113C9;11383;11382 113C9; # (𑎃; 𑎃; 𑎃; 𑎃; 𑎃; ) TULU-TIGALARI LETTER II +11385;11385;11384 113BB;11385;11384 113BB; # (𑎅; 𑎅; 𑎄◌𑎻; 𑎅; 𑎄◌𑎻; ) TULU-TIGALARI LETTER UU +1138E;1138E;1138B 113C2;1138E;1138B 113C2; # (𑎎; 𑎎; 𑎎; 𑎎; 𑎎; ) TULU-TIGALARI LETTER AI +11391;11391;11390 113C9;11391;11390 113C9; # (𑎑; 𑎑; 𑎑; 𑎑; 𑎑; ) TULU-TIGALARI LETTER AU +113C5;113C5;113C2 113C2;113C5;113C2 113C2; # (𑏅; 𑏅; 𑏅; 𑏅; 𑏅; ) TULU-TIGALARI VOWEL SIGN AI +113C7;113C7;113C2 113B8;113C7;113C2 113B8; # (𑏇; 𑏇; 𑏇; 𑏇; 𑏇; ) TULU-TIGALARI VOWEL SIGN OO +113C8;113C8;113C2 113C9;113C8;113C2 113C9; # (𑏈; 𑏈; 𑏈; 𑏈; 𑏈; ) TULU-TIGALARI VOWEL SIGN AU 114BB;114BB;114B9 114BA;114BB;114B9 114BA; # (𑒻; 𑒻; 𑒹◌𑒺; 𑒻; 𑒹◌𑒺; ) TIRHUTA VOWEL SIGN AI 114BC;114BC;114B9 114B0;114BC;114B9 114B0; # (𑒼; 𑒼; 𑒼; 𑒼; 𑒼; ) TIRHUTA VOWEL SIGN O 114BE;114BE;114B9 114BD;114BE;114B9 114BD; # (𑒾; 𑒾; 𑒾; 𑒾; 𑒾; ) TIRHUTA VOWEL SIGN AU @@ -18711,6 +18718,12 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 11373 0315 0300 05AE 0062;0061 05AE 11373 0300 0315 0062;0061 05AE 11373 0300 0315 0062;0061 05AE 11373 0300 0315 0062;0061 05AE 11373 0300 0315 0062; # (a◌𑍳◌̕◌̀◌֮b; a◌֮◌𑍳◌̀◌̕b; a◌֮◌𑍳◌̀◌̕b; a◌֮◌𑍳◌̀◌̕b; a◌֮◌𑍳◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING GRANTHA LETTER VI, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 11374 0062;00E0 05AE 11374 0315 0062;0061 05AE 0300 11374 0315 0062;00E0 05AE 11374 0315 0062;0061 05AE 0300 11374 0315 0062; # (a◌̕◌̀◌֮◌𑍴b; à◌֮◌𑍴◌̕b; a◌֮◌̀◌𑍴◌̕b; à◌֮◌𑍴◌̕b; a◌֮◌̀◌𑍴◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING GRANTHA LETTER PA, LATIN SMALL LETTER B 0061 11374 0315 0300 05AE 0062;0061 05AE 11374 0300 0315 0062;0061 05AE 11374 0300 0315 0062;0061 05AE 11374 0300 0315 0062;0061 05AE 11374 0300 0315 0062; # (a◌𑍴◌̕◌̀◌֮b; a◌֮◌𑍴◌̀◌̕b; a◌֮◌𑍴◌̀◌̕b; a◌֮◌𑍴◌̀◌̕b; a◌֮◌𑍴◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING GRANTHA LETTER PA, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 05B0 094D 3099 113CE 0062;0061 3099 094D 113CE 05B0 0062;0061 3099 094D 113CE 05B0 0062;0061 3099 094D 113CE 05B0 0062;0061 3099 094D 113CE 05B0 0062; # (a◌ְ◌्◌゙◌𑏎b; a◌゙◌्◌𑏎◌ְb; a◌゙◌्◌𑏎◌ְb; a◌゙◌्◌𑏎◌ְb; a◌゙◌्◌𑏎◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, TULU-TIGALARI SIGN VIRAMA, LATIN SMALL LETTER B +0061 113CE 05B0 094D 3099 0062;0061 3099 113CE 094D 05B0 0062;0061 3099 113CE 094D 05B0 0062;0061 3099 113CE 094D 05B0 0062;0061 3099 113CE 094D 05B0 0062; # (a◌𑏎◌ְ◌्◌゙b; a◌゙◌𑏎◌्◌ְb; a◌゙◌𑏎◌्◌ְb; a◌゙◌𑏎◌्◌ְb; a◌゙◌𑏎◌्◌ְb; ) LATIN SMALL LETTER A, TULU-TIGALARI SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 113CF 0062;0061 3099 094D 113CF 05B0 0062;0061 3099 094D 113CF 05B0 0062;0061 3099 094D 113CF 05B0 0062;0061 3099 094D 113CF 05B0 0062; # (a◌ְ◌्◌゙𑏏b; a◌゙◌्𑏏◌ְb; a◌゙◌्𑏏◌ְb; a◌゙◌्𑏏◌ְb; a◌゙◌्𑏏◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, TULU-TIGALARI SIGN LOOPED VIRAMA, LATIN SMALL LETTER B +0061 113CF 05B0 094D 3099 0062;0061 3099 113CF 094D 05B0 0062;0061 3099 113CF 094D 05B0 0062;0061 3099 113CF 094D 05B0 0062;0061 3099 113CF 094D 05B0 0062; # (a𑏏◌ְ◌्◌゙b; a◌゙𑏏◌्◌ְb; a◌゙𑏏◌्◌ְb; a◌゙𑏏◌्◌ְb; a◌゙𑏏◌्◌ְb; ) LATIN SMALL LETTER A, TULU-TIGALARI SIGN LOOPED VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 113D0 0062;0061 3099 094D 113D0 05B0 0062;0061 3099 094D 113D0 05B0 0062;0061 3099 094D 113D0 05B0 0062;0061 3099 094D 113D0 05B0 0062; # (a◌ְ◌्◌゙◌𑏐b; a◌゙◌्◌𑏐◌ְb; a◌゙◌्◌𑏐◌ְb; a◌゙◌्◌𑏐◌ְb; a◌゙◌्◌𑏐◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, TULU-TIGALARI CONJOINER, LATIN SMALL LETTER B +0061 113D0 05B0 094D 3099 0062;0061 3099 113D0 094D 05B0 0062;0061 3099 113D0 094D 05B0 0062;0061 3099 113D0 094D 05B0 0062;0061 3099 113D0 094D 05B0 0062; # (a◌𑏐◌ְ◌्◌゙b; a◌゙◌𑏐◌्◌ְb; a◌゙◌𑏐◌्◌ְb; a◌゙◌𑏐◌्◌ְb; a◌゙◌𑏐◌्◌ְb; ) LATIN SMALL LETTER A, TULU-TIGALARI CONJOINER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 05B0 094D 3099 11442 0062;0061 3099 094D 11442 05B0 0062;0061 3099 094D 11442 05B0 0062;0061 3099 094D 11442 05B0 0062;0061 3099 094D 11442 05B0 0062; # (a◌ְ◌्◌゙◌𑑂b; a◌゙◌्◌𑑂◌ְb; a◌゙◌्◌𑑂◌ְb; a◌゙◌्◌𑑂◌ְb; a◌゙◌्◌𑑂◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, NEWA SIGN VIRAMA, LATIN SMALL LETTER B 0061 11442 05B0 094D 3099 0062;0061 3099 11442 094D 05B0 0062;0061 3099 11442 094D 05B0 0062;0061 3099 11442 094D 05B0 0062;0061 3099 11442 094D 05B0 0062; # (a◌𑑂◌ְ◌्◌゙b; a◌゙◌𑑂◌्◌ְb; a◌゙◌𑑂◌्◌ְb; a◌゙◌𑑂◌्◌ְb; a◌゙◌𑑂◌्◌ְb; ) LATIN SMALL LETTER A, NEWA SIGN VIRAMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 3099 093C 16FF0 11446 0062;0061 16FF0 093C 11446 3099 0062;0061 16FF0 093C 11446 3099 0062;0061 16FF0 093C 11446 3099 0062;0061 16FF0 093C 11446 3099 0062; # (a◌゙◌𖿰़◌𑑆b; a𖿰◌़◌𑑆◌゙b; a𖿰◌़◌𑑆◌゙b; a𖿰◌़◌𑑆◌゙b; a𖿰◌़◌𑑆◌゙b; ) LATIN SMALL LETTER A, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, NEWA SIGN NUKTA, LATIN SMALL LETTER B @@ -19176,6 +19189,13 @@ D750 0334 11B5;D750 0334 11B5;1112 1173 0334 11B5;D750 0334 11B5;1112 1173 0334 11132 0334 11127;11132 0334 11127;11132 0334 11127;11132 0334 11127;11132 0334 11127; # (◌𑄲◌̴◌𑄧; ◌𑄲◌̴◌𑄧; ◌𑄲◌̴◌𑄧; ◌𑄲◌̴◌𑄧; ◌𑄲◌̴◌𑄧; ) CHAKMA AU MARK, COMBINING TILDE OVERLAY, CHAKMA VOWEL SIGN A 11347 0334 1133E;11347 0334 1133E;11347 0334 1133E;11347 0334 1133E;11347 0334 1133E; # (𑍇◌̴𑌾; 𑍇◌̴𑌾; 𑍇◌̴𑌾; 𑍇◌̴𑌾; 𑍇◌̴𑌾; ) GRANTHA VOWEL SIGN EE, COMBINING TILDE OVERLAY, GRANTHA VOWEL SIGN AA 11347 0334 11357;11347 0334 11357;11347 0334 11357;11347 0334 11357;11347 0334 11357; # (𑍇◌̴𑍗; 𑍇◌̴𑍗; 𑍇◌̴𑍗; 𑍇◌̴𑍗; 𑍇◌̴𑍗; ) GRANTHA VOWEL SIGN EE, COMBINING TILDE OVERLAY, GRANTHA AU LENGTH MARK +11382 0334 113C9;11382 0334 113C9;11382 0334 113C9;11382 0334 113C9;11382 0334 113C9; # (𑎂◌̴𑏉; 𑎂◌̴𑏉; 𑎂◌̴𑏉; 𑎂◌̴𑏉; 𑎂◌̴𑏉; ) TULU-TIGALARI LETTER I, COMBINING TILDE OVERLAY, TULU-TIGALARI AU LENGTH MARK +11384 0334 113BB;11384 0334 113BB;11384 0334 113BB;11384 0334 113BB;11384 0334 113BB; # (𑎄◌̴◌𑎻; 𑎄◌̴◌𑎻; 𑎄◌̴◌𑎻; 𑎄◌̴◌𑎻; 𑎄◌̴◌𑎻; ) TULU-TIGALARI LETTER U, COMBINING TILDE OVERLAY, TULU-TIGALARI VOWEL SIGN U +1138B 0334 113C2;1138B 0334 113C2;1138B 0334 113C2;1138B 0334 113C2;1138B 0334 113C2; # (𑎋◌̴𑏂; 𑎋◌̴𑏂; 𑎋◌̴𑏂; 𑎋◌̴𑏂; 𑎋◌̴𑏂; ) TULU-TIGALARI LETTER EE, COMBINING TILDE OVERLAY, TULU-TIGALARI VOWEL SIGN EE +11390 0334 113C9;11390 0334 113C9;11390 0334 113C9;11390 0334 113C9;11390 0334 113C9; # (𑎐◌̴𑏉; 𑎐◌̴𑏉; 𑎐◌̴𑏉; 𑎐◌̴𑏉; 𑎐◌̴𑏉; ) TULU-TIGALARI LETTER OO, COMBINING TILDE OVERLAY, TULU-TIGALARI AU LENGTH MARK +113C2 0334 113B8;113C2 0334 113B8;113C2 0334 113B8;113C2 0334 113B8;113C2 0334 113B8; # (𑏂◌̴𑎸; 𑏂◌̴𑎸; 𑏂◌̴𑎸; 𑏂◌̴𑎸; 𑏂◌̴𑎸; ) TULU-TIGALARI VOWEL SIGN EE, COMBINING TILDE OVERLAY, TULU-TIGALARI VOWEL SIGN AA +113C2 0334 113C2;113C2 0334 113C2;113C2 0334 113C2;113C2 0334 113C2;113C2 0334 113C2; # (𑏂◌̴𑏂; 𑏂◌̴𑏂; 𑏂◌̴𑏂; 𑏂◌̴𑏂; 𑏂◌̴𑏂; ) TULU-TIGALARI VOWEL SIGN EE, COMBINING TILDE OVERLAY, TULU-TIGALARI VOWEL SIGN EE +113C2 0334 113C9;113C2 0334 113C9;113C2 0334 113C9;113C2 0334 113C9;113C2 0334 113C9; # (𑏂◌̴𑏉; 𑏂◌̴𑏉; 𑏂◌̴𑏉; 𑏂◌̴𑏉; 𑏂◌̴𑏉; ) TULU-TIGALARI VOWEL SIGN EE, COMBINING TILDE OVERLAY, TULU-TIGALARI AU LENGTH MARK 114B9 0334 114B0;114B9 0334 114B0;114B9 0334 114B0;114B9 0334 114B0;114B9 0334 114B0; # (𑒹◌̴𑒰; 𑒹◌̴𑒰; 𑒹◌̴𑒰; 𑒹◌̴𑒰; 𑒹◌̴𑒰; ) TIRHUTA VOWEL SIGN E, COMBINING TILDE OVERLAY, TIRHUTA VOWEL SIGN AA 114B9 0334 114BA;114B9 0334 114BA;114B9 0334 114BA;114B9 0334 114BA;114B9 0334 114BA; # (𑒹◌̴◌𑒺; 𑒹◌̴◌𑒺; 𑒹◌̴◌𑒺; 𑒹◌̴◌𑒺; 𑒹◌̴◌𑒺; ) TIRHUTA VOWEL SIGN E, COMBINING TILDE OVERLAY, TIRHUTA VOWEL SIGN SHORT E 114B9 0334 114BD;114B9 0334 114BD;114B9 0334 114BD;114B9 0334 114BD;114B9 0334 114BD; # (𑒹◌̴𑒽; 𑒹◌̴𑒽; 𑒹◌̴𑒽; 𑒹◌̴𑒽; 𑒹◌̴𑒽; ) TIRHUTA VOWEL SIGN E, COMBINING TILDE OVERLAY, TIRHUTA VOWEL SIGN SHORT O diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index ce1232123..d97429463 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2023-11-10, 01:58:50 GMT +# Date: 2023-11-10, 04:28:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -737,6 +737,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU 11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK 11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Other_Alphabetic # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Other_Alphabetic # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Other_Alphabetic # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Other_Alphabetic # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Other_Alphabetic # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Other_Alphabetic # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA 11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -841,7 +847,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1461 +# Total code points: 1478 # ================================================ @@ -1068,6 +1074,9 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA 11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113CE ; Diacritic # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Diacritic # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113E1..113E2 ; Diacritic # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11442 ; Diacritic # Mn NEWA SIGN VIRAMA 11446 ; Diacritic # Mn NEWA SIGN NUKTA 114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA @@ -1114,7 +1123,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1153 +# Total code points: 1157 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 77fff842d..9c1fd6808 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-16.0.0.txt -# Date: 2023-10-26, 22:43:02 GMT +# Date: 2023-11-10, 04:28:24 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -470,6 +470,7 @@ blk; Tirhuta ; Tirhuta blk; Todhri ; Todhri blk; Toto ; Toto blk; Transport_And_Map ; Transport_And_Map_Symbols +blk; Tulu_Tigalari ; Tulu_Tigalari blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended blk; UCAS_Ext_A ; Unified_Canadian_Aboriginal_Syllabics_Extended_A @@ -1454,6 +1455,7 @@ sc ; Tirh ; Tirhuta sc ; Tnsa ; Tangsa sc ; Todr ; Todhri sc ; Toto ; Toto +sc ; Tutg ; Tulu_Tigalari sc ; Ugar ; Ugaritic sc ; Vaii ; Vai sc ; Vith ; Vithkuqi diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index bcecfd3d0..794f17240 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-11-10, 01:48:40 GMT +# Date: 2023-11-10, 04:28:43 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3088,4 +3088,27 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # Total code points: 52 +# ================================================ + +11380..11389 ; Tulu_Tigalari # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Tulu_Tigalari # Lo TULU-TIGALARI LETTER EE +1138E ; Tulu_Tigalari # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; Tulu_Tigalari # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Tulu_Tigalari # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; Tulu_Tigalari # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Tulu_Tigalari # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Tulu_Tigalari # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Tulu_Tigalari # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Tulu_Tigalari # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Tulu_Tigalari # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; Tulu_Tigalari # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Tulu_Tigalari # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Tulu_Tigalari # Mn TULU-TIGALARI CONJOINER +113D1 ; Tulu_Tigalari # Lo TULU-TIGALARI REPHA +113D4..113D5 ; Tulu_Tigalari # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; Tulu_Tigalari # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1..113E2 ; Tulu_Tigalari # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA + +# Total code points: 78 + # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index ac5a58311..8d10fd3ae 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -20405,6 +20405,84 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11372;COMBINING GRANTHA LETTER NA;Mn;230;NSM;;;;;N;;;;; 11373;COMBINING GRANTHA LETTER VI;Mn;230;NSM;;;;;N;;;;; 11374;COMBINING GRANTHA LETTER PA;Mn;230;NSM;;;;;N;;;;; +11380;TULU-TIGALARI LETTER A;Lo;0;L;;;;;N;;;;; +11381;TULU-TIGALARI LETTER AA;Lo;0;L;;;;;N;;;;; +11382;TULU-TIGALARI LETTER I;Lo;0;L;;;;;N;;;;; +11383;TULU-TIGALARI LETTER II;Lo;0;L;11382 113C9;;;;N;;;;; +11384;TULU-TIGALARI LETTER U;Lo;0;L;;;;;N;;;;; +11385;TULU-TIGALARI LETTER UU;Lo;0;L;11384 113BB;;;;N;;;;; +11386;TULU-TIGALARI LETTER VOCALIC R;Lo;0;L;;;;;N;;;;; +11387;TULU-TIGALARI LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; +11388;TULU-TIGALARI LETTER VOCALIC L;Lo;0;L;;;;;N;;;;; +11389;TULU-TIGALARI LETTER VOCALIC LL;Lo;0;L;;;;;N;;;;; +1138B;TULU-TIGALARI LETTER EE;Lo;0;L;;;;;N;;;;; +1138E;TULU-TIGALARI LETTER AI;Lo;0;L;1138B 113C2;;;;N;;;;; +11390;TULU-TIGALARI LETTER OO;Lo;0;L;;;;;N;;;;; +11391;TULU-TIGALARI LETTER AU;Lo;0;L;11390 113C9;;;;N;;;;; +11392;TULU-TIGALARI LETTER KA;Lo;0;L;;;;;N;;;;; +11393;TULU-TIGALARI LETTER KHA;Lo;0;L;;;;;N;;;;; +11394;TULU-TIGALARI LETTER GA;Lo;0;L;;;;;N;;;;; +11395;TULU-TIGALARI LETTER GHA;Lo;0;L;;;;;N;;;;; +11396;TULU-TIGALARI LETTER NGA;Lo;0;L;;;;;N;;;;; +11397;TULU-TIGALARI LETTER CA;Lo;0;L;;;;;N;;;;; +11398;TULU-TIGALARI LETTER CHA;Lo;0;L;;;;;N;;;;; +11399;TULU-TIGALARI LETTER JA;Lo;0;L;;;;;N;;;;; +1139A;TULU-TIGALARI LETTER JHA;Lo;0;L;;;;;N;;;;; +1139B;TULU-TIGALARI LETTER NYA;Lo;0;L;;;;;N;;;;; +1139C;TULU-TIGALARI LETTER TTA;Lo;0;L;;;;;N;;;;; +1139D;TULU-TIGALARI LETTER TTHA;Lo;0;L;;;;;N;;;;; +1139E;TULU-TIGALARI LETTER DDA;Lo;0;L;;;;;N;;;;; +1139F;TULU-TIGALARI LETTER DDHA;Lo;0;L;;;;;N;;;;; +113A0;TULU-TIGALARI LETTER NNA;Lo;0;L;;;;;N;;;;; +113A1;TULU-TIGALARI LETTER TA;Lo;0;L;;;;;N;;;;; +113A2;TULU-TIGALARI LETTER THA;Lo;0;L;;;;;N;;;;; +113A3;TULU-TIGALARI LETTER DA;Lo;0;L;;;;;N;;;;; +113A4;TULU-TIGALARI LETTER DHA;Lo;0;L;;;;;N;;;;; +113A5;TULU-TIGALARI LETTER NA;Lo;0;L;;;;;N;;;;; +113A6;TULU-TIGALARI LETTER PA;Lo;0;L;;;;;N;;;;; +113A7;TULU-TIGALARI LETTER PHA;Lo;0;L;;;;;N;;;;; +113A8;TULU-TIGALARI LETTER BA;Lo;0;L;;;;;N;;;;; +113A9;TULU-TIGALARI LETTER BHA;Lo;0;L;;;;;N;;;;; +113AA;TULU-TIGALARI LETTER MA;Lo;0;L;;;;;N;;;;; +113AB;TULU-TIGALARI LETTER YA;Lo;0;L;;;;;N;;;;; +113AC;TULU-TIGALARI LETTER RA;Lo;0;L;;;;;N;;;;; +113AD;TULU-TIGALARI LETTER LA;Lo;0;L;;;;;N;;;;; +113AE;TULU-TIGALARI LETTER VA;Lo;0;L;;;;;N;;;;; +113AF;TULU-TIGALARI LETTER SHA;Lo;0;L;;;;;N;;;;; +113B0;TULU-TIGALARI LETTER SSA;Lo;0;L;;;;;N;;;;; +113B1;TULU-TIGALARI LETTER SA;Lo;0;L;;;;;N;;;;; +113B2;TULU-TIGALARI LETTER HA;Lo;0;L;;;;;N;;;;; +113B3;TULU-TIGALARI LETTER LLA;Lo;0;L;;;;;N;;;;; +113B4;TULU-TIGALARI LETTER RRA;Lo;0;L;;;;;N;;;;; +113B5;TULU-TIGALARI LETTER LLLA;Lo;0;L;;;;;N;;;;; +113B7;TULU-TIGALARI SIGN AVAGRAHA;Lo;0;L;;;;;N;;;;; +113B8;TULU-TIGALARI VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; +113B9;TULU-TIGALARI VOWEL SIGN I;Mc;0;L;;;;;N;;;;; +113BA;TULU-TIGALARI VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +113BB;TULU-TIGALARI VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +113BC;TULU-TIGALARI VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +113BD;TULU-TIGALARI VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;; +113BE;TULU-TIGALARI VOWEL SIGN VOCALIC RR;Mn;0;NSM;;;;;N;;;;; +113BF;TULU-TIGALARI VOWEL SIGN VOCALIC L;Mn;0;NSM;;;;;N;;;;; +113C0;TULU-TIGALARI VOWEL SIGN VOCALIC LL;Mn;0;NSM;;;;;N;;;;; +113C2;TULU-TIGALARI VOWEL SIGN EE;Mc;0;L;;;;;N;;;;; +113C5;TULU-TIGALARI VOWEL SIGN AI;Mc;0;L;113C2 113C2;;;;N;;;;; +113C7;TULU-TIGALARI VOWEL SIGN OO;Mc;0;L;113C2 113B8;;;;N;;;;; +113C8;TULU-TIGALARI VOWEL SIGN AU;Mc;0;L;113C2 113C9;;;;N;;;;; +113C9;TULU-TIGALARI AU LENGTH MARK;Mc;0;L;;;;;N;;;;; +113CA;TULU-TIGALARI SIGN CANDRA ANUNASIKA;Mc;0;L;;;;;N;;;;; +113CC;TULU-TIGALARI SIGN ANUSVARA;Mc;0;L;;;;;N;;;;; +113CD;TULU-TIGALARI SIGN VISARGA;Mc;0;L;;;;;N;;;;; +113CE;TULU-TIGALARI SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; +113CF;TULU-TIGALARI SIGN LOOPED VIRAMA;Mc;9;L;;;;;N;;;;; +113D0;TULU-TIGALARI CONJOINER;Mn;9;NSM;;;;;N;;;;; +113D1;TULU-TIGALARI REPHA;Lo;0;L;;;;;N;;;;; +113D4;TULU-TIGALARI DANDA;Po;0;L;;;;;N;;;;; +113D5;TULU-TIGALARI DOUBLE DANDA;Po;0;L;;;;;N;;;;; +113D7;TULU-TIGALARI SIGN OM PUSHPIKA;Po;0;L;;;;;N;;;;; +113D8;TULU-TIGALARI SIGN SHRII PUSHPIKA;Po;0;L;;;;;N;;;;; +113E1;TULU-TIGALARI VEDIC TONE SVARITA;Mn;0;NSM;;;;;N;;;;; +113E2;TULU-TIGALARI VEDIC TONE ANUDATTA;Mn;0;NSM;;;;;N;;;;; 11400;NEWA LETTER A;Lo;0;L;;;;;N;;;;; 11401;NEWA LETTER AA;Lo;0;L;;;;;N;;;;; 11402;NEWA LETTER I;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 805cfd6a4..4a1854fa9 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-11-10, 01:48:44 GMT +# Date: 2023-11-10, 04:28:47 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1900,6 +1900,24 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 11362..11363 ; R # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; R # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; R # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; R # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; R # Lo TULU-TIGALARI LETTER EE +1138E ; R # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; R # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; R # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; R # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; R # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; R # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; R # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; R # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; R # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; R # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; R # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; R # Mn TULU-TIGALARI CONJOINER +113D1 ; R # Lo TULU-TIGALARI REPHA +113D4..113D5 ; R # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; R # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1..113E2 ; R # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; R # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; R # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; R # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index c65c8497c..aaa397355 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2023-11-10, 01:48:10 GMT +# Date: 2023-11-10, 04:28:16 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -27,6 +27,7 @@ 110BD ; Prepend # Cf KAITHI NUMBER SIGN 110CD ; Prepend # Cf KAITHI NUMBER SIGN ABOVE 111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA +113D1 ; Prepend # Lo TULU-TIGALARI REPHA 1193F ; Prepend # Lo DIVES AKURU PREFIXED NASAL SIGN 11941 ; Prepend # Lo DIVES AKURU INITIAL RA 11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA @@ -34,7 +35,7 @@ 11D46 ; Prepend # Lo MASARAM GONDI REPHA 11F02 ; Prepend # Lo KAWI SIGN REPHA -# Total code points: 27 +# Total code points: 28 # ================================================ @@ -359,6 +360,10 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11357 ; Extend # Mc GRANTHA AU LENGTH MARK 11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; Extend # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; Extend # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; Extend # Mn TULU-TIGALARI CONJOINER +113E1..113E2 ; Extend # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; Extend # Mn NEWA SIGN NUKTA @@ -463,7 +468,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2139 +# Total code points: 2149 # ================================================ @@ -590,6 +595,12 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI 1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA 11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; SpacingMark # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113C2 ; SpacingMark # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; SpacingMark # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; SpacingMark # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; SpacingMark # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; SpacingMark # Mc TULU-TIGALARI SIGN LOOPED VIRAMA 11435..11437 ; SpacingMark # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11440..11441 ; SpacingMark # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU 11445 ; SpacingMark # Mc NEWA SIGN VISARGA @@ -640,7 +651,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 396 +# Total code points: 408 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 90517db87..150b00274 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-11-10, 01:48:41 GMT +# Date: 2023-11-10, 04:28:44 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -426,6 +426,16 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11362..11363 ; Extend # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113B8..113BA ; Extend # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Extend # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Extend # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Extend # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Extend # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Extend # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; Extend # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Extend # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Extend # Mn TULU-TIGALARI CONJOINER +113E1..113E2 ; Extend # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11435..11437 ; Extend # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; Extend # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -572,7 +582,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2560 +# Total code points: 2582 # ================================================ @@ -2413,6 +2423,12 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1133D ; OLetter # Lo GRANTHA SIGN AVAGRAHA 11350 ; OLetter # Lo GRANTHA OM 1135D..11361 ; OLetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; OLetter # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; OLetter # Lo TULU-TIGALARI LETTER EE +1138E ; OLetter # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; OLetter # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; OLetter # Lo TULU-TIGALARI SIGN AVAGRAHA +113D1 ; OLetter # Lo TULU-TIGALARI REPHA 11400..11434 ; OLetter # Lo [53] NEWA LETTER A..NEWA LETTER HA 11447..1144A ; OLetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 1145F..11461 ; OLetter # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA @@ -2562,7 +2578,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132832 +# Total code points: 132884 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 32297919e..dd210fcb7 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-11-10, 01:48:44 GMT +# Date: 2023-11-10, 04:28:47 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -462,6 +462,16 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11362..11363 ; Extend # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113B8..113BA ; Extend # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Extend # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Extend # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Extend # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Extend # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Extend # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; Extend # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Extend # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Extend # Mn TULU-TIGALARI CONJOINER +113E1..113E2 ; Extend # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11435..11437 ; Extend # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; Extend # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -609,7 +619,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2564 +# Total code points: 2586 # ================================================ @@ -1166,6 +1176,12 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1133D ; ALetter # Lo GRANTHA SIGN AVAGRAHA 11350 ; ALetter # Lo GRANTHA OM 1135D..11361 ; ALetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; ALetter # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; ALetter # Lo TULU-TIGALARI LETTER EE +1138E ; ALetter # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; ALetter # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; ALetter # Lo TULU-TIGALARI SIGN AVAGRAHA +113D1 ; ALetter # Lo TULU-TIGALARI REPHA 11400..11434 ; ALetter # Lo [53] NEWA LETTER A..NEWA LETTER HA 11447..1144A ; ALetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 1145F..11461 ; ALetter # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA @@ -1333,7 +1349,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 29716 +# Total code points: 29768 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 503a90d7a..39eb2965a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-11-10, 01:47:58 GMT +# Date: 2023-11-10, 04:28:05 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -902,6 +902,20 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11357 ; L # Mc GRANTHA AU LENGTH MARK 1135D..11361 ; L # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL 11362..11363 ; L # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; L # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; L # Lo TULU-TIGALARI LETTER EE +1138E ; L # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; L # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; L # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; L # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113C2 ; L # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; L # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; L # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; L # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; L # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D1 ; L # Lo TULU-TIGALARI REPHA +113D4..113D5 ; L # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; L # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11400..11434 ; L # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; L # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11440..11441 ; L # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -1199,8 +1213,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 819485 code points not listed here. -# Total code points: 1095544 +# The above property value applies to 819407 code points not listed here. +# Total code points: 1095534 # ================================================ @@ -2285,6 +2299,10 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11340 ; NSM # Mn GRANTHA VOWEL SIGN II 11366..1136C ; NSM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; NSM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; NSM # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; NSM # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; NSM # Mn TULU-TIGALARI CONJOINER +113E1..113E2 ; NSM # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; NSM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; NSM # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; NSM # Mn NEWA SIGN NUKTA @@ -2381,7 +2399,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2002 +# Total code points: 2012 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index ea14d2d3d..2bc9650ba 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-11-10, 01:48:01 GMT +# Date: 2023-11-10, 04:28:08 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1581,6 +1581,21 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 11357 ; 0 # Mc GRANTHA AU LENGTH MARK 1135D..11361 ; 0 # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL 11362..11363 ; 0 # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; 0 # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; 0 # Lo TULU-TIGALARI LETTER EE +1138E ; 0 # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; 0 # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; 0 # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; 0 # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; 0 # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; 0 # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; 0 # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; 0 # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; 0 # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113D1 ; 0 # Lo TULU-TIGALARI REPHA +113D4..113D5 ; 0 # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; 0 # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1..113E2 ; 0 # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; 0 # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; 0 # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; 0 # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI @@ -2036,8 +2051,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 825715 code points not listed here. -# Total code points: 1113182 +# The above property value applies to 825637 code points not listed here. +# Total code points: 1113179 # ================================================ @@ -2152,6 +2167,9 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK 11235 ; 9 # Mc KHOJKI SIGN VIRAMA 112EA ; 9 # Mn KHUDAWADI SIGN VIRAMA 1134D ; 9 # Mc GRANTHA SIGN VIRAMA +113CE ; 9 # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; 9 # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; 9 # Mn TULU-TIGALARI CONJOINER 11442 ; 9 # Mn NEWA SIGN VIRAMA 114C2 ; 9 # Mn TIRHUTA SIGN VIRAMA 115BF ; 9 # Mn SIDDHAM SIGN VIRAMA @@ -2171,7 +2189,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK 11F41 ; 9 # Mc KAWI SIGN KILLER 11F42 ; 9 # Mn KAWI CONJOINER -# Total code points: 65 +# Total code points: 68 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index 4734c4d04..a52989f81 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ # DerivedDecompositionType-16.0.0.txt -# Date: 2023-11-06, 03:48:54 GMT +# Date: 2023-11-10, 04:28:10 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -256,6 +256,12 @@ FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 110AB ; Canonical # Lo KAITHI LETTER VA 1112E..1112F ; Canonical # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; Canonical # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; Canonical # Lo TULU-TIGALARI LETTER II +11385 ; Canonical # Lo TULU-TIGALARI LETTER UU +1138E ; Canonical # Lo TULU-TIGALARI LETTER AI +11391 ; Canonical # Lo TULU-TIGALARI LETTER AU +113C5 ; Canonical # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; Canonical # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; Canonical # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; Canonical # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; Canonical # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU @@ -265,7 +271,7 @@ FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1D1BB..1D1C0 ; Canonical # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; Canonical # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 13238 +# Total code points: 13245 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 86508516a..bcc77852c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-10, 01:48:04 GMT +# Date: 2023-11-10, 04:28:10 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1600,6 +1600,24 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 11362..11363 ; N # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; N # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; N # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; N # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; N # Lo TULU-TIGALARI LETTER EE +1138E ; N # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; N # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; N # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; N # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; N # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; N # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; N # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; N # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; N # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; N # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; N # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; N # Mn TULU-TIGALARI CONJOINER +113D1 ; N # Lo TULU-TIGALARI REPHA +113D4..113D5 ; N # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; N # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1..113E2 ; N # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; N # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; N # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; N # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI @@ -2076,7 +2094,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765233 code points not listed here. +# The above property value applies to 765155 code points not listed here. # Total code points: 792608 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 97180eda6..f89d39ae7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-11-10, 01:48:04 GMT +# Date: 2023-11-10, 04:28:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -480,7 +480,19 @@ FFFE..FFFF ; Cn # [2] .. 11358..1135C ; Cn # [5] .. 11364..11365 ; Cn # [2] .. 1136D..1136F ; Cn # [3] .. -11375..113FF ; Cn # [139] .. +11375..1137F ; Cn # [11] .. +1138A ; Cn # +1138C..1138D ; Cn # [2] .. +1138F ; Cn # +113B6 ; Cn # +113C1 ; Cn # +113C3..113C4 ; Cn # [2] .. +113C6 ; Cn # +113CB ; Cn # +113D2..113D3 ; Cn # [2] .. +113D6 ; Cn # +113D9..113E0 ; Cn # [8] .. +113E3..113FF ; Cn # [29] .. 1145C ; Cn # 11462..1147F ; Cn # [30] .. 114C8..114CF ; Cn # [8] .. @@ -735,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 823667 +# Total code points: 823589 # ================================================ @@ -2551,6 +2563,12 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1133D ; Lo # GRANTHA SIGN AVAGRAHA 11350 ; Lo # GRANTHA OM 1135D..11361 ; Lo # [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; Lo # [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Lo # TULU-TIGALARI LETTER EE +1138E ; Lo # TULU-TIGALARI LETTER AI +11390..113B5 ; Lo # [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Lo # TULU-TIGALARI SIGN AVAGRAHA +113D1 ; Lo # TULU-TIGALARI REPHA 11400..11434 ; Lo # [53] NEWA LETTER A..NEWA LETTER HA 11447..1144A ; Lo # [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 1145F..11461 ; Lo # [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA @@ -2687,7 +2705,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132401 +# Total code points: 132453 # ================================================ @@ -2948,6 +2966,10 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11340 ; Mn # GRANTHA VOWEL SIGN II 11366..1136C ; Mn # [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Mn # [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; Mn # [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; Mn # TULU-TIGALARI SIGN VIRAMA +113D0 ; Mn # TULU-TIGALARI CONJOINER +113E1..113E2 ; Mn # [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; Mn # [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; Mn # [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; Mn # NEWA SIGN NUKTA @@ -3044,7 +3066,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1994 +# Total code points: 2004 # ================================================ @@ -3196,6 +3218,12 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 1134B..1134D ; Mc # [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA 11357 ; Mc # GRANTHA AU LENGTH MARK 11362..11363 ; Mc # [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Mc # [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113C2 ; Mc # TULU-TIGALARI VOWEL SIGN EE +113C5 ; Mc # TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Mc # [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Mc # [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; Mc # TULU-TIGALARI SIGN LOOPED VIRAMA 11435..11437 ; Mc # [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11440..11441 ; Mc # [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU 11445 ; Mc # NEWA SIGN VISARGA @@ -3246,7 +3274,7 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 453 +# Total code points: 465 # ================================================ @@ -3876,6 +3904,8 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 111DD..111DF ; Po # [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 11238..1123D ; Po # [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN 112A9 ; Po # MULTANI SECTION MARK +113D4..113D5 ; Po # [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; Po # [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 1144B..1144F ; Po # [5] NEWA DANDA..NEWA ABBREVIATION SIGN 1145A..1145B ; Po # [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK 1145D ; Po # NEWA INSERTION SIGN @@ -3912,7 +3942,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 1E5FF ; Po # OL ONAL ABBREVIATION SIGN 1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# Total code points: 633 +# Total code points: 637 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 72e3386c8..b5c781bb4 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2023-11-10, 01:48:06 GMT +# Date: 2023-11-10, 04:28:12 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -472,6 +472,10 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 11340 ; T # Mn GRANTHA VOWEL SIGN II 11366..1136C ; T # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; T # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; T # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; T # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; T # Mn TULU-TIGALARI CONJOINER +113E1..113E2 ; T # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; T # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; T # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; T # Mn NEWA SIGN NUKTA @@ -574,6 +578,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2159 +# Total code points: 2169 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index d3bb1d2a2..125c89421 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-10, 01:48:06 GMT +# Date: 2023-11-10, 04:28:12 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 761787 code points not listed here. -# Total code points: 899255 +# The above property value applies to 761709 code points not listed here. +# Total code points: 899177 # ================================================ @@ -1754,6 +1754,9 @@ FFE3 ; ID # Sk FULLWIDTH MACRON FFE4 ; ID # So FULLWIDTH BROKEN BAR 11049..1104D ; ID # Po [5] BRAHMI PUNCTUATION DOT..BRAHMI PUNCTUATION LOTUS 11052..11065 ; ID # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +113B7 ; ID # Lo TULU-TIGALARI SIGN AVAGRAHA +113D4..113D5 ; ID # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL 17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 @@ -1849,7 +1852,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172422 +# Total code points: 172427 # ================================================ @@ -2225,6 +2228,15 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 11362..11363 ; CM # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; CM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; CM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113B8..113BA ; CM # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; CM # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; CM # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; CM # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; CM # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; CM # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; CM # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; CM # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113E1..113E2 ; CM # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11435..11437 ; CM # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; CM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; CM # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -2364,7 +2376,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2439 +# Total code points: 2460 # ================================================ @@ -3848,6 +3860,7 @@ A984..A9B2 ; AK # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA 11332..11333 ; AK # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA 11335..11339 ; AK # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA 11360..11361 ; AK # Lo [2] GRANTHA LETTER VOCALIC RR..GRANTHA LETTER VOCALIC LL +11392..113B5 ; AK # Lo [36] TULU-TIGALARI LETTER KA..TULU-TIGALARI LETTER LLLA 11900..11906 ; AK # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E 11909 ; AK # Lo DIVES AKURU LETTER O 1190C..11913 ; AK # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA @@ -3856,18 +3869,19 @@ A984..A9B2 ; AK # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA 11F04..11F10 ; AK # Lo [13] KAWI LETTER A..KAWI LETTER O 11F12..11F33 ; AK # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA -# Total code points: 293 +# Total code points: 329 # ================================================ # Line_Break=Aksara_Prebase 11003..11004 ; AP # Lo [2] BRAHMI SIGN JIHVAMULIYA..BRAHMI SIGN UPADHMANIYA +113D1 ; AP # Lo TULU-TIGALARI REPHA 1193F ; AP # Lo DIVES AKURU PREFIXED NASAL SIGN 11941 ; AP # Lo DIVES AKURU INITIAL RA 11F02 ; AP # Lo KAWI SIGN REPHA -# Total code points: 5 +# Total code points: 6 # ================================================ @@ -3881,11 +3895,15 @@ AA50..AA59 ; AS # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE 11066..1106F ; AS # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 11350 ; AS # Lo GRANTHA OM 1135E..1135F ; AS # Lo [2] GRANTHA LETTER VEDIC ANUSVARA..GRANTHA LETTER VEDIC DOUBLE ANUSVARA +11380..11389 ; AS # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; AS # Lo TULU-TIGALARI LETTER EE +1138E ; AS # Lo TULU-TIGALARI LETTER AI +11390..11391 ; AS # Lo [2] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER AU 11950..11959 ; AS # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE 11EE0..11EF1 ; AS # Lo [18] MAKASAR LETTER KA..MAKASAR LETTER A 11F50..11F59 ; AS # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE -# Total code points: 160 +# Total code points: 174 # ================================================ @@ -3895,10 +3913,11 @@ AA50..AA59 ; AS # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE A9C0 ; VI # Mc JAVANESE PANGKON 11046 ; VI # Mn BRAHMI VIRAMA 1134D ; VI # Mc GRANTHA SIGN VIRAMA +113D0 ; VI # Mn TULU-TIGALARI CONJOINER 1193E ; VI # Mn DIVES AKURU VIRAMA 11F42 ; VI # Mn KAWI CONJOINER -# Total code points: 6 +# Total code points: 7 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index cd80a4355..8e82c59c3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-11-10, 01:48:07 GMT +# Date: 2023-11-10, 04:28:12 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -31070,6 +31070,84 @@ FFFD ; REPLACEMENT CHARACTER 11372 ; COMBINING GRANTHA LETTER NA 11373 ; COMBINING GRANTHA LETTER VI 11374 ; COMBINING GRANTHA LETTER PA +11380 ; TULU-TIGALARI LETTER A +11381 ; TULU-TIGALARI LETTER AA +11382 ; TULU-TIGALARI LETTER I +11383 ; TULU-TIGALARI LETTER II +11384 ; TULU-TIGALARI LETTER U +11385 ; TULU-TIGALARI LETTER UU +11386 ; TULU-TIGALARI LETTER VOCALIC R +11387 ; TULU-TIGALARI LETTER VOCALIC RR +11388 ; TULU-TIGALARI LETTER VOCALIC L +11389 ; TULU-TIGALARI LETTER VOCALIC LL +1138B ; TULU-TIGALARI LETTER EE +1138E ; TULU-TIGALARI LETTER AI +11390 ; TULU-TIGALARI LETTER OO +11391 ; TULU-TIGALARI LETTER AU +11392 ; TULU-TIGALARI LETTER KA +11393 ; TULU-TIGALARI LETTER KHA +11394 ; TULU-TIGALARI LETTER GA +11395 ; TULU-TIGALARI LETTER GHA +11396 ; TULU-TIGALARI LETTER NGA +11397 ; TULU-TIGALARI LETTER CA +11398 ; TULU-TIGALARI LETTER CHA +11399 ; TULU-TIGALARI LETTER JA +1139A ; TULU-TIGALARI LETTER JHA +1139B ; TULU-TIGALARI LETTER NYA +1139C ; TULU-TIGALARI LETTER TTA +1139D ; TULU-TIGALARI LETTER TTHA +1139E ; TULU-TIGALARI LETTER DDA +1139F ; TULU-TIGALARI LETTER DDHA +113A0 ; TULU-TIGALARI LETTER NNA +113A1 ; TULU-TIGALARI LETTER TA +113A2 ; TULU-TIGALARI LETTER THA +113A3 ; TULU-TIGALARI LETTER DA +113A4 ; TULU-TIGALARI LETTER DHA +113A5 ; TULU-TIGALARI LETTER NA +113A6 ; TULU-TIGALARI LETTER PA +113A7 ; TULU-TIGALARI LETTER PHA +113A8 ; TULU-TIGALARI LETTER BA +113A9 ; TULU-TIGALARI LETTER BHA +113AA ; TULU-TIGALARI LETTER MA +113AB ; TULU-TIGALARI LETTER YA +113AC ; TULU-TIGALARI LETTER RA +113AD ; TULU-TIGALARI LETTER LA +113AE ; TULU-TIGALARI LETTER VA +113AF ; TULU-TIGALARI LETTER SHA +113B0 ; TULU-TIGALARI LETTER SSA +113B1 ; TULU-TIGALARI LETTER SA +113B2 ; TULU-TIGALARI LETTER HA +113B3 ; TULU-TIGALARI LETTER LLA +113B4 ; TULU-TIGALARI LETTER RRA +113B5 ; TULU-TIGALARI LETTER LLLA +113B7 ; TULU-TIGALARI SIGN AVAGRAHA +113B8 ; TULU-TIGALARI VOWEL SIGN AA +113B9 ; TULU-TIGALARI VOWEL SIGN I +113BA ; TULU-TIGALARI VOWEL SIGN II +113BB ; TULU-TIGALARI VOWEL SIGN U +113BC ; TULU-TIGALARI VOWEL SIGN UU +113BD ; TULU-TIGALARI VOWEL SIGN VOCALIC R +113BE ; TULU-TIGALARI VOWEL SIGN VOCALIC RR +113BF ; TULU-TIGALARI VOWEL SIGN VOCALIC L +113C0 ; TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; TULU-TIGALARI VOWEL SIGN EE +113C5 ; TULU-TIGALARI VOWEL SIGN AI +113C7 ; TULU-TIGALARI VOWEL SIGN OO +113C8 ; TULU-TIGALARI VOWEL SIGN AU +113C9 ; TULU-TIGALARI AU LENGTH MARK +113CA ; TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC ; TULU-TIGALARI SIGN ANUSVARA +113CD ; TULU-TIGALARI SIGN VISARGA +113CE ; TULU-TIGALARI SIGN VIRAMA +113CF ; TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; TULU-TIGALARI CONJOINER +113D1 ; TULU-TIGALARI REPHA +113D4 ; TULU-TIGALARI DANDA +113D5 ; TULU-TIGALARI DOUBLE DANDA +113D7 ; TULU-TIGALARI SIGN OM PUSHPIKA +113D8 ; TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1 ; TULU-TIGALARI VEDIC TONE SVARITA +113E2 ; TULU-TIGALARI VEDIC TONE ANUDATTA 11400 ; NEWA LETTER A 11401 ; NEWA LETTER AA 11402 ; NEWA LETTER I @@ -45227,6 +45305,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150864 +# Total code points: 150942 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 9712ab6f4..13779ae05 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -495,6 +495,7 @@ public enum Block_Values implements Named { Todhri("Todhri"), Toto("Toto"), Transport_And_Map_Symbols("Transport_And_Map"), + Tulu_Tigalari("Tulu_Tigalari"), Unified_Canadian_Aboriginal_Syllabics("UCAS", "Canadian_Syllabics"), Unified_Canadian_Aboriginal_Syllabics_Extended("UCAS_Ext"), Unified_Canadian_Aboriginal_Syllabics_Extended_A("UCAS_Ext_A"), @@ -1863,6 +1864,7 @@ public enum Script_Values implements Named { Tangsa("Tnsa"), Todhri("Todr"), Toto("Toto"), + Tulu_Tigalari("Tutg"), Ugaritic("Ugar"), Vai("Vaii"), Vithkuqi("Vith"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index 04e0c1339..07f528d8e 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -977,7 +977,7 @@ File: IndicPositionalCategory # Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, Rejang, Saurashtra, # Sharada, Siddham, Sinhala, Soyombo, Sundanese, Syloti Nagri, # Tagalog, Tagbanwa, Tai Tham, Tai Viet, Takri, Tamil, Telugu, Thai, -# Tibetan, Tirhuta, and Zanabazar Square. +# Tibetan, Tirhuta, Tulu-Tigalari, and Zanabazar Square. # # All characters for all other scripts not in that list # take the default value for this property. @@ -1012,6 +1012,9 @@ File: IndicPositionalCategory # contextually variable placement in Myanmar. # * U+1A69 TAI THAM VOWEL SIGN U and U+1A6A TAI THAM VOWEL SIGN UU have # contextually variable placement in Tai Tham. +# * U+113BB TULU-TIGALARI VOWEL SIGN U and +# U+113BC TULU-TIGALARI VOWEL SIGN UU form complex ligatures with +# consonants. # # 4. The following character is assigned the positional category Left, but # may have different positions in different styles: @@ -1081,7 +1084,7 @@ File: IndicSyllabicCategory # Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, # Phags-pa, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Soyombo, # Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, -# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, and +# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, Tulu-Tigalari, and # Zanabazar Square. # # All characters for all other scripts not in that list diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 50d5befe2..8f1d6a5dd 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -315,6 +315,7 @@ Tifinagh ; Tifinagh Tirhuta ; Tirhuta Todhri ; Todhri Toto ; Toto +Tulu_Tigalari ; Tulu_Tigalari Transport_And_Map ; Transport_And_Map_Symbols Ugaritic ; Ugaritic UCAS ; Unified_Canadian_Aboriginal_Syllabics diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 14d3e096d..a9978cd9b 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -557,7 +557,7 @@ Let $IDInclusions = [[:block=/Ideographs/:] [[\U00020000-\U0003FFFF][\U0001F000- \p{LB=ID} ⊃ $IDInclusions \p{Line_Break=Unknown} = [\p{General_Category=Unassigned} \p{GeneralCategory=PrivateUse} - $IDInclusions - [\u20C0-\u20CF]] -Let $BrahmicLineBreaking = [\p{sc=Balinese}\p{sc=Batak}\p{sc=Brahmi}\p{sc=Cham}\p{sc=DivesAkuru}\p{sc=Grantha}\p{sc=Javanese}\p{sc=Makasar}\p{sc=Kawi}\p{sc=Cham}\p{sc=Makasar}] +Let $BrahmicLineBreaking = [\p{sc=Balinese}\p{sc=Batak}\p{sc=Brahmi}\p{sc=Cham}\p{sc=DivesAkuru}\p{sc=Grantha}\p{sc=Javanese}\p{sc=Makasar}\p{sc=Kawi}\p{sc=Cham}\p{sc=Makasar}\p{sc=Tulu_Tigalari}] Let $VFScripts = [\p{sc=Batak}] Let $OPInclusions = [\u00A1\u00BF\u2E18\U00013258-\U0001325A\U00013286\U00013288\U00013379\U0001342F\U00013437\U0001343C\U0001343E\U000145CE\U0001E95E-\U0001E95F] From 48ff7889f87c659ada9a259358af5d8c34c1b823 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 10 Nov 2023 13:57:35 -0800 Subject: [PATCH 3/3] Gurung Khema (#564) --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 5 +- .../data/ucd/dev/DerivedCoreProperties.txt | 47 ++++++++++---- .../ucd/dev/DerivedNormalizationProps.txt | 22 ++++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 7 ++- .../data/ucd/dev/IndicPositionalCategory.txt | 13 ++-- .../data/ucd/dev/IndicSyllabicCategory.txt | 16 +++-- unicodetools/data/ucd/dev/LineBreak.txt | 7 ++- .../data/ucd/dev/NormalizationTest.txt | 20 +++++- unicodetools/data/ucd/dev/PropList.txt | 10 ++- .../data/ucd/dev/PropertyValueAliases.txt | 4 +- .../data/ucd/dev/ScriptExtensions.txt | 6 +- unicodetools/data/ucd/dev/Scripts.txt | 12 +++- unicodetools/data/ucd/dev/UnicodeData.txt | 58 +++++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 7 ++- .../dev/auxiliary/GraphemeBreakProperty.txt | 9 ++- .../dev/auxiliary/SentenceBreakProperty.txt | 13 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 13 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 13 ++-- .../dev/extracted/DerivedCombiningClass.txt | 14 +++-- .../extracted/DerivedDecompositionType.txt | 5 +- .../dev/extracted/DerivedEastAsianWidth.txt | 9 ++- .../dev/extracted/DerivedGeneralCategory.txt | 20 +++--- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 +- .../ucd/dev/extracted/DerivedLineBreak.txt | 15 +++-- .../data/ucd/dev/extracted/DerivedName.txt | 62 ++++++++++++++++++- .../ucd/dev/extracted/DerivedNumericType.txt | 5 +- .../dev/extracted/DerivedNumericValues.txt | 32 ++++++---- .../org/unicode/props/UcdPropertyValues.java | 2 + .../org/unicode/text/UCD/MakeUnicodeFiles.txt | 12 ++-- .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + .../unicode/text/UCD/UnicodeInvariantTest.txt | 2 +- 32 files changed, 369 insertions(+), 99 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 703455d43..368ad8d04 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -294,6 +294,7 @@ FFF0..FFFF; Specials 13000..1342F; Egyptian Hieroglyphs 13430..1345F; Egyptian Hieroglyph Format Controls 14400..1467F; Anatolian Hieroglyphs +16100..1613F; Gurung Khema 16800..16A3F; Bamum Supplement 16A40..16A6F; Mro 16A70..16ACF; Tangsa diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 52ede83d6..8c3094d62 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-11-10, 04:27:40 GMT +# Date: 2023-11-10, 20:56:34 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2039,6 +2039,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO 11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11F5A ; 16.0 # KAWI SIGN NUKTA +16100..16139 ; 16.0 # [58] GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE 16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE @@ -2056,6 +2057,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 1129 +# Total code points: 1187 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index b91ba8116..845545d91 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-10, 04:28:08 GMT +# Date: 2023-11-10, 20:57:17 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1293,6 +1293,10 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 13000..1342F ; Alphabetic # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; Alphabetic # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; Alphabetic # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; Alphabetic # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612E ; Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA 16800..16A38 ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; Alphabetic # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; Alphabetic # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -1435,7 +1439,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138718 +# Total code points: 138765 # ================================================ @@ -3444,6 +3448,8 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 13430..1343F ; Case_Ignorable # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13440 ; Case_Ignorable # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Case_Ignorable # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Case_Ignorable # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Case_Ignorable # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Case_Ignorable # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Case_Ignorable # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16B40..16B43 ; Case_Ignorable # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM @@ -3496,7 +3502,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2733 +# Total code points: 2748 # ================================================ @@ -6823,6 +6829,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 13000..1342F ; ID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; ID_Start # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; ID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; ID_Start # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA 16800..16A38 ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; ID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; ID_Start # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -6950,7 +6957,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137245 +# Total code points: 137275 # ================================================ @@ -8174,6 +8181,11 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 13441..13446 ; ID_Continue # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 13447..13455 ; ID_Continue # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 14400..14646 ; ID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; ID_Continue # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; ID_Continue # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; ID_Continue # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; ID_Continue # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; ID_Continue # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; ID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; ID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -8350,7 +8362,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140488 +# Total code points: 140546 # ================================================ @@ -8998,6 +9010,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 13000..1342F ; XID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; XID_Start # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; XID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; XID_Start # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA 16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; XID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; XID_Start # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -9125,7 +9138,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137222 +# Total code points: 137252 # ================================================ @@ -10350,6 +10363,11 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 13441..13446 ; XID_Continue # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 13447..13455 ; XID_Continue # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 14400..14646 ; XID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; XID_Continue # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; XID_Continue # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; XID_Continue # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; XID_Continue # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; XID_Continue # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; XID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; XID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -10526,7 +10544,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140469 +# Total code points: 140527 # ================================================ @@ -10934,6 +10952,8 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 11F5A ; Grapheme_Extend # Mn KAWI SIGN NUKTA 13440 ; Grapheme_Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Grapheme_Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Grapheme_Extend # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Grapheme_Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Grapheme_Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Grapheme_Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Grapheme_Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -10971,7 +10991,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2144 +# Total code points: 2159 # ================================================ @@ -12525,6 +12545,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 13000..1342F ; Grapheme_Base # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; Grapheme_Base # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; Grapheme_Base # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; Grapheme_Base # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1612A..1612C ; Grapheme_Base # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +16130..16139 ; Grapheme_Base # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; Grapheme_Base # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; Grapheme_Base # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; Grapheme_Base # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -12766,7 +12789,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 148723 +# Total code points: 148766 # ================================================ @@ -12837,8 +12860,9 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK 11D97 ; Grapheme_Link # Mn GUNJALA GONDI VIRAMA 11F41 ; Grapheme_Link # Mc KAWI SIGN KILLER 11F42 ; Grapheme_Link # Mn KAWI CONJOINER +1612F ; Grapheme_Link # Mn GURUNG KHEMA SIGN THOLHOMA -# Total code points: 68 +# Total code points: 69 # ================================================ @@ -13053,6 +13077,7 @@ FE20..FE2F ; InCB; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING 11D44..11D45 ; InCB; Extend # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 11D97 ; InCB; Extend # Mn GUNJALA GONDI VIRAMA 11F42 ; InCB; Extend # Mn KAWI CONJOINER +1612F ; InCB; Extend # Mn GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; InCB; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; InCB; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 1BC9E ; InCB; Extend # Mn DUPLOYAN DOUBLE MARK @@ -13077,6 +13102,6 @@ FE20..FE2F ; InCB; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING 1E8D0..1E8D6 ; InCB; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; InCB; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA -# Total code points: 894 +# Total code points: 895 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index 47ddba511..797dfb763 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-16.0.0.txt -# Date: 2023-11-10, 04:28:13 GMT +# Date: 2023-11-10, 20:57:25 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1024,12 +1024,13 @@ FB46..FB4E ; NFD_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 114BE ; NFD_QC; N # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; NFD_QC; N # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; NFD_QC; N # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; NFD_QC; N # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU 16D68..16D6A ; NFD_QC; N # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU 1D15E..1D164 ; NFD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; NFD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; NFD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 13245 +# Total code points: 13253 # ================================================ @@ -1172,9 +1173,11 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 114BD ; NFC_QC; M # Mc TIRHUTA VOWEL SIGN SHORT O 115AF ; NFC_QC; M # Mc SIDDHAM VOWEL SIGN AA 11930 ; NFC_QC; M # Mc DIVES AKURU VOWEL SIGN AA +1611E..16120 ; NFC_QC; M # Mn [3] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL SIGN II +16129 ; NFC_QC; M # Mn GURUNG KHEMA VOWEL LENGTH MARK 16D67 ; NFC_QC; M # Lo KIRAT RAI VOWEL SIGN E -# Total code points: 116 +# Total code points: 120 # ================================================ @@ -1657,6 +1660,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 114BE ; NFKD_QC; N # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; NFKD_QC; N # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; NFKD_QC; N # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; NFKD_QC; N # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU 16D68..16D6A ; NFKD_QC; N # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU 1CCD6..1CCEF ; NFKD_QC; N # So [26] OUTLINED LATIN CAPITAL LETTER A..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; NFKD_QC; N # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE @@ -1749,7 +1753,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 17077 +# Total code points: 17085 # ================================================ @@ -2213,9 +2217,11 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 114BD ; NFKC_QC; M # Mc TIRHUTA VOWEL SIGN SHORT O 115AF ; NFKC_QC; M # Mc SIDDHAM VOWEL SIGN AA 11930 ; NFKC_QC; M # Mc DIVES AKURU VOWEL SIGN AA +1611E..16120 ; NFKC_QC; M # Mn [3] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL SIGN II +16129 ; NFKC_QC; M # Mn GURUNG KHEMA VOWEL LENGTH MARK 16D67 ; NFKC_QC; M # Lo KIRAT RAI VOWEL SIGN E -# Total code points: 116 +# Total code points: 120 # ================================================ @@ -2454,11 +2460,12 @@ FB46..FB4E ; Expands_On_NFD # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBRE 114BE ; Expands_On_NFD # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; Expands_On_NFD # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; Expands_On_NFD # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; Expands_On_NFD # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU 16D68..16D6A ; Expands_On_NFD # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU 1D15E..1D164 ; Expands_On_NFD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK -# Total code points: 12228 +# Total code points: 12236 # ================================================ @@ -2817,6 +2824,7 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON 114BE ; Expands_On_NFKD # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; Expands_On_NFKD # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; Expands_On_NFKD # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; Expands_On_NFKD # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU 16D68..16D6A ; Expands_On_NFKD # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU 1D15E..1D164 ; Expands_On_NFKD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFKD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK @@ -2830,7 +2838,7 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON 1F213 ; Expands_On_NFKD # So SQUARED KATAKANA DE 1F240..1F248 ; Expands_On_NFKD # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -# Total code points: 13402 +# Total code points: 13410 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 2b5135b0c..74c97082b 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-10, 04:28:16 GMT +# Date: 2023-11-10, 20:57:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2308,6 +2308,11 @@ FFFD ; A # So REPLACEMENT CHARACTER 13441..13446 ; N # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 13447..13455 ; N # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 14400..14646 ; N # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; N # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; N # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; N # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; N # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; N # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; N # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; N # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index 7f37a3bea..3ed589530 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ # IndicPositionalCategory-16.0.0.txt -# Date: 2023-11-10, 20:38:16 GMT +# Date: 2023-11-10, 20:57:31 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,9 +68,9 @@ # # Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, # Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, -# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, -# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Kirat Rai, Lao, Lepcha, Limbu, -# Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, Modi, +# Gunjala Gondi, Gurmukhi, Gurung Khema, Hanunoo, Javanese, Kaithi, Kannada, +# Kawi, Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Kirat Rai, Lao, Lepcha, +# Limbu, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, Modi, # Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, Rejang, Saurashtra, # Sharada, Siddham, Sinhala, Soyombo, Sundanese, Syloti Nagri, # Tagalog, Tagbanwa, Tai Tham, Tai Viet, Takri, Tamil, Telugu, Thai, @@ -306,6 +306,7 @@ ABEC ; Right # Mc MEETEI MAYEK LUM IYEK 11F03 ; Right # Mc KAWI SIGN VISARGA 11F34..11F35 ; Right # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA 11F41 ; Right # Mc KAWI SIGN KILLER +1612C ; Right # Mc GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16D40..16D42 ; Right # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D63..16D6A ; Right # Lo [8] KIRAT RAI VOWEL SIGN AA..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; Right # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT @@ -362,6 +363,7 @@ AAEE ; Left # Mc MEETEI MAYEK VOWEL SIGN AU 11CB1 ; Left # Mc MARCHEN VOWEL SIGN I 11EF5 ; Left # Mc MAKASAR VOWEL SIGN E 11F3E..11F3F ; Left # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +1612A..1612B ; Left # Mc [2] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL VA # Indic_Positional_Category=Visual_Order_Left @@ -606,6 +608,8 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11F36..11F37 ; Top # Mn [2] KAWI VOWEL SIGN I..KAWI VOWEL SIGN II 11F40 ; Top # Mn KAWI VOWEL SIGN EU 11F5A ; Top # Mn KAWI SIGN NUKTA +1611E..16129 ; Top # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D ; Top # Mn GURUNG KHEMA SIGN ANUSVARA # Indic_Positional_Category=Bottom @@ -758,6 +762,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 11D47 ; Bottom # Mn MASARAM GONDI RA-KARA 11EF4 ; Bottom # Mn MAKASAR VOWEL SIGN U 11F38..11F3A ; Bottom # Mn [3] KAWI VOWEL SIGN U..KAWI VOWEL SIGN VOCALIC R +1612E..1612F ; Bottom # Mn [2] GURUNG KHEMA CONSONANT SIGN MEDIAL RA..GURUNG KHEMA SIGN THOLHOMA # Indic_Positional_Category=Top_And_Bottom diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index 5e33cafd5..73af9af2b 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-16.0.0.txt -# Date: 2023-11-10, 20:38:16 GMT +# Date: 2023-11-10, 20:57:31 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -37,9 +37,9 @@ # # Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, # Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, -# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, -# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Kirat Rai, Lao, Lepcha, Limbu, -# Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, +# Gunjala Gondi, Gurmukhi, Gurung Khema, Hanunoo, Javanese, Kaithi, Kannada, +# Kawi, Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Kirat Rai, Lao, Lepcha, +# Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, # Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, # Phags-pa, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Soyombo, # Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, @@ -137,6 +137,7 @@ A980..A981 ; Bindu # Mn [2] JAVANESE SIGN PANYANGGA..JAVANESE SIGN CECAK 11D40 ; Bindu # Mn MASARAM GONDI SIGN ANUSVARA 11D95 ; Bindu # Mn GUNJALA GONDI SIGN ANUSVARA 11F00..11F01 ; Bindu # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +1612D ; Bindu # Mn GURUNG KHEMA SIGN ANUSVARA 16D40..16D41 ; Bindu # Lm [2] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN TONPI # ================================================ @@ -333,6 +334,7 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK 11A34 ; Pure_Killer # Mn ZANABAZAR SQUARE SIGN VIRAMA 11D44 ; Pure_Killer # Mn MASARAM GONDI SIGN HALANTA 11F41 ; Pure_Killer # Mc KAWI SIGN KILLER +1612F ; Pure_Killer # Mn GURUNG KHEMA SIGN THOLHOMA 16D6B..16D6C ; Pure_Killer # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT # ================================================ @@ -465,6 +467,7 @@ ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 11D67..11D68 ; Vowel_Independent # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D6B ; Vowel_Independent # Lo [2] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER AU 11F04..11F10 ; Vowel_Independent # Lo [13] KAWI LETTER A..KAWI LETTER O +16100 ; Vowel_Independent # Lo GURUNG KHEMA LETTER A # ================================================ @@ -732,6 +735,7 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 11F36..11F3A ; Vowel_Dependent # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F3E..11F3F ; Vowel_Dependent # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F40 ; Vowel_Dependent # Mn KAWI VOWEL SIGN EU +1611E..16129 ; Vowel_Dependent # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK 16D63..16D6A ; Vowel_Dependent # Lo [8] KIRAT RAI VOWEL SIGN AA..KIRAT RAI VOWEL SIGN AU # ================================================ @@ -944,6 +948,7 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 11D6C..11D89 ; Consonant # Lo [30] GUNJALA GONDI LETTER YA..GUNJALA GONDI LETTER SA 11EE0..11EF1 ; Consonant # Lo [18] MAKASAR LETTER KA..MAKASAR LETTER A 11F12..11F33 ; Consonant # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +16101..1611D ; Consonant # Lo [29] GURUNG KHEMA LETTER KA..GURUNG KHEMA LETTER SA 16D43..16D62 ; Consonant # Lo [32] KIRAT RAI LETTER A..KIRAT RAI LETTER HA # ================================================ @@ -1077,6 +1082,8 @@ AA35..AA36 ; Consonant_Medial # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONA 11942 ; Consonant_Medial # Mc DIVES AKURU MEDIAL RA 11A3B..11A3E ; Consonant_Medial # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA 11D47 ; Consonant_Medial # Mn MASARAM GONDI RA-KARA +1612A..1612C ; Consonant_Medial # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612E ; Consonant_Medial # Mn GURUNG KHEMA CONSONANT SIGN MEDIAL RA # ================================================ @@ -1353,6 +1360,7 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI 11D50..11D59 ; Number # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Number # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 11F50..11F59 ; Number # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Number # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16D70..16D79 ; Number # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE # ================================================ diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 2918fcf88..e986cce44 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-10, 04:28:17 GMT +# Date: 2023-11-10, 20:57:31 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3212,6 +3212,11 @@ FFFD ; AI # So REPLACEMENT CHARACTER 145CE ; OP # Lo ANATOLIAN HIEROGLYPH A410 BEGIN LOGOGRAM MARK 145CF ; CL # Lo ANATOLIAN HIEROGLYPH A410A END LOGOGRAM MARK 145D0..14646 ; AL # Lo [119] ANATOLIAN HIEROGLYPH A411..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; AS # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; CM # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; CM # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; CM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; AS # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; AL # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; AL # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; NU # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index e3e738408..008613b90 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2023-11-10, 04:28:20 GMT +# Date: 2023-11-10, 20:57:38 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -15208,6 +15208,14 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 115BA;115BA;115B8 115AF;115BA;115B8 115AF; # (𑖺; 𑖺; 𑖺; 𑖺; 𑖺; ) SIDDHAM VOWEL SIGN O 115BB;115BB;115B9 115AF;115BB;115B9 115AF; # (𑖻; 𑖻; 𑖻; 𑖻; 𑖻; ) SIDDHAM VOWEL SIGN AU 11938;11938;11935 11930;11938;11935 11930; # (𑤸; 𑤸; 𑤸; 𑤸; 𑤸; ) DIVES AKURU VOWEL SIGN O +16121;16121;1611E 1611E;16121;1611E 1611E; # (◌𖄡; ◌𖄡; ◌𖄞◌𖄞; ◌𖄡; ◌𖄞◌𖄞; ) GURUNG KHEMA VOWEL SIGN U +16122;16122;1611E 16129;16122;1611E 16129; # (◌𖄢; ◌𖄢; ◌𖄞◌𖄩; ◌𖄢; ◌𖄞◌𖄩; ) GURUNG KHEMA VOWEL SIGN UU +16123;16123;1611E 1611F;16123;1611E 1611F; # (◌𖄣; ◌𖄣; ◌𖄞◌𖄟; ◌𖄣; ◌𖄞◌𖄟; ) GURUNG KHEMA VOWEL SIGN E +16124;16124;16129 1611F;16124;16129 1611F; # (◌𖄤; ◌𖄤; ◌𖄩◌𖄟; ◌𖄤; ◌𖄩◌𖄟; ) GURUNG KHEMA VOWEL SIGN EE +16125;16125;1611E 16120;16125;1611E 16120; # (◌𖄥; ◌𖄥; ◌𖄞◌𖄠; ◌𖄥; ◌𖄞◌𖄠; ) GURUNG KHEMA VOWEL SIGN AI +16126;16126;1611E 1611E 1611F;16126;1611E 1611E 1611F; # (◌𖄦; ◌𖄦; ◌𖄞◌𖄞◌𖄟; ◌𖄦; ◌𖄞◌𖄞◌𖄟; ) GURUNG KHEMA VOWEL SIGN O +16127;16127;1611E 16129 1611F;16127;1611E 16129 1611F; # (◌𖄧; ◌𖄧; ◌𖄞◌𖄩◌𖄟; ◌𖄧; ◌𖄞◌𖄩◌𖄟; ) GURUNG KHEMA VOWEL SIGN OO +16128;16128;1611E 1611E 16120;16128;1611E 1611E 16120; # (◌𖄨; ◌𖄨; ◌𖄞◌𖄞◌𖄠; ◌𖄨; ◌𖄞◌𖄞◌𖄠; ) GURUNG KHEMA VOWEL SIGN AU 16D68;16D68;16D67 16D67;16D68;16D67 16D67; # (𖵨; 𖵨; 𖵨; 𖵨; 𖵨; ) KIRAT RAI VOWEL SIGN AI 16D69;16D69;16D63 16D67;16D69;16D63 16D67; # (𖵩; 𖵩; 𖵩; 𖵩; 𖵩; ) KIRAT RAI VOWEL SIGN O 16D6A;16D6A;16D63 16D67 16D67;16D6A;16D63 16D67 16D67; # (𖵪; 𖵪; 𖵪; 𖵪; 𖵪; ) KIRAT RAI VOWEL SIGN AU @@ -18778,6 +18786,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 11F41 05B0 094D 3099 0062;0061 3099 11F41 094D 05B0 0062;0061 3099 11F41 094D 05B0 0062;0061 3099 11F41 094D 05B0 0062;0061 3099 11F41 094D 05B0 0062; # (a𑽁◌ְ◌्◌゙b; a◌゙𑽁◌्◌ְb; a◌゙𑽁◌्◌ְb; a◌゙𑽁◌्◌ְb; a◌゙𑽁◌्◌ְb; ) LATIN SMALL LETTER A, KAWI SIGN KILLER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 05B0 094D 3099 11F42 0062;0061 3099 094D 11F42 05B0 0062;0061 3099 094D 11F42 05B0 0062;0061 3099 094D 11F42 05B0 0062;0061 3099 094D 11F42 05B0 0062; # (a◌ְ◌्◌゙◌𑽂b; a◌゙◌्◌𑽂◌ְb; a◌゙◌्◌𑽂◌ְb; a◌゙◌्◌𑽂◌ְb; a◌゙◌्◌𑽂◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, KAWI CONJOINER, LATIN SMALL LETTER B 0061 11F42 05B0 094D 3099 0062;0061 3099 11F42 094D 05B0 0062;0061 3099 11F42 094D 05B0 0062;0061 3099 11F42 094D 05B0 0062;0061 3099 11F42 094D 05B0 0062; # (a◌𑽂◌ְ◌्◌゙b; a◌゙◌𑽂◌्◌ְb; a◌゙◌𑽂◌्◌ְb; a◌゙◌𑽂◌्◌ְb; a◌゙◌𑽂◌्◌ְb; ) LATIN SMALL LETTER A, KAWI CONJOINER, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B +0061 05B0 094D 3099 1612F 0062;0061 3099 094D 1612F 05B0 0062;0061 3099 094D 1612F 05B0 0062;0061 3099 094D 1612F 05B0 0062;0061 3099 094D 1612F 05B0 0062; # (a◌ְ◌्◌゙◌𖄯b; a◌゙◌्◌𖄯◌ְb; a◌゙◌्◌𖄯◌ְb; a◌゙◌्◌𖄯◌ְb; a◌゙◌्◌𖄯◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, GURUNG KHEMA SIGN THOLHOMA, LATIN SMALL LETTER B +0061 1612F 05B0 094D 3099 0062;0061 3099 1612F 094D 05B0 0062;0061 3099 1612F 094D 05B0 0062;0061 3099 1612F 094D 05B0 0062;0061 3099 1612F 094D 05B0 0062; # (a◌𖄯◌ְ◌्◌゙b; a◌゙◌𖄯◌्◌ְb; a◌゙◌𖄯◌्◌ְb; a◌゙◌𖄯◌्◌ְb; a◌゙◌𖄯◌्◌ְb; ) LATIN SMALL LETTER A, GURUNG KHEMA SIGN THOLHOMA, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 16FF0 0334 16AF0 0062;0061 0334 16AF0 16FF0 0062;0061 0334 16AF0 16FF0 0062;0061 0334 16AF0 16FF0 0062;0061 0334 16AF0 16FF0 0062; # (a𖿰◌̴◌𖫰b; a◌̴◌𖫰𖿰b; a◌̴◌𖫰𖿰b; a◌̴◌𖫰𖿰b; a◌̴◌𖫰𖿰b; ) LATIN SMALL LETTER A, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, BASSA VAH COMBINING HIGH TONE, LATIN SMALL LETTER B 0061 16AF0 16FF0 0334 0062;0061 16AF0 0334 16FF0 0062;0061 16AF0 0334 16FF0 0062;0061 16AF0 0334 16FF0 0062;0061 16AF0 0334 16FF0 0062; # (a◌𖫰𖿰◌̴b; a◌𖫰◌̴𖿰b; a◌𖫰◌̴𖿰b; a◌𖫰◌̴𖿰b; a◌𖫰◌̴𖿰b; ) LATIN SMALL LETTER A, BASSA VAH COMBINING HIGH TONE, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B 0061 16FF0 0334 16AF1 0062;0061 0334 16AF1 16FF0 0062;0061 0334 16AF1 16FF0 0062;0061 0334 16AF1 16FF0 0062;0061 0334 16AF1 16FF0 0062; # (a𖿰◌̴◌𖫱b; a◌̴◌𖫱𖿰b; a◌̴◌𖫱𖿰b; a◌̴◌𖫱𖿰b; a◌̴◌𖫱𖿰b; ) LATIN SMALL LETTER A, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, BASSA VAH COMBINING LOW TONE, LATIN SMALL LETTER B @@ -19202,6 +19212,14 @@ D750 0334 11B5;D750 0334 11B5;1112 1173 0334 11B5;D750 0334 11B5;1112 1173 0334 115B8 0334 115AF;115B8 0334 115AF;115B8 0334 115AF;115B8 0334 115AF;115B8 0334 115AF; # (𑖸◌̴𑖯; 𑖸◌̴𑖯; 𑖸◌̴𑖯; 𑖸◌̴𑖯; 𑖸◌̴𑖯; ) SIDDHAM VOWEL SIGN E, COMBINING TILDE OVERLAY, SIDDHAM VOWEL SIGN AA 115B9 0334 115AF;115B9 0334 115AF;115B9 0334 115AF;115B9 0334 115AF;115B9 0334 115AF; # (𑖹◌̴𑖯; 𑖹◌̴𑖯; 𑖹◌̴𑖯; 𑖹◌̴𑖯; 𑖹◌̴𑖯; ) SIDDHAM VOWEL SIGN AI, COMBINING TILDE OVERLAY, SIDDHAM VOWEL SIGN AA 11935 0334 11930;11935 0334 11930;11935 0334 11930;11935 0334 11930;11935 0334 11930; # (𑤵◌̴𑤰; 𑤵◌̴𑤰; 𑤵◌̴𑤰; 𑤵◌̴𑤰; 𑤵◌̴𑤰; ) DIVES AKURU VOWEL SIGN E, COMBINING TILDE OVERLAY, DIVES AKURU VOWEL SIGN AA +1611E 0334 1611E;1611E 0334 1611E;1611E 0334 1611E;1611E 0334 1611E;1611E 0334 1611E; # (◌𖄞◌̴◌𖄞; ◌𖄞◌̴◌𖄞; ◌𖄞◌̴◌𖄞; ◌𖄞◌̴◌𖄞; ◌𖄞◌̴◌𖄞; ) GURUNG KHEMA VOWEL SIGN AA, COMBINING TILDE OVERLAY, GURUNG KHEMA VOWEL SIGN AA +1611E 0334 1611F;1611E 0334 1611F;1611E 0334 1611F;1611E 0334 1611F;1611E 0334 1611F; # (◌𖄞◌̴◌𖄟; ◌𖄞◌̴◌𖄟; ◌𖄞◌̴◌𖄟; ◌𖄞◌̴◌𖄟; ◌𖄞◌̴◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, COMBINING TILDE OVERLAY, GURUNG KHEMA VOWEL SIGN I +1611E 0334 16120;1611E 0334 16120;1611E 0334 16120;1611E 0334 16120;1611E 0334 16120; # (◌𖄞◌̴◌𖄠; ◌𖄞◌̴◌𖄠; ◌𖄞◌̴◌𖄠; ◌𖄞◌̴◌𖄠; ◌𖄞◌̴◌𖄠; ) GURUNG KHEMA VOWEL SIGN AA, COMBINING TILDE OVERLAY, GURUNG KHEMA VOWEL SIGN II +1611E 0334 16129;1611E 0334 16129;1611E 0334 16129;1611E 0334 16129;1611E 0334 16129; # (◌𖄞◌̴◌𖄩; ◌𖄞◌̴◌𖄩; ◌𖄞◌̴◌𖄩; ◌𖄞◌̴◌𖄩; ◌𖄞◌̴◌𖄩; ) GURUNG KHEMA VOWEL SIGN AA, COMBINING TILDE OVERLAY, GURUNG KHEMA VOWEL LENGTH MARK +16121 0334 1611F;16121 0334 1611F;1611E 1611E 0334 1611F;16121 0334 1611F;1611E 1611E 0334 1611F; # (◌𖄡◌̴◌𖄟; ◌𖄡◌̴◌𖄟; ◌𖄞◌𖄞◌̴◌𖄟; ◌𖄡◌̴◌𖄟; ◌𖄞◌𖄞◌̴◌𖄟; ) GURUNG KHEMA VOWEL SIGN U, COMBINING TILDE OVERLAY, GURUNG KHEMA VOWEL SIGN I +16121 0334 16120;16121 0334 16120;1611E 1611E 0334 16120;16121 0334 16120;1611E 1611E 0334 16120; # (◌𖄡◌̴◌𖄠; ◌𖄡◌̴◌𖄠; ◌𖄞◌𖄞◌̴◌𖄠; ◌𖄡◌̴◌𖄠; ◌𖄞◌𖄞◌̴◌𖄠; ) GURUNG KHEMA VOWEL SIGN U, COMBINING TILDE OVERLAY, GURUNG KHEMA VOWEL SIGN II +16122 0334 1611F;16122 0334 1611F;1611E 16129 0334 1611F;16122 0334 1611F;1611E 16129 0334 1611F; # (◌𖄢◌̴◌𖄟; ◌𖄢◌̴◌𖄟; ◌𖄞◌𖄩◌̴◌𖄟; ◌𖄢◌̴◌𖄟; ◌𖄞◌𖄩◌̴◌𖄟; ) GURUNG KHEMA VOWEL SIGN UU, COMBINING TILDE OVERLAY, GURUNG KHEMA VOWEL SIGN I +16129 0334 1611F;16129 0334 1611F;16129 0334 1611F;16129 0334 1611F;16129 0334 1611F; # (◌𖄩◌̴◌𖄟; ◌𖄩◌̴◌𖄟; ◌𖄩◌̴◌𖄟; ◌𖄩◌̴◌𖄟; ◌𖄩◌̴◌𖄟; ) GURUNG KHEMA VOWEL LENGTH MARK, COMBINING TILDE OVERLAY, GURUNG KHEMA VOWEL SIGN I 16D63 0334 16D67;16D63 0334 16D67;16D63 0334 16D67;16D63 0334 16D67;16D63 0334 16D67; # (𖵣◌̴𖵧; 𖵣◌̴𖵧; 𖵣◌̴𖵧; 𖵣◌̴𖵧; 𖵣◌̴𖵧; ) KIRAT RAI VOWEL SIGN AA, COMBINING TILDE OVERLAY, KIRAT RAI VOWEL SIGN E 16D67 0334 16D67;16D67 0334 16D67;16D67 0334 16D67;16D67 0334 16D67;16D67 0334 16D67; # (𖵧◌̴𖵧; 𖵧◌̴𖵧; 𖵧◌̴𖵧; 𖵧◌̴𖵧; 𖵧◌̴𖵧; ) KIRAT RAI VOWEL SIGN E, COMBINING TILDE OVERLAY, KIRAT RAI VOWEL SIGN E 16D69 0334 16D67;16D69 0334 16D67;16D63 16D67 0334 16D67;16D69 0334 16D67;16D63 16D67 0334 16D67; # (𖵩◌̴𖵧; 𖵩◌̴𖵧; 𖵩◌̴𖵧; 𖵩◌̴𖵧; 𖵩◌̴𖵧; ) KIRAT RAI VOWEL SIGN O, COMBINING TILDE OVERLAY, KIRAT RAI VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index d97429463..4c14b97c9 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2023-11-10, 04:28:21 GMT +# Date: 2023-11-10, 20:57:39 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -831,6 +831,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11F36..11F3A ; Other_Alphabetic # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F3E..11F3F ; Other_Alphabetic # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F40 ; Other_Alphabetic # Mn KAWI VOWEL SIGN EU +1611E..16129 ; Other_Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Other_Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612E ; Other_Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA 16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -847,7 +850,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1478 +# Total code points: 1495 # ================================================ @@ -1099,6 +1102,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA 11F5A ; Diacritic # Mn KAWI SIGN NUKTA 13447..13455 ; Diacritic # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1612F ; Diacritic # Mn GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -1123,7 +1127,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1157 +# Total code points: 1158 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 9c1fd6808..3d9b1a0d1 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-16.0.0.txt -# Date: 2023-11-10, 04:28:24 GMT +# Date: 2023-11-10, 20:57:45 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -273,6 +273,7 @@ blk; Greek_Ext ; Greek_Extended blk; Gujarati ; Gujarati blk; Gunjala_Gondi ; Gunjala_Gondi blk; Gurmukhi ; Gurmukhi +blk; Gurung_Khema ; Gurung_Khema blk; Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms blk; Half_Marks ; Combining_Half_Marks blk; Hangul ; Hangul_Syllables @@ -1344,6 +1345,7 @@ sc ; Goth ; Gothic sc ; Gran ; Grantha sc ; Grek ; Greek sc ; Gujr ; Gujarati +sc ; Gukh ; Gurung_Khema sc ; Guru ; Gurmukhi sc ; Hang ; Hangul sc ; Hani ; Han diff --git a/unicodetools/data/ucd/dev/ScriptExtensions.txt b/unicodetools/data/ucd/dev/ScriptExtensions.txt index 6ebebd97e..e7fc879a3 100644 --- a/unicodetools/data/ucd/dev/ScriptExtensions.txt +++ b/unicodetools/data/ucd/dev/ScriptExtensions.txt @@ -1,5 +1,5 @@ # ScriptExtensions-16.0.0.txt -# Date: 2023-10-24, 21:04:04 GMT +# Date: 2023-11-10, 21:47:13 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -626,9 +626,9 @@ A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind # ================================================ -# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh +# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Gukh Guru Knda Limb Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh -0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DOUBLE DANDA +0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Gukh Guru Knda Limb Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DOUBLE DANDA # Total code points: 1 diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 794f17240..a7ef39655 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-11-10, 04:28:43 GMT +# Date: 2023-11-10, 20:58:18 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3056,6 +3056,16 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ +16100..1611D ; Gurung_Khema # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; Gurung_Khema # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Gurung_Khema # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; Gurung_Khema # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; Gurung_Khema # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE + +# Total code points: 58 + +# ================================================ + 16D40..16D42 ; Kirat_Rai # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; Kirat_Rai # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; Kirat_Rai # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 8d10fd3ae..ec386d5d5 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -25055,6 +25055,64 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 14644;ANATOLIAN HIEROGLYPH A528;Lo;0;L;;;;;N;;;;; 14645;ANATOLIAN HIEROGLYPH A529;Lo;0;L;;;;;N;;;;; 14646;ANATOLIAN HIEROGLYPH A530;Lo;0;L;;;;;N;;;;; +16100;GURUNG KHEMA LETTER A;Lo;0;L;;;;;N;;;;; +16101;GURUNG KHEMA LETTER KA;Lo;0;L;;;;;N;;;;; +16102;GURUNG KHEMA LETTER KHA;Lo;0;L;;;;;N;;;;; +16103;GURUNG KHEMA LETTER GA;Lo;0;L;;;;;N;;;;; +16104;GURUNG KHEMA LETTER GHA;Lo;0;L;;;;;N;;;;; +16105;GURUNG KHEMA LETTER NGA;Lo;0;L;;;;;N;;;;; +16106;GURUNG KHEMA LETTER CA;Lo;0;L;;;;;N;;;;; +16107;GURUNG KHEMA LETTER CHA;Lo;0;L;;;;;N;;;;; +16108;GURUNG KHEMA LETTER JA;Lo;0;L;;;;;N;;;;; +16109;GURUNG KHEMA LETTER JHA;Lo;0;L;;;;;N;;;;; +1610A;GURUNG KHEMA LETTER HA;Lo;0;L;;;;;N;;;;; +1610B;GURUNG KHEMA LETTER TTA;Lo;0;L;;;;;N;;;;; +1610C;GURUNG KHEMA LETTER TTHA;Lo;0;L;;;;;N;;;;; +1610D;GURUNG KHEMA LETTER DDA;Lo;0;L;;;;;N;;;;; +1610E;GURUNG KHEMA LETTER DDHA;Lo;0;L;;;;;N;;;;; +1610F;GURUNG KHEMA LETTER VA;Lo;0;L;;;;;N;;;;; +16110;GURUNG KHEMA LETTER TA;Lo;0;L;;;;;N;;;;; +16111;GURUNG KHEMA LETTER THA;Lo;0;L;;;;;N;;;;; +16112;GURUNG KHEMA LETTER DA;Lo;0;L;;;;;N;;;;; +16113;GURUNG KHEMA LETTER DHA;Lo;0;L;;;;;N;;;;; +16114;GURUNG KHEMA LETTER NA;Lo;0;L;;;;;N;;;;; +16115;GURUNG KHEMA LETTER PA;Lo;0;L;;;;;N;;;;; +16116;GURUNG KHEMA LETTER PHA;Lo;0;L;;;;;N;;;;; +16117;GURUNG KHEMA LETTER BA;Lo;0;L;;;;;N;;;;; +16118;GURUNG KHEMA LETTER BHA;Lo;0;L;;;;;N;;;;; +16119;GURUNG KHEMA LETTER MA;Lo;0;L;;;;;N;;;;; +1611A;GURUNG KHEMA LETTER YA;Lo;0;L;;;;;N;;;;; +1611B;GURUNG KHEMA LETTER RA;Lo;0;L;;;;;N;;;;; +1611C;GURUNG KHEMA LETTER LA;Lo;0;L;;;;;N;;;;; +1611D;GURUNG KHEMA LETTER SA;Lo;0;L;;;;;N;;;;; +1611E;GURUNG KHEMA VOWEL SIGN AA;Mn;0;NSM;;;;;N;;;;; +1611F;GURUNG KHEMA VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; +16120;GURUNG KHEMA VOWEL SIGN II;Mn;0;NSM;;;;;N;;;;; +16121;GURUNG KHEMA VOWEL SIGN U;Mn;0;NSM;1611E 1611E;;;;N;;;;; +16122;GURUNG KHEMA VOWEL SIGN UU;Mn;0;NSM;1611E 16129;;;;N;;;;; +16123;GURUNG KHEMA VOWEL SIGN E;Mn;0;NSM;1611E 1611F;;;;N;;;;; +16124;GURUNG KHEMA VOWEL SIGN EE;Mn;0;NSM;16129 1611F;;;;N;;;;; +16125;GURUNG KHEMA VOWEL SIGN AI;Mn;0;NSM;1611E 16120;;;;N;;;;; +16126;GURUNG KHEMA VOWEL SIGN O;Mn;0;NSM;16121 1611F;;;;N;;;;; +16127;GURUNG KHEMA VOWEL SIGN OO;Mn;0;NSM;16122 1611F;;;;N;;;;; +16128;GURUNG KHEMA VOWEL SIGN AU;Mn;0;NSM;16121 16120;;;;N;;;;; +16129;GURUNG KHEMA VOWEL LENGTH MARK;Mn;0;NSM;;;;;N;;;;; +1612A;GURUNG KHEMA CONSONANT SIGN MEDIAL YA;Mc;0;L;;;;;N;;;;; +1612B;GURUNG KHEMA CONSONANT SIGN MEDIAL VA;Mc;0;L;;;;;N;;;;; +1612C;GURUNG KHEMA CONSONANT SIGN MEDIAL HA;Mc;0;L;;;;;N;;;;; +1612D;GURUNG KHEMA SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +1612E;GURUNG KHEMA CONSONANT SIGN MEDIAL RA;Mn;0;NSM;;;;;N;;;;; +1612F;GURUNG KHEMA SIGN THOLHOMA;Mn;9;NSM;;;;;N;;;;; +16130;GURUNG KHEMA DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +16131;GURUNG KHEMA DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +16132;GURUNG KHEMA DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +16133;GURUNG KHEMA DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +16134;GURUNG KHEMA DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +16135;GURUNG KHEMA DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +16136;GURUNG KHEMA DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +16137;GURUNG KHEMA DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +16138;GURUNG KHEMA DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +16139;GURUNG KHEMA DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 16800;BAMUM LETTER PHASE-A NGKUE MFON;Lo;0;L;;;;;N;;;;; 16801;BAMUM LETTER PHASE-A GBIEE FON;Lo;0;L;;;;;N;;;;; 16802;BAMUM LETTER PHASE-A PON MFON PIPAEMGBIEE;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 4a1854fa9..72724221e 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-11-10, 04:28:47 GMT +# Date: 2023-11-10, 20:58:25 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2138,6 +2138,11 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 13456..1345F ; U # Cn [10] .. 14400..14646 ; U # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 14647..1467F ; U # Cn [57] .. +16100..1611D ; R # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; R # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; R # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; R # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; R # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; R # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; R # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; R # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index aaa397355..9cde73f37 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2023-11-10, 04:28:16 GMT +# Date: 2023-11-10, 20:57:30 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -430,6 +430,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Extend # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -468,7 +470,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2149 +# Total code points: 2164 # ================================================ @@ -646,12 +648,13 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11F34..11F35 ; SpacingMark # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA 11F3E..11F3F ; SpacingMark # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F41 ; SpacingMark # Mc KAWI SIGN KILLER +1612A..1612C ; SpacingMark # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16FF0..16FF1 ; SpacingMark # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 408 +# Total code points: 411 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 150b00274..bde8fe599 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-11-10, 04:28:44 GMT +# Date: 2023-11-10, 20:58:20 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -543,6 +543,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Extend # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Extend # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -582,7 +585,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2582 +# Total code points: 2600 # ================================================ @@ -2487,6 +2490,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 13000..1342F ; OLetter # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; OLetter # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; OLetter # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; OLetter # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA 16800..16A38 ; OLetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; OLetter # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; OLetter # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -2578,7 +2582,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132884 +# Total code points: 132914 # ================================================ @@ -2647,6 +2651,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Numeric # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE @@ -2660,7 +2665,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 764 +# Total code points: 774 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index dd210fcb7..acba98caf 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-11-10, 04:28:47 GMT +# Date: 2023-11-10, 20:58:25 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -579,6 +579,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Extend # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Extend # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -619,7 +622,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2586 +# Total code points: 2604 # ================================================ @@ -1239,6 +1242,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 13000..1342F ; ALetter # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; ALetter # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; ALetter # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; ALetter # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA 16800..16A38 ; ALetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; ALetter # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; ALetter # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -1349,7 +1353,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 29768 +# Total code points: 29798 # ================================================ @@ -1463,6 +1467,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Numeric # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE @@ -1476,7 +1481,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 763 +# Total code points: 773 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 39eb2965a..b5402022b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-11-10, 04:28:05 GMT +# Date: 2023-11-10, 20:57:13 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1064,6 +1064,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 13430..1343F ; L # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13441..13446 ; L # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; L # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; L # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1612A..1612C ; L # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +16130..16139 ; L # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; L # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; L # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -1213,8 +1216,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 819407 code points not listed here. -# Total code points: 1095534 +# The above property value applies to 819349 code points not listed here. +# Total code points: 1095519 # ================================================ @@ -2365,6 +2368,8 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11F5A ; NSM # Mn KAWI SIGN NUKTA 13440 ; NSM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; NSM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; NSM # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; NSM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -2399,7 +2404,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2012 +# Total code points: 2027 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 2bc9650ba..0fd1fd0e8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-11-10, 04:28:08 GMT +# Date: 2023-11-10, 20:57:16 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1794,6 +1794,11 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 13441..13446 ; 0 # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 13447..13455 ; 0 # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 14400..14646 ; 0 # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; 0 # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; 0 # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; 0 # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612E ; 0 # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA +16130..16139 ; 0 # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; 0 # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; 0 # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; 0 # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -2051,8 +2056,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 825637 code points not listed here. -# Total code points: 1113179 +# The above property value applies to 825579 code points not listed here. +# Total code points: 1113178 # ================================================ @@ -2188,8 +2193,9 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK 11D97 ; 9 # Mn GUNJALA GONDI VIRAMA 11F41 ; 9 # Mc KAWI SIGN KILLER 11F42 ; 9 # Mn KAWI CONJOINER +1612F ; 9 # Mn GURUNG KHEMA SIGN THOLHOMA -# Total code points: 68 +# Total code points: 69 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index a52989f81..0bb606ae2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ # DerivedDecompositionType-16.0.0.txt -# Date: 2023-11-10, 04:28:10 GMT +# Date: 2023-11-10, 20:57:20 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -266,12 +266,13 @@ FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 114BE ; Canonical # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; Canonical # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; Canonical # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; Canonical # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU 16D68..16D6A ; Canonical # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU 1D15E..1D164 ; Canonical # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Canonical # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; Canonical # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 13245 +# Total code points: 13253 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index bcc77852c..899b81344 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-10, 04:28:10 GMT +# Date: 2023-11-10, 20:57:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1830,6 +1830,11 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 13441..13446 ; N # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 13447..13455 ; N # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 14400..14646 ; N # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; N # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; N # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; N # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; N # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; N # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; N # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; N # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -2094,7 +2099,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765155 code points not listed here. +# The above property value applies to 765097 code points not listed here. # Total code points: 792608 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index f89d39ae7..df5e6199b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-11-10, 04:28:11 GMT +# Date: 2023-11-10, 20:57:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -559,7 +559,8 @@ FFFE..FFFF ; Cn # [2] .. 12544..12F8F ; Cn # [2636] .. 12FF3..12FFF ; Cn # [13] .. 13456..143FF ; Cn # [4010] .. -14647..167FF ; Cn # [8633] .. +14647..160FF ; Cn # [6841] .. +1613A..167FF ; Cn # [1734] .. 16A39..16A3F ; Cn # [7] .. 16A5F ; Cn # 16A6A..16A6D ; Cn # [4] .. @@ -747,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 823589 +# Total code points: 823531 # ================================================ @@ -2626,6 +2627,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 13000..1342F ; Lo # [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; Lo # [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; Lo # [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; Lo # [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA 16800..16A38 ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; Lo # [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; Lo # [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -2705,7 +2707,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132453 +# Total code points: 132483 # ================================================ @@ -3032,6 +3034,8 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11F5A ; Mn # KAWI SIGN NUKTA 13440 ; Mn # EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Mn # [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Mn # [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Mn # [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Mn # [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Mn # [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Mn # MIAO SIGN CONSONANT MODIFIER BAR @@ -3066,7 +3070,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2004 +# Total code points: 2019 # ================================================ @@ -3269,12 +3273,13 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 11F34..11F35 ; Mc # [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA 11F3E..11F3F ; Mc # [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F41 ; Mc # KAWI SIGN KILLER +1612A..1612C ; Mc # [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16F51..16F87 ; Mc # [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16FF0..16FF1 ; Mc # [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 465 +# Total code points: 468 # ================================================ @@ -3338,6 +3343,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 11F50..11F59 ; Nd # [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Nd # [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Nd # [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Nd # [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Nd # [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE @@ -3351,7 +3357,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 750 +# Total code points: 760 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index b5c781bb4..4f5176411 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2023-11-10, 04:28:12 GMT +# Date: 2023-11-10, 20:57:23 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -539,6 +539,8 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 13430..1343F ; T # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13440 ; T # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; T # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; T # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; T # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; T # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; T # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; T # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -578,6 +580,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2169 +# Total code points: 2184 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 125c89421..3190f9348 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-10, 04:28:12 GMT +# Date: 2023-11-10, 21:08:49 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 761709 code points not listed here. -# Total code points: 899177 +# The above property value applies to 761651 code points not listed here. +# Total code points: 899119 # ================================================ @@ -2335,6 +2335,9 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 11F5A ; CM # Mn KAWI SIGN NUKTA 13440 ; CM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; CM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; CM # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; CM # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; CM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; CM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; CM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; CM # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -2376,7 +2379,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2460 +# Total code points: 2478 # ================================================ @@ -3902,8 +3905,10 @@ AA50..AA59 ; AS # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE 11950..11959 ; AS # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE 11EE0..11EF1 ; AS # Lo [18] MAKASAR LETTER KA..MAKASAR LETTER A 11F50..11F59 ; AS # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16100..1611D ; AS # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +16130..16139 ; AS # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE -# Total code points: 174 +# Total code points: 214 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 8e82c59c3..780987289 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-11-10, 04:28:12 GMT +# Date: 2023-11-10, 20:57:24 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -35720,6 +35720,64 @@ FFFD ; REPLACEMENT CHARACTER 14644 ; ANATOLIAN HIEROGLYPH A528 14645 ; ANATOLIAN HIEROGLYPH A529 14646 ; ANATOLIAN HIEROGLYPH A530 +16100 ; GURUNG KHEMA LETTER A +16101 ; GURUNG KHEMA LETTER KA +16102 ; GURUNG KHEMA LETTER KHA +16103 ; GURUNG KHEMA LETTER GA +16104 ; GURUNG KHEMA LETTER GHA +16105 ; GURUNG KHEMA LETTER NGA +16106 ; GURUNG KHEMA LETTER CA +16107 ; GURUNG KHEMA LETTER CHA +16108 ; GURUNG KHEMA LETTER JA +16109 ; GURUNG KHEMA LETTER JHA +1610A ; GURUNG KHEMA LETTER HA +1610B ; GURUNG KHEMA LETTER TTA +1610C ; GURUNG KHEMA LETTER TTHA +1610D ; GURUNG KHEMA LETTER DDA +1610E ; GURUNG KHEMA LETTER DDHA +1610F ; GURUNG KHEMA LETTER VA +16110 ; GURUNG KHEMA LETTER TA +16111 ; GURUNG KHEMA LETTER THA +16112 ; GURUNG KHEMA LETTER DA +16113 ; GURUNG KHEMA LETTER DHA +16114 ; GURUNG KHEMA LETTER NA +16115 ; GURUNG KHEMA LETTER PA +16116 ; GURUNG KHEMA LETTER PHA +16117 ; GURUNG KHEMA LETTER BA +16118 ; GURUNG KHEMA LETTER BHA +16119 ; GURUNG KHEMA LETTER MA +1611A ; GURUNG KHEMA LETTER YA +1611B ; GURUNG KHEMA LETTER RA +1611C ; GURUNG KHEMA LETTER LA +1611D ; GURUNG KHEMA LETTER SA +1611E ; GURUNG KHEMA VOWEL SIGN AA +1611F ; GURUNG KHEMA VOWEL SIGN I +16120 ; GURUNG KHEMA VOWEL SIGN II +16121 ; GURUNG KHEMA VOWEL SIGN U +16122 ; GURUNG KHEMA VOWEL SIGN UU +16123 ; GURUNG KHEMA VOWEL SIGN E +16124 ; GURUNG KHEMA VOWEL SIGN EE +16125 ; GURUNG KHEMA VOWEL SIGN AI +16126 ; GURUNG KHEMA VOWEL SIGN O +16127 ; GURUNG KHEMA VOWEL SIGN OO +16128 ; GURUNG KHEMA VOWEL SIGN AU +16129 ; GURUNG KHEMA VOWEL LENGTH MARK +1612A ; GURUNG KHEMA CONSONANT SIGN MEDIAL YA +1612B ; GURUNG KHEMA CONSONANT SIGN MEDIAL VA +1612C ; GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D ; GURUNG KHEMA SIGN ANUSVARA +1612E ; GURUNG KHEMA CONSONANT SIGN MEDIAL RA +1612F ; GURUNG KHEMA SIGN THOLHOMA +16130 ; GURUNG KHEMA DIGIT ZERO +16131 ; GURUNG KHEMA DIGIT ONE +16132 ; GURUNG KHEMA DIGIT TWO +16133 ; GURUNG KHEMA DIGIT THREE +16134 ; GURUNG KHEMA DIGIT FOUR +16135 ; GURUNG KHEMA DIGIT FIVE +16136 ; GURUNG KHEMA DIGIT SIX +16137 ; GURUNG KHEMA DIGIT SEVEN +16138 ; GURUNG KHEMA DIGIT EIGHT +16139 ; GURUNG KHEMA DIGIT NINE 16800 ; BAMUM LETTER PHASE-A NGKUE MFON 16801 ; BAMUM LETTER PHASE-A GBIEE FON 16802 ; BAMUM LETTER PHASE-A PON MFON PIPAEMGBIEE @@ -45305,6 +45363,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150942 +# Total code points: 151000 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt index 3a4bca04d..843ee6d9d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt @@ -1,5 +1,5 @@ # DerivedNumericType-16.0.0.txt -# Date: 2023-11-06, 03:48:59 GMT +# Date: 2023-11-10, 20:57:28 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -279,6 +279,7 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 11F50..11F59 ; Decimal # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Decimal # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Decimal # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Decimal # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Decimal # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE @@ -292,6 +293,6 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 750 +# Total code points: 760 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt index 37bade8a8..d057ac4a1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt @@ -1,5 +1,5 @@ # DerivedNumericValues-16.0.0.txt -# Date: 2023-11-06, 03:49:00 GMT +# Date: 2023-11-10, 20:57:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -111,6 +111,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 11D50 ; 0.0 ; ; 0 # Nd MASARAM GONDI DIGIT ZERO 11DA0 ; 0.0 ; ; 0 # Nd GUNJALA GONDI DIGIT ZERO 11F50 ; 0.0 ; ; 0 # Nd KAWI DIGIT ZERO +16130 ; 0.0 ; ; 0 # Nd GURUNG KHEMA DIGIT ZERO 16A60 ; 0.0 ; ; 0 # Nd MRO DIGIT ZERO 16AC0 ; 0.0 ; ; 0 # Nd TANGSA DIGIT ZERO 16B50 ; 0.0 ; ; 0 # Nd PAHAWH HMONG DIGIT ZERO @@ -133,7 +134,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1F10B..1F10C ; 0.0 ; ; 0 # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO 1FBF0 ; 0.0 ; ; 0 # Nd SEGMENTED DIGIT ZERO -# Total code points: 95 +# Total code points: 96 # ================================================ @@ -536,6 +537,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 12434 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE BURU 1244F ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE BAN2 12458 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE ESHE3 +16131 ; 1.0 ; ; 1 # Nd GURUNG KHEMA DIGIT ONE 16A61 ; 1.0 ; ; 1 # Nd MRO DIGIT ONE 16AC1 ; 1.0 ; ; 1 # Nd TANGSA DIGIT ONE 16B51 ; 1.0 ; ; 1 # Nd PAHAWH HMONG DIGIT ONE @@ -567,7 +569,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 151 +# Total code points: 152 # ================================================ @@ -697,6 +699,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 12450 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO BAN2 12456 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN NIGIDAMIN 12459 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO ESHE3 +16132 ; 2.0 ; ; 2 # Nd GURUNG KHEMA DIGIT TWO 16A62 ; 2.0 ; ; 2 # Nd MRO DIGIT TWO 16AC2 ; 2.0 ; ; 2 # Nd TANGSA DIGIT TWO 16B52 ; 2.0 ; ; 2 # Nd PAHAWH HMONG DIGIT TWO @@ -728,7 +731,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 153 +# Total code points: 154 # ================================================ @@ -850,6 +853,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 1244B ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE ASH TENU 12451 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE BAN2 12457 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN NIGIDAESH +16133 ; 3.0 ; ; 3 # Nd GURUNG KHEMA DIGIT THREE 16A63 ; 3.0 ; ; 3 # Nd MRO DIGIT THREE 16AC3 ; 3.0 ; ; 3 # Nd TANGSA DIGIT THREE 16B53 ; 3.0 ; ; 3 # Nd PAHAWH HMONG DIGIT THREE @@ -883,7 +887,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998 23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B -# Total code points: 151 +# Total code points: 152 # ================================================ @@ -1000,6 +1004,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 1244C ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR ASH TENU 12452..12453 ; 4.0 ; ; 4 # Nl [2] CUNEIFORM NUMERIC SIGN FOUR BAN2..CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM 12469 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR U VARIANT FORM +16134 ; 4.0 ; ; 4 # Nd GURUNG KHEMA DIGIT FOUR 16A64 ; 4.0 ; ; 4 # Nd MRO DIGIT FOUR 16AC4 ; 4.0 ; ; 4 # Nd TANGSA DIGIT FOUR 16B54 ; 4.0 ; ; 4 # Nd PAHAWH HMONG DIGIT FOUR @@ -1031,7 +1036,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2 2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D -# Total code points: 142 +# Total code points: 143 # ================================================ @@ -1151,6 +1156,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1244D ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE ASH TENU 12454..12455 ; 5.0 ; ; 5 # Nl [2] CUNEIFORM NUMERIC SIGN FIVE BAN2..CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM 1246A ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE U VARIANT FORM +16135 ; 5.0 ; ; 5 # Nd GURUNG KHEMA DIGIT FIVE 16A65 ; 5.0 ; ; 5 # Nd MRO DIGIT FIVE 16AC5 ; 5.0 ; ; 5 # Nd TANGSA DIGIT FIVE 16B55 ; 5.0 ; ; 5 # Nd PAHAWH HMONG DIGIT FIVE @@ -1181,7 +1187,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1FBF5 ; 5.0 ; ; 5 # Nd SEGMENTED DIGIT FIVE 20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121 -# Total code points: 140 +# Total code points: 141 # ================================================ @@ -1288,6 +1294,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 12440 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX VARIANT FORM ASH9 1244E ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX ASH TENU 1246B ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX U VARIANT FORM +16136 ; 6.0 ; ; 6 # Nd GURUNG KHEMA DIGIT SIX 16A66 ; 6.0 ; ; 6 # Nd MRO DIGIT SIX 16AC6 ; 6.0 ; ; 6 # Nd TANGSA DIGIT SIX 16B56 ; 6.0 ; ; 6 # Nd PAHAWH HMONG DIGIT SIX @@ -1316,7 +1323,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1FBF6 ; 6.0 ; ; 6 # Nd SEGMENTED DIGIT SIX 20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA -# Total code points: 124 +# Total code points: 125 # ================================================ @@ -1421,6 +1428,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 12429 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN SHAR2 12441..12443 ; 7.0 ; ; 7 # Nl [3] CUNEIFORM NUMERIC SIGN SEVEN VARIANT FORM IMIN3..CUNEIFORM NUMERIC SIGN SEVEN VARIANT FORM IMIN B 1246C ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN U VARIANT FORM +16137 ; 7.0 ; ; 7 # Nd GURUNG KHEMA DIGIT SEVEN 16A67 ; 7.0 ; ; 7 # Nd MRO DIGIT SEVEN 16AC7 ; 7.0 ; ; 7 # Nd TANGSA DIGIT SEVEN 16B57 ; 7.0 ; ; 7 # Nd PAHAWH HMONG DIGIT SEVEN @@ -1449,7 +1457,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1FBF7 ; 7.0 ; ; 7 # Nd SEGMENTED DIGIT SEVEN 20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001 -# Total code points: 124 +# Total code points: 125 # ================================================ @@ -1551,6 +1559,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1242A ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT SHAR2 12444..12445 ; 8.0 ; ; 8 # Nl [2] CUNEIFORM NUMERIC SIGN EIGHT VARIANT FORM USSU..CUNEIFORM NUMERIC SIGN EIGHT VARIANT FORM USSU3 1246D ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT U VARIANT FORM +16138 ; 8.0 ; ; 8 # Nd GURUNG KHEMA DIGIT EIGHT 16A68 ; 8.0 ; ; 8 # Nd MRO DIGIT EIGHT 16AC8 ; 8.0 ; ; 8 # Nd TANGSA DIGIT EIGHT 16B58 ; 8.0 ; ; 8 # Nd PAHAWH HMONG DIGIT EIGHT @@ -1578,7 +1587,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA 1FBF8 ; 8.0 ; ; 8 # Nd SEGMENTED DIGIT EIGHT -# Total code points: 119 +# Total code points: 120 # ================================================ @@ -1683,6 +1692,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1242B ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE SHAR2 12446..12449 ; 9.0 ; ; 9 # Nl [4] CUNEIFORM NUMERIC SIGN NINE VARIANT FORM ILIMMU..CUNEIFORM NUMERIC SIGN NINE VARIANT FORM ILIMMU A 1246E ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +16139 ; 9.0 ; ; 9 # Nd GURUNG KHEMA DIGIT NINE 16A69 ; 9.0 ; ; 9 # Nd MRO DIGIT NINE 16AC9 ; 9.0 ; ; 9 # Nd TANGSA DIGIT NINE 16B59 ; 9.0 ; ; 9 # Nd PAHAWH HMONG DIGIT NINE @@ -1711,7 +1721,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1FBF9 ; 9.0 ; ; 9 # Nd SEGMENTED DIGIT NINE 2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 125 +# Total code points: 126 # ================================================ diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 13779ae05..321cb20b1 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -298,6 +298,7 @@ public enum Block_Values implements Named { Gujarati("Gujarati"), Gunjala_Gondi("Gunjala_Gondi"), Gurmukhi("Gurmukhi"), + Gurung_Khema("Gurung_Khema"), Halfwidth_And_Fullwidth_Forms("Half_And_Full_Forms"), Combining_Half_Marks("Half_Marks"), Hangul_Syllables("Hangul"), @@ -1753,6 +1754,7 @@ public enum Script_Values implements Named { Grantha("Gran"), Greek("Grek"), Gujarati("Gujr"), + Gurung_Khema("Gukh"), Gurmukhi("Guru"), Hangul("Hang"), Han("Hani"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index 07f528d8e..f4344e517 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -971,9 +971,9 @@ File: IndicPositionalCategory # # Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, # Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, -# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, -# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Kirat Rai, Lao, Lepcha, Limbu, -# Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, Modi, +# Gunjala Gondi, Gurmukhi, Gurung Khema, Hanunoo, Javanese, Kaithi, Kannada, +# Kawi, Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Kirat Rai, Lao, Lepcha, +# Limbu, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, Modi, # Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, Rejang, Saurashtra, # Sharada, Siddham, Sinhala, Soyombo, Sundanese, Syloti Nagri, # Tagalog, Tagbanwa, Tai Tham, Tai Viet, Takri, Tamil, Telugu, Thai, @@ -1078,9 +1078,9 @@ File: IndicSyllabicCategory # # Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, # Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, -# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, -# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Kirat Rai, Lao, Lepcha, Limbu, -# Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, +# Gunjala Gondi, Gurmukhi, Gurung Khema, Hanunoo, Javanese, Kaithi, Kannada, +# Kawi, Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Kirat Rai, Lao, Lepcha, +# Limbu, Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, # Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, # Phags-pa, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Soyombo, # Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 8f1d6a5dd..35f065386 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -121,6 +121,7 @@ Greek_Ext ; Greek_Extended Gujarati ; Gujarati Gunjala_Gondi ; Gunjala_Gondi Gurmukhi ; Gurmukhi +Gurung_Khema ; Gurung_Khema Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms Compat_Jamo ; Hangul_Compatibility_Jamo Jamo ; Hangul_Jamo diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index a9978cd9b..e02375280 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -557,7 +557,7 @@ Let $IDInclusions = [[:block=/Ideographs/:] [[\U00020000-\U0003FFFF][\U0001F000- \p{LB=ID} ⊃ $IDInclusions \p{Line_Break=Unknown} = [\p{General_Category=Unassigned} \p{GeneralCategory=PrivateUse} - $IDInclusions - [\u20C0-\u20CF]] -Let $BrahmicLineBreaking = [\p{sc=Balinese}\p{sc=Batak}\p{sc=Brahmi}\p{sc=Cham}\p{sc=DivesAkuru}\p{sc=Grantha}\p{sc=Javanese}\p{sc=Makasar}\p{sc=Kawi}\p{sc=Cham}\p{sc=Makasar}\p{sc=Tulu_Tigalari}] +Let $BrahmicLineBreaking = [\p{sc=Balinese}\p{sc=Batak}\p{sc=Brahmi}\p{sc=Cham}\p{sc=DivesAkuru}\p{sc=Grantha}\p{sc=Javanese}\p{sc=Makasar}\p{sc=Kawi}\p{sc=Cham}\p{sc=Makasar}\p{sc=Tulu_Tigalari}\p{sc=Gurung_Khema}] Let $VFScripts = [\p{sc=Batak}] Let $OPInclusions = [\u00A1\u00BF\u2E18\U00013258-\U0001325A\U00013286\U00013288\U00013379\U0001342F\U00013437\U0001343C\U0001343E\U000145CE\U0001E95E-\U0001E95F]