From 1f791fe19c7b88e190c03aee67de3f20e6a9564b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 9 Nov 2023 20:26:45 -0800 Subject: [PATCH] Kawi sign nukta (#567) --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- .../data/ucd/dev/DerivedCoreProperties.txt | 14 +++++++++----- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- .../data/ucd/dev/IndicPositionalCategory.txt | 3 ++- .../data/ucd/dev/IndicSyllabicCategory.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- unicodetools/data/ucd/dev/PropList.txt | 5 +++-- unicodetools/data/ucd/dev/Scripts.txt | 5 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + unicodetools/data/ucd/dev/VerticalOrientation.txt | 3 ++- .../ucd/dev/auxiliary/GraphemeBreakProperty.txt | 5 +++-- .../ucd/dev/auxiliary/SentenceBreakProperty.txt | 5 +++-- .../data/ucd/dev/auxiliary/WordBreakProperty.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 9 +++++---- .../ucd/dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../ucd/dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../ucd/dev/extracted/DerivedGeneralCategory.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedJoiningType.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 5 +++-- .../org/unicode/text/UCD/UnicodeInvariantTest.txt | 6 ++++++ 21 files changed, 71 insertions(+), 42 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 8a96c5734..d33496325 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-11-06, 03:48:21 GMT +# Date: 2023-11-10, 01:47:31 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2026,6 +2026,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 116D0..116E3 ; 16.0 # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO 11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +11F5A ; 16.0 # KAWI SIGN NUKTA 16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE @@ -2043,6 +2044,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 1050 +# Total code points: 1051 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index bc819fc33..6f0672daa 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-06, 03:48:52 GMT +# Date: 2023-11-10, 01:48:01 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3424,6 +3424,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11F36..11F3A ; Case_Ignorable # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Case_Ignorable # Mn KAWI VOWEL SIGN EU 11F42 ; Case_Ignorable # Mn KAWI CONJOINER +11F5A ; Case_Ignorable # Mn KAWI SIGN NUKTA 13430..1343F ; Case_Ignorable # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13440 ; Case_Ignorable # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Case_Ignorable # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED @@ -3479,7 +3480,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2722 +# Total code points: 2723 # ================================================ @@ -8124,6 +8125,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11F41 ; ID_Continue # Mc KAWI SIGN KILLER 11F42 ; ID_Continue # Mn KAWI CONJOINER 11F50..11F59 ; ID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; ID_Continue # Mn KAWI SIGN NUKTA 11FB0 ; ID_Continue # Lo LISU LETTER YHA 12000..12399 ; ID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; ID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM @@ -8310,7 +8312,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140413 +# Total code points: 140414 # ================================================ @@ -10277,6 +10279,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11F41 ; XID_Continue # Mc KAWI SIGN KILLER 11F42 ; XID_Continue # Mn KAWI CONJOINER 11F50..11F59 ; XID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; XID_Continue # Mn KAWI SIGN NUKTA 11FB0 ; XID_Continue # Lo LISU LETTER YHA 12000..12399 ; XID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; XID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM @@ -10463,7 +10466,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140394 +# Total code points: 140395 # ================================================ @@ -10864,6 +10867,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 11F36..11F3A ; Grapheme_Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Grapheme_Extend # Mn KAWI VOWEL SIGN EU 11F42 ; Grapheme_Extend # Mn KAWI CONJOINER +11F5A ; Grapheme_Extend # Mn KAWI SIGN NUKTA 13440 ; Grapheme_Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Grapheme_Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Grapheme_Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -10903,7 +10907,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2133 +# Total code points: 2134 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 384e6e62c..c53d2b8ac 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-06, 03:49:00 GMT +# Date: 2023-11-10, 01:48:10 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2271,6 +2271,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 11F42 ; N # Mn KAWI CONJOINER 11F43..11F4F ; N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; N # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; N # Mn KAWI SIGN NUKTA 11FB0 ; N # Lo LISU LETTER YHA 11FC0..11FD4 ; N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index bad2697b9..8afb4936d 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ # IndicPositionalCategory-16.0.0.txt -# Date: 2023-11-09, 18:48:36 GMT +# Date: 2023-11-10, 01:48:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -592,6 +592,7 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11F02 ; Top # Lo KAWI SIGN REPHA 11F36..11F37 ; Top # Mn [2] KAWI VOWEL SIGN I..KAWI VOWEL SIGN II 11F40 ; Top # Mn KAWI VOWEL SIGN EU +11F5A ; Top # Mn KAWI SIGN NUKTA # Indic_Positional_Category=Bottom diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index 0bc45f818..455b67ffa 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-16.0.0.txt -# Date: 2023-11-02, 22:55:33 GMT +# Date: 2023-11-10, 01:48:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -251,6 +251,7 @@ A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU 1183A ; Nukta # Mn DOGRA SIGN NUKTA 11943 ; Nukta # Mn DIVES AKURU SIGN NUKTA 11D42 ; Nukta # Mn MASARAM GONDI SIGN NUKTA +11F5A ; Nukta # Mn KAWI SIGN NUKTA # ================================================ diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3dbae2171..835e639c8 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-06, 03:49:01 GMT +# Date: 2023-11-10, 01:48:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3150,6 +3150,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 11F43..11F44 ; BA # Po [2] KAWI DANDA..KAWI DOUBLE DANDA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; AS # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; CM # Mn KAWI SIGN NUKTA 11FB0 ; AL # Lo LISU LETTER YHA 11FC0..11FD4 ; AL # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; AL # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 6ed3e075b..ce1232123 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2023-11-06, 03:49:06 GMT +# Date: 2023-11-10, 01:58:50 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1088,6 +1088,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA 11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +11F5A ; Diacritic # Mn KAWI SIGN NUKTA 13447..13455 ; Diacritic # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM @@ -1113,7 +1114,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1152 +# Total code points: 1153 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index d41e316d4..bcecfd3d0 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-11-06, 03:49:30 GMT +# Date: 2023-11-10, 01:48:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3026,8 +3026,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 11F42 ; Kawi # Mn KAWI CONJOINER 11F43..11F4F ; Kawi # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; Kawi # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; Kawi # Mn KAWI SIGN NUKTA -# Total code points: 86 +# Total code points: 87 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f65dbdfdb..ac5a58311 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -21898,6 +21898,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11F57;KAWI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 11F58;KAWI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 11F59;KAWI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11F5A;KAWI SIGN NUKTA;Mn;0;NSM;;;;;N;;;;; 11FB0;LISU LETTER YHA;Lo;0;L;;;;;N;;;;; 11FC0;TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH;No;0;L;;;;1/320;N;;;;; 11FC1;TAMIL FRACTION ONE ONE-HUNDRED-AND-SIXTIETH;No;0;L;;;;1/160;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 602c20492..805cfd6a4 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-11-06, 03:49:34 GMT +# Date: 2023-11-10, 01:48:44 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2099,6 +2099,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 11F42 ; R # Mn KAWI CONJOINER 11F43..11F4F ; R # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; R # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; R # Mn KAWI SIGN NUKTA 11FB0 ; R # Lo LISU LETTER YHA 11FC0..11FD4 ; R # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; R # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index f2ee0f839..c65c8497c 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2023-11-02, 22:55:32 GMT +# Date: 2023-11-10, 01:48:10 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -422,6 +422,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F36..11F3A ; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Extend # Mn KAWI VOWEL SIGN EU 11F42 ; Extend # Mn KAWI CONJOINER +11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -462,7 +463,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2138 +# Total code points: 2139 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 151c99c5e..90517db87 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-11-06, 03:49:31 GMT +# Date: 2023-11-10, 01:48:41 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -530,6 +530,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F40 ; Extend # Mn KAWI VOWEL SIGN EU 11F41 ; Extend # Mc KAWI SIGN KILLER 11F42 ; Extend # Mn KAWI CONJOINER +11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -571,7 +572,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2559 +# Total code points: 2560 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 09cb979e4..32297919e 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-11-06, 03:49:34 GMT +# Date: 2023-11-10, 01:48:44 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -566,6 +566,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F40 ; Extend # Mn KAWI VOWEL SIGN EU 11F41 ; Extend # Mc KAWI SIGN KILLER 11F42 ; Extend # Mn KAWI CONJOINER +11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -608,7 +609,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2563 +# Total code points: 2564 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 145c5b131..503a90d7a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-11-06, 03:48:48 GMT +# Date: 2023-11-10, 01:47:58 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1199,8 +1199,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 819486 code points not listed here. -# Total code points: 1095545 +# The above property value applies to 819485 code points not listed here. +# Total code points: 1095544 # ================================================ @@ -2344,6 +2344,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11F36..11F3A ; NSM # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; NSM # Mn KAWI VOWEL SIGN EU 11F42 ; NSM # Mn KAWI CONJOINER +11F5A ; NSM # Mn KAWI SIGN NUKTA 13440 ; NSM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; NSM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -2380,7 +2381,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2001 +# Total code points: 2002 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 0f4b855b3..ea14d2d3d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-11-06, 03:48:51 GMT +# Date: 2023-11-10, 01:48:01 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1760,6 +1760,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 11F40 ; 0 # Mn KAWI VOWEL SIGN EU 11F43..11F4F ; 0 # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; 0 # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; 0 # Mn KAWI SIGN NUKTA 11FB0 ; 0 # Lo LISU LETTER YHA 11FC0..11FD4 ; 0 # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; 0 # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -2035,7 +2036,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 825716 code points not listed here. +# The above property value applies to 825715 code points not listed here. # Total code points: 1113182 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 66b62c669..86508516a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-06, 03:48:54 GMT +# Date: 2023-11-10, 01:48:04 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1793,6 +1793,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 11F42 ; N # Mn KAWI CONJOINER 11F43..11F4F ; N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL 11F50..11F59 ; N # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; N # Mn KAWI SIGN NUKTA 11FB0 ; N # Lo LISU LETTER YHA 11FC0..11FD4 ; N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -2075,7 +2076,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765234 code points not listed here. +# The above property value applies to 765233 code points not listed here. # Total code points: 792608 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 35d22b5e1..97180eda6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-11-06, 03:48:55 GMT +# Date: 2023-11-10, 01:48:04 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -538,7 +538,7 @@ FFFE..FFFF ; Cn # [2] .. 11EF9..11EFF ; Cn # [7] .. 11F11 ; Cn # 11F3B..11F3D ; Cn # [3] .. -11F5A..11FAF ; Cn # [86] .. +11F5B..11FAF ; Cn # [85] .. 11FB1..11FBF ; Cn # [15] .. 11FF2..11FFE ; Cn # [13] .. 1239A..123FF ; Cn # [102] .. @@ -735,7 +735,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 823668 +# Total code points: 823667 # ================================================ @@ -3007,6 +3007,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11F36..11F3A ; Mn # [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Mn # KAWI VOWEL SIGN EU 11F42 ; Mn # KAWI CONJOINER +11F5A ; Mn # KAWI SIGN NUKTA 13440 ; Mn # EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Mn # [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Mn # [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -3043,7 +3044,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1993 +# Total code points: 1994 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 1df4e026a..72e3386c8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2023-11-02, 22:55:27 GMT +# Date: 2023-11-10, 01:48:06 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -531,6 +531,7 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 11F36..11F3A ; T # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; T # Mn KAWI VOWEL SIGN EU 11F42 ; T # Mn KAWI CONJOINER +11F5A ; T # Mn KAWI SIGN NUKTA 13430..1343F ; T # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13440 ; T # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; T # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED @@ -573,6 +574,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2158 +# Total code points: 2159 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index bcd0d769c..d3bb1d2a2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-06, 03:51:48 GMT +# Date: 2023-11-10, 01:48:06 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 761788 code points not listed here. -# Total code points: 899256 +# The above property value applies to 761787 code points not listed here. +# Total code points: 899255 # ================================================ @@ -2320,6 +2320,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 11F3E..11F3F ; CM # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F40 ; CM # Mn KAWI VOWEL SIGN EU 11F41 ; CM # Mc KAWI SIGN KILLER +11F5A ; CM # Mn KAWI SIGN NUKTA 13440 ; CM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; CM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; CM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE @@ -2363,7 +2364,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2438 +# Total code points: 2439 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 8434fcab9..cd80a4355 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-11-06, 03:48:57 GMT +# Date: 2023-11-10, 01:48:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -32563,6 +32563,7 @@ FFFD ; REPLACEMENT CHARACTER 11F57 ; KAWI DIGIT SEVEN 11F58 ; KAWI DIGIT EIGHT 11F59 ; KAWI DIGIT NINE +11F5A ; KAWI SIGN NUKTA 11FB0 ; LISU LETTER YHA 11FC0 ; TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH 11FC1 ; TAMIL FRACTION ONE ONE-HUNDRED-AND-SIXTIETH @@ -45226,6 +45227,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150863 +# Total code points: 150864 # EOF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 3c077270c..14d3e096d 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -540,6 +540,12 @@ Let $nonAlphabeticDependentVowels = [\N{ORIYA SIGN OVERLINE}\N{THAI CHARACTER MA # See 177-CXX. \p{name=/COMBINING .* LETTER/} ⊆ [\p{Alphabetic}\p{Diacritic}] +# Nuktas should probably be diacritic, but as of 15.1 this is only the case of +# those that have NUKTA in their name. +# See https://github.com/unicode-org/properties/issues/195#issuecomment-1804962555. +Let $nonDiacriticNuktas = [\u1BE6\U00010A38\U00010A39\U00010A3A\U0001133B] +[\p{InSc=Nukta} - \p{Diacritic}] = $nonDiacriticNuktas + ########################## # LineBreak property ##########################