From 4244786ceea77e42a3487e8c050761b372a06974 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:07:22 +0200 Subject: [PATCH 01/10] UnicodeData.txt from L2/23-206 --- unicodetools/data/ucd/dev/UnicodeData.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 20da9b7d8..c3bf25cd2 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,8 @@ +1AD9;COMBINING SHARP SIGN;Mn;230;NSM;;;;;N;;;;; +1ADA;COMBINING FLAT SIGN;Mn;230;NSM;;;;;N;;;;; +1ADB;COMBINING DOWN TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1ADC;COMBINING FALLING DIAGONAL DIAERESIS;Mn;230;NSM;;;;;N;;;;; +1ADD;COMBINING DOT-AND-RING BELOW;Mn;230;NSM;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From ce9e88cd594ae994eaed82d4e6589d54cadb1d1a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:09:50 +0200 Subject: [PATCH 02/10] LB=CM --- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index daea5c0ac..6a8884152 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-10-24, 21:03:44 GMT +# Date: 2023-10-25, 10:08:51 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -777,6 +777,7 @@ 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; CM # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B05..1B33 ; AK # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA From 5dd4516e0be7e45ab297947c6d0221b6a2e167f6 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:13:59 +0200 Subject: [PATCH 03/10] Scripts.txt --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 376dac297..3c8cb13ef 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +1AD9..1ADD ; Inherited # Scripts-16.0.0.txt # Date: 2023-10-24, 21:04:04 GMT # © 2023 Unicode®, Inc. From 83c033b1c7ac817fe36d1db70ab6758cf8c27bc9 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:16:05 +0200 Subject: [PATCH 04/10] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- .../data/ucd/dev/DerivedCoreProperties.txt | 17 +++++++++++------ unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/NormalizationTest.txt | 12 +++++++++++- unicodetools/data/ucd/dev/Scripts.txt | 6 +++--- unicodetools/data/ucd/dev/UnicodeData.txt | 10 +++++----- .../data/ucd/dev/VerticalOrientation.txt | 3 ++- .../ucd/dev/auxiliary/GraphemeBreakProperty.txt | 5 +++-- .../ucd/dev/auxiliary/SentenceBreakProperty.txt | 5 +++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 9 +++++---- .../ucd/dev/extracted/DerivedCombiningClass.txt | 9 +++++---- .../ucd/dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++++---- .../ucd/dev/extracted/DerivedJoiningType.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 9 +++++++-- 17 files changed, 80 insertions(+), 47 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index a0de787d6..83fabda3f 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-10-24, 21:03:11 GMT +# Date: 2023-10-25, 10:15:08 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2012,6 +2012,7 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT 0897 ; 16.0 # ARABIC PEPET 0C5C ; 16.0 # TELUGU ARCHAIC SHRII 0CDC ; 16.0 # KANNADA ARCHAIC SHRII +1AD9..1ADD ; 16.0 # [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE 105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO @@ -2028,6 +2029,6 @@ A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2 ; 16.0 # RIGHTWARDS ARROW WITH LOWER HOOK -# Total code points: 246 +# Total code points: 251 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 5d2157ef5..f67596c60 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-10-24, 21:03:37 GMT +# Date: 2023-10-25, 10:15:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3168,6 +3168,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Case_Ignorable # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; Case_Ignorable # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3467,7 +3468,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2718 +# Total code points: 2723 # ================================================ @@ -7403,6 +7404,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; ID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; ID_Continue # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -8283,7 +8285,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140344 +# Total code points: 140349 # ================================================ @@ -9541,6 +9543,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; XID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; XID_Continue # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -10426,7 +10429,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140325 +# Total code points: 140330 # ================================================ @@ -10643,6 +10646,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] ..;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -6144,6 +6139,11 @@ 1ACC;COMBINING LATIN SMALL LETTER INSULAR G;Mn;230;NSM;;;;;N;;;;; 1ACD;COMBINING LATIN SMALL LETTER INSULAR R;Mn;230;NSM;;;;;N;;;;; 1ACE;COMBINING LATIN SMALL LETTER INSULAR T;Mn;230;NSM;;;;;N;;;;; +1AD9;COMBINING SHARP SIGN;Mn;230;NSM;;;;;N;;;;; +1ADA;COMBINING FLAT SIGN;Mn;230;NSM;;;;;N;;;;; +1ADB;COMBINING DOWN TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1ADC;COMBINING FALLING DIAGONAL DIAERESIS;Mn;230;NSM;;;;;N;;;;; +1ADD;COMBINING DOT-AND-RING BELOW;Mn;230;NSM;;;;;N;;;;; 1B00;BALINESE SIGN ULU RICEM;Mn;0;NSM;;;;;N;;;;; 1B01;BALINESE SIGN ULU CANDRA;Mn;0;NSM;;;;;N;;;;; 1B02;BALINESE SIGN CECEK;Mn;0;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 08b73924c..07152d190 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-24, 21:04:08 GMT +# Date: 2023-10-25, 10:16:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -741,6 +741,7 @@ 1AB0..1ABD ; R # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; R # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; R # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; R # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; R # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; R # Mc BALINESE SIGN BISAH 1B05..1B33 ; R # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 058e3bb76..2983b1ee7 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2023-10-24, 21:03:44 GMT +# Date: 2023-10-25, 10:15:35 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -238,6 +238,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; Extend # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -461,7 +462,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2139 +# Total code points: 2144 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index c1fd23e22..8afa1f417 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-10-24, 21:04:05 GMT +# Date: 2023-10-25, 10:16:00 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -248,6 +248,7 @@ 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; Extend # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -569,7 +570,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2559 +# Total code points: 2564 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 62f655d99..23e333fb2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-10-24, 21:04:08 GMT +# Date: 2023-10-25, 10:16:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -284,6 +284,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; Extend # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -606,7 +607,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2563 +# Total code points: 2568 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index b3216b731..5b81eb834 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-10-24, 21:03:35 GMT +# Date: 2023-10-25, 10:15:27 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1192,8 +1192,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820290 code points not listed here. -# Total code points: 1096260 +# The above property value applies to 820285 code points not listed here. +# Total code points: 1096255 # ================================================ @@ -2161,6 +2161,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; NSM # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2370,7 +2371,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2002 +# Total code points: 2007 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index c3ef6239a..fe1d165c9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-10-24, 21:03:36 GMT +# Date: 2023-10-25, 10:15:28 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2026,8 +2026,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826520 code points not listed here. -# Total code points: 1113182 +# The above property value applies to 826515 code points not listed here. +# Total code points: 1113177 # ================================================ @@ -2699,6 +2699,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE 1ACB..1ACE ; 230 # Mn [4] COMBINING TRIPLE ACUTE ACCENT..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; 230 # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2765,7 +2766,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 517 +# Total code points: 522 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index b9c329c1d..485a5a209 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-24, 21:03:39 GMT +# Date: 2023-10-25, 10:15:30 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -692,6 +692,7 @@ 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; N # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -2065,7 +2066,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 766038 code points not listed here. +# The above property value applies to 766033 code points not listed here. # Total code points: 792618 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 2a5ac8419..55f026ec2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-10-24, 21:03:39 GMT +# Date: 2023-10-25, 10:15:31 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -228,7 +228,8 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1ACF..1AFF ; Cn # [49] .. +1ACF..1AD8 ; Cn # [10] .. +1ADE..1AFF ; Cn # [34] .. 1B4D..1B4F ; Cn # [3] .. 1B7F ; Cn # 1BF4..1BFB ; Cn # [8] .. @@ -734,7 +735,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824472 +# Total code points: 824467 # ================================================ @@ -2829,6 +2830,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; Mn # [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; Mn # [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3035,7 +3037,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1994 +# Total code points: 1999 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 4ae6df297..956c5dd5d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2023-10-24, 21:03:40 GMT +# Date: 2023-10-25, 10:15:32 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -350,6 +350,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; T # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; T # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -572,6 +573,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2159 +# Total code points: 2164 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 0581bb420..c40f25caa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-10-24, 21:03:41 GMT +# Date: 2023-10-25, 10:15:32 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762480 code points not listed here. -# Total code points: 899948 +# The above property value applies to 762475 code points not listed here. +# Total code points: 899943 # ================================================ @@ -2046,6 +2046,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD9..1ADD ; CM # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2358,7 +2359,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2438 +# Total code points: 2443 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index b9437c06f..0c6c9bdec 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-10-24, 21:03:41 GMT +# Date: 2023-10-25, 10:15:33 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -6114,6 +6114,11 @@ 1ACC ; COMBINING LATIN SMALL LETTER INSULAR G 1ACD ; COMBINING LATIN SMALL LETTER INSULAR R 1ACE ; COMBINING LATIN SMALL LETTER INSULAR T +1AD9 ; COMBINING SHARP SIGN +1ADA ; COMBINING FLAT SIGN +1ADB ; COMBINING DOWN TACK ABOVE +1ADC ; COMBINING FALLING DIAGONAL DIAERESIS +1ADD ; COMBINING DOT-AND-RING BELOW 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -44422,6 +44427,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150059 +# Total code points: 150064 # EOF From 73d172d4854db56e3478c22e7e483f965d7e43f3 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:18:23 +0200 Subject: [PATCH 05/10] =?UTF-8?q?Let=E2=80=99s=20say=20that=20the=20diacri?= =?UTF-8?q?tics=20are=20diacritics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index a77d2d267..dfd7e3472 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +1AD9..1ADD; Diacritic # PropList-16.0.0.txt # Date: 2023-10-24, 21:03:48 GMT # © 2023 Unicode®, Inc. From 755b3063338d38aa369c1af783d77baf4b4aa7dc Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:20:04 +0200 Subject: [PATCH 06/10] Regenerate UCD --- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index dfd7e3472..437e75826 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,6 +1,5 @@ -1AD9..1ADD; Diacritic # PropList-16.0.0.txt -# Date: 2023-10-24, 21:03:48 GMT +# Date: 2023-10-25, 10:19:34 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -968,6 +967,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Diacritic # Me COMBINING PARENTHESES OVERLAY 1AC1..1ACB ; Diacritic # Mn [11] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING TRIPLE ACUTE ACCENT +1AD9..1ADD ; Diacritic # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW 1B34 ; Diacritic # Mn BALINESE SIGN REREKAN 1B44 ; Diacritic # Mc BALINESE ADEG ADEG 1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG @@ -1111,7 +1111,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1152 +# Total code points: 1157 # ================================================ From 74fe9e83ff3c6414a0357cd42a77d1b2b939a801 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 18 Dec 2023 17:53:44 +0100 Subject: [PATCH 07/10] UnicodeData.txt from L2/23-206R --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 83a973b88..a2fc09a27 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -6143,7 +6143,7 @@ 1ADA;COMBINING FLAT SIGN;Mn;230;NSM;;;;;N;;;;; 1ADB;COMBINING DOWN TACK ABOVE;Mn;230;NSM;;;;;N;;;;; 1ADC;COMBINING FALLING DIAGONAL DIAERESIS;Mn;230;NSM;;;;;N;;;;; -1ADD;COMBINING DOT-AND-RING BELOW;Mn;230;NSM;;;;;N;;;;; +1ADD;COMBINING DOT-AND-RING BELOW;Mn;220;NSM;;;;;N;;;;; 1B00;BALINESE SIGN ULU RICEM;Mn;0;NSM;;;;;N;;;;; 1B01;BALINESE SIGN ULU CANDRA;Mn;0;NSM;;;;;N;;;;; 1B02;BALINESE SIGN CECEK;Mn;0;NSM;;;;;N;;;;; From 254f9875e91993ecadf41a67bc96436fb3abdeee Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 18 Dec 2023 17:56:25 +0100 Subject: [PATCH 08/10] Regenerate UCD --- unicodetools/data/ucd/dev/NormalizationTest.txt | 6 +++--- .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index b5f763134..2b99ffcae 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2023-10-25, 10:15:39 GMT +# Date: 2023-12-18, 16:55:39 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -18032,8 +18032,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 1ADB 0315 0300 05AE 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062; # (a◌᫛◌̕◌̀◌֮b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOWN TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1ADC 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062; # (a◌̕◌̀◌֮◌᫜b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING FALLING DIAGONAL DIAERESIS, LATIN SMALL LETTER B 0061 1ADC 0315 0300 05AE 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062; # (a◌᫜◌̕◌̀◌֮b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING FALLING DIAGONAL DIAERESIS, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B -0061 0315 0300 05AE 1ADD 0062;00E0 05AE 1ADD 0315 0062;0061 05AE 0300 1ADD 0315 0062;00E0 05AE 1ADD 0315 0062;0061 05AE 0300 1ADD 0315 0062; # (a◌̕◌̀◌֮◌᫝b; à◌֮◌᫝◌̕b; a◌֮◌̀◌᫝◌̕b; à◌֮◌᫝◌̕b; a◌֮◌̀◌᫝◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DOT-AND-RING BELOW, LATIN SMALL LETTER B -0061 1ADD 0315 0300 05AE 0062;0061 05AE 1ADD 0300 0315 0062;0061 05AE 1ADD 0300 0315 0062;0061 05AE 1ADD 0300 0315 0062;0061 05AE 1ADD 0300 0315 0062; # (a◌᫝◌̕◌̀◌֮b; a◌֮◌᫝◌̀◌̕b; a◌֮◌᫝◌̀◌̕b; a◌֮◌᫝◌̀◌̕b; a◌֮◌᫝◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOT-AND-RING BELOW, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 1ADD 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062; # (a◌֚◌̖◌᷺◌᫝b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, COMBINING DOT-AND-RING BELOW, LATIN SMALL LETTER B +0061 1ADD 059A 0316 1DFA 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062; # (a◌᫝◌֚◌̖◌᷺b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; ) LATIN SMALL LETTER A, COMBINING DOT-AND-RING BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 3099 093C 16FF0 1B34 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062; # (a◌゙◌𖿰़◌᬴b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; ) LATIN SMALL LETTER A, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, BALINESE SIGN REREKAN, LATIN SMALL LETTER B 0061 1B34 3099 093C 16FF0 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062; # (a◌᬴◌゙◌𖿰़b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; ) LATIN SMALL LETTER A, BALINESE SIGN REREKAN, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, LATIN SMALL LETTER B 0061 05B0 094D 3099 1B44 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062; # (a◌ְ◌्◌゙᭄b; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, BALINESE ADEG ADEG, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index fe1d165c9..884574e57 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-10-25, 10:15:28 GMT +# Date: 2023-12-18, 16:55:13 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2557,6 +2557,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1ABF..1AC0 ; 220 # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW 1AC3..1AC4 ; 220 # Mn [2] COMBINING LEFT PARENTHESIS BELOW LEFT..COMBINING RIGHT PARENTHESIS BELOW RIGHT 1ACA ; 220 # Mn COMBINING DOUBLE PLUS SIGN BELOW +1ADD ; 220 # Mn COMBINING DOT-AND-RING BELOW 1B6C ; 220 # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP 1CD5..1CD9 ; 220 # Mn [5] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER 1CDC..1CDF ; 220 # Mn [4] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE THREE DOTS BELOW @@ -2589,7 +2590,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 182 +# Total code points: 183 # ================================================ @@ -2699,7 +2700,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE 1ACB..1ACE ; 230 # Mn [4] COMBINING TRIPLE ACUTE ACCENT..COMBINING LATIN SMALL LETTER INSULAR T -1AD9..1ADD ; 230 # Mn [5] COMBINING SHARP SIGN..COMBINING DOT-AND-RING BELOW +1AD9..1ADC ; 230 # Mn [4] COMBINING SHARP SIGN..COMBINING FALLING DIAGONAL DIAERESIS 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2766,7 +2767,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 522 +# Total code points: 521 # ================================================ From 2f32b84fbb148afa37f817bf5d118ff934788c56 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Apr 2024 23:11:29 +0200 Subject: [PATCH 09/10] Approve the name change for provisionally assigned character U+1ADC COMBINING FALLING DIAGONAL DIAERESIS to COMBINING DIAERESIS WITH RAISED LEFT DOT. --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 943901a83..f2bf5968e 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -6140,7 +6140,7 @@ 1AD9;COMBINING SHARP SIGN;Mn;230;NSM;;;;;N;;;;; 1ADA;COMBINING FLAT SIGN;Mn;230;NSM;;;;;N;;;;; 1ADB;COMBINING DOWN TACK ABOVE;Mn;230;NSM;;;;;N;;;;; -1ADC;COMBINING FALLING DIAGONAL DIAERESIS;Mn;230;NSM;;;;;N;;;;; +1ADC;COMBINING DIAERESIS WITH RAISED LEFT DOT;Mn;230;NSM;;;;;N;;;;; 1ADD;COMBINING DOT-AND-RING BELOW;Mn;220;NSM;;;;;N;;;;; 1B00;BALINESE SIGN ULU RICEM;Mn;0;NSM;;;;;N;;;;; 1B01;BALINESE SIGN ULU CANDRA;Mn;0;NSM;;;;;N;;;;; From 667e8e740059376b27f07cf43b9f7ec577f7a0ff Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Apr 2024 23:35:51 +0200 Subject: [PATCH 10/10] Regenerate UCD --- unicodetools/data/ucd/dev/NormalizationTest.txt | 6 +++--- .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 4 ++-- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index fa8ea55a5..15b1f76a6 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2024-04-25, 21:08:16 GMT +# Date: 2024-04-25, 21:15:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -18104,8 +18104,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 1ADA 0315 0300 05AE 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062; # (a◌᫚◌̕◌̀◌֮b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING FLAT SIGN, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1ADB 0062;00E0 05AE 1ADB 0315 0062;0061 05AE 0300 1ADB 0315 0062;00E0 05AE 1ADB 0315 0062;0061 05AE 0300 1ADB 0315 0062; # (a◌̕◌̀◌֮◌᫛b; à◌֮◌᫛◌̕b; a◌֮◌̀◌᫛◌̕b; à◌֮◌᫛◌̕b; a◌֮◌̀◌᫛◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DOWN TACK ABOVE, LATIN SMALL LETTER B 0061 1ADB 0315 0300 05AE 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062; # (a◌᫛◌̕◌̀◌֮b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOWN TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B -0061 0315 0300 05AE 1ADC 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062; # (a◌̕◌̀◌֮◌᫜b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING FALLING DIAGONAL DIAERESIS, LATIN SMALL LETTER B -0061 1ADC 0315 0300 05AE 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062; # (a◌᫜◌̕◌̀◌֮b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING FALLING DIAGONAL DIAERESIS, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1ADC 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062; # (a◌̕◌̀◌֮◌᫜b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DIAERESIS WITH RAISED LEFT DOT, LATIN SMALL LETTER B +0061 1ADC 0315 0300 05AE 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062; # (a◌᫜◌̕◌̀◌֮b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DIAERESIS WITH RAISED LEFT DOT, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 059A 0316 1DFA 1ADD 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062; # (a◌֚◌̖◌᷺◌᫝b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, COMBINING DOT-AND-RING BELOW, LATIN SMALL LETTER B 0061 1ADD 059A 0316 1DFA 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062; # (a◌᫝◌֚◌̖◌᷺b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; ) LATIN SMALL LETTER A, COMBINING DOT-AND-RING BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 3099 093C 16FF0 1B34 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062; # (a◌゙◌𖿰़◌᬴b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; ) LATIN SMALL LETTER A, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, BALINESE SIGN REREKAN, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 2487e12a2..e52ffd39c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-25, 21:07:47 GMT +# Date: 2024-04-25, 21:15:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2738,7 +2738,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE 1ACB..1ACE ; 230 # Mn [4] COMBINING TRIPLE ACUTE ACCENT..COMBINING LATIN SMALL LETTER INSULAR T -1AD9..1ADC ; 230 # Mn [4] COMBINING SHARP SIGN..COMBINING FALLING DIAGONAL DIAERESIS +1AD9..1ADC ; 230 # Mn [4] COMBINING SHARP SIGN..COMBINING DIAERESIS WITH RAISED LEFT DOT 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 2d957d277..af1d220fd 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-04-25, 21:07:58 GMT +# Date: 2024-04-25, 21:15:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -6115,7 +6115,7 @@ 1AD9 ; COMBINING SHARP SIGN 1ADA ; COMBINING FLAT SIGN 1ADB ; COMBINING DOWN TACK ABOVE -1ADC ; COMBINING FALLING DIAGONAL DIAERESIS +1ADC ; COMBINING DIAERESIS WITH RAISED LEFT DOT 1ADD ; COMBINING DOT-AND-RING BELOW 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA