From 35adbecfd3fdb605f2b845ffb68d57e5fbe8c341 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Mar 2024 23:10:02 +0100 Subject: [PATCH 1/8] UnicodeData.txt lines from proposal --- unicodetools/data/ucd/dev/UnicodeData.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 08593a319..ebdee19dc 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,15 @@ +1AE0;COMBINING LEFT TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE1;COMBINING RIGHT TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE2;COMBINING MINUS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE3;COMBINING INVERTED BRIDGE ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE4;COMBINING SQUARE ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE5;COMBINING SEAGULL ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE6;COMBINING DOUBLE ARCH BELOW;Mn;220;NSM;;;;;N;;;;; +1AE7;COMBINING DOUBLE ARCH ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE8;COMBINING EQUALS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE9;COMBINING LEFT ANGLE CENTERED ABOVE;Mn;230;NSM;;;;;N;;;;; +1AEA;COMBINING UPWARDS ARROW ABOVE;Mn;230;NSM;;;;;N;;;;; +1AEB;COMBINING DOUBLE RIGHTWARDS ARROW ABOVE;Mn;234;NSM;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 938018366f15cb1a288546558ed183ee0aee0025 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Mar 2024 23:12:06 +0100 Subject: [PATCH 2/8] LineBreak.txt as described in the proposal --- unicodetools/data/ucd/dev/LineBreak.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 1d8bd89ec..a3336846d 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,3 +1,5 @@ +1AEB;GL +1AE0..1AEA;CM # LineBreak-16.0.0.txt # Date: 2024-02-02, 23:11:30 GMT # © 2024 Unicode®, Inc. From 5852bf87a578f296b6faf5544971c200c7691f1f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Mar 2024 23:13:08 +0100 Subject: [PATCH 3/8] Inherited --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 2e0b4fd4b..fdb258e4e 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +1AE0..1AEB;Inherited # Scripts-16.0.0.txt # Date: 2024-02-02, 23:11:49 GMT # © 2024 Unicode®, Inc. From ff4555f2d1466f234c7b319c16640ad07c085389 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Mar 2024 23:17:43 +0100 Subject: [PATCH 4/8] diacritics are diacritics. --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 1e3e2912b..a9aeb17ad 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +1AE0..1AEB;Diacritic # PropList-16.0.0.txt # Date: 2024-03-12, 13:28:44 GMT # © 2024 Unicode®, Inc. From 5eb6703b3e7f7f92633001ea9cc38844b3226ba5 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Mar 2024 23:19:37 +0100 Subject: [PATCH 5/8] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 17 +++++++----- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 6 ++--- .../data/ucd/dev/NormalizationTest.txt | 26 ++++++++++++++++++- unicodetools/data/ucd/dev/PropList.txt | 6 ++--- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 24 ++++++++--------- .../data/ucd/dev/VerticalOrientation.txt | 3 ++- .../dev/auxiliary/GraphemeBreakProperty.txt | 5 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 5 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 9 ++++--- .../dev/extracted/DerivedCombiningClass.txt | 16 +++++++----- .../dev/extracted/DerivedEastAsianWidth.txt | 5 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++--- .../ucd/dev/extracted/DerivedJoiningType.txt | 5 ++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 12 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 16 ++++++++++-- 19 files changed, 121 insertions(+), 63 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index d116dccaa..90457a9df 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-02-02, 23:11:18 GMT +# Date: 2024-03-15, 22:18:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2012,6 +2012,7 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT 0897 ; 16.0 # ARABIC PEPET 0C5C ; 16.0 # TELUGU ARCHAIC SHRII 0CDC ; 16.0 # KANNADA ARCHAIC SHRII +1AE0..1AEB ; 16.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B4E..1B4F ; 16.0 # [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN 1B7F ; 16.0 # BALINESE PANTI BAWAK 1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE @@ -2059,6 +2060,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5187 +# Total code points: 5199 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1e054c4f2..eeb1df089 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-02-02, 23:11:24 GMT +# Date: 2024-03-15, 22:19:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3195,6 +3195,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Case_Ignorable # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; Case_Ignorable # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3505,7 +3506,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2761 # ================================================ @@ -7458,6 +7459,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; ID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; ID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -8370,7 +8372,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144543 +# Total code points: 144555 # ================================================ @@ -9640,6 +9642,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; XID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; XID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -10557,7 +10560,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144524 +# Total code points: 144536 # ================================================ @@ -10776,6 +10779,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] ..;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -6151,6 +6139,18 @@ 1ACC;COMBINING LATIN SMALL LETTER INSULAR G;Mn;230;NSM;;;;;N;;;;; 1ACD;COMBINING LATIN SMALL LETTER INSULAR R;Mn;230;NSM;;;;;N;;;;; 1ACE;COMBINING LATIN SMALL LETTER INSULAR T;Mn;230;NSM;;;;;N;;;;; +1AE0;COMBINING LEFT TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE1;COMBINING RIGHT TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE2;COMBINING MINUS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE3;COMBINING INVERTED BRIDGE ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE4;COMBINING SQUARE ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE5;COMBINING SEAGULL ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE6;COMBINING DOUBLE ARCH BELOW;Mn;220;NSM;;;;;N;;;;; +1AE7;COMBINING DOUBLE ARCH ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE8;COMBINING EQUALS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE9;COMBINING LEFT ANGLE CENTERED ABOVE;Mn;230;NSM;;;;;N;;;;; +1AEA;COMBINING UPWARDS ARROW ABOVE;Mn;230;NSM;;;;;N;;;;; +1AEB;COMBINING DOUBLE RIGHTWARDS ARROW ABOVE;Mn;234;NSM;;;;;N;;;;; 1B00;BALINESE SIGN ULU RICEM;Mn;0;NSM;;;;;N;;;;; 1B01;BALINESE SIGN ULU CANDRA;Mn;0;NSM;;;;;N;;;;; 1B02;BALINESE SIGN CECEK;Mn;0;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 187a0888d..ce2aa8d33 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-02-02, 23:11:51 GMT +# Date: 2024-03-15, 22:19:33 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -741,6 +741,7 @@ 1AB0..1ABD ; R # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; R # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; R # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; R # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; R # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; R # Mc BALINESE SIGN BISAH 1B05..1B33 ; R # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 1241b2d2b..84a86daba 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2024-02-02, 23:11:29 GMT +# Date: 2024-03-15, 22:19:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -241,6 +241,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -490,7 +491,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2190 +# Total code points: 2202 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index cec52074f..567de0f48 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-02-02, 23:11:49 GMT +# Date: 2024-03-15, 22:19:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -248,6 +248,7 @@ 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -586,7 +587,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2601 +# Total code points: 2613 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index fd760073c..82cfa26f2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-02-22, 15:58:24 GMT +# Date: 2024-03-15, 22:19:33 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -284,6 +284,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -623,7 +624,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2605 +# Total code points: 2617 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index d991ed1b8..8409e077b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2024-02-02, 23:11:23 GMT +# Date: 2024-03-15, 22:18:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1219,8 +1219,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815349 code points not listed here. -# Total code points: 1095518 +# The above property value applies to 815337 code points not listed here. +# Total code points: 1095506 # ================================================ @@ -2190,6 +2190,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; NSM # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2408,7 +2409,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2028 +# Total code points: 2040 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 639fce0ae..003d72031 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-02-02, 23:11:23 GMT +# Date: 2024-03-15, 22:19:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2060,8 +2060,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821579 code points not listed here. -# Total code points: 1113178 +# The above property value applies to 821567 code points not listed here. +# Total code points: 1113166 # ================================================ @@ -2595,6 +2595,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1ABF..1AC0 ; 220 # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW 1AC3..1AC4 ; 220 # Mn [2] COMBINING LEFT PARENTHESIS BELOW LEFT..COMBINING RIGHT PARENTHESIS BELOW RIGHT 1ACA ; 220 # Mn COMBINING DOUBLE PLUS SIGN BELOW +1AE6 ; 220 # Mn COMBINING DOUBLE ARCH BELOW 1B6C ; 220 # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP 1CD5..1CD9 ; 220 # Mn [5] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER 1CDC..1CDF ; 220 # Mn [4] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE THREE DOTS BELOW @@ -2627,7 +2628,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 182 +# Total code points: 183 # ================================================ @@ -2737,6 +2738,8 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE 1ACB..1ACE ; 230 # Mn [4] COMBINING TRIPLE ACUTE ACCENT..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AE5 ; 230 # Mn [6] COMBINING LEFT TACK ABOVE..COMBINING SEAGULL ABOVE +1AE7..1AEA ; 230 # Mn [4] COMBINING DOUBLE ARCH ABOVE..COMBINING UPWARDS ARROW ABOVE 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2803,7 +2806,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 517 +# Total code points: 527 # ================================================ @@ -2835,9 +2838,10 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 035D..035E ; 234 # Mn [2] COMBINING DOUBLE BREVE..COMBINING DOUBLE MACRON 0360..0361 ; 234 # Mn [2] COMBINING DOUBLE TILDE..COMBINING DOUBLE INVERTED BREVE +1AEB ; 234 # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1DCD ; 234 # Mn COMBINING DOUBLE CIRCUMFLEX ABOVE -# Total code points: 5 +# Total code points: 6 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 44585c48d..d1e29ca6e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-02-02, 23:11:25 GMT +# Date: 2024-03-15, 22:19:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -692,6 +692,7 @@ 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; N # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -2103,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761097 code points not listed here. +# The above property value applies to 761085 code points not listed here. # Total code points: 792608 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index b9105da02..90400fed8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2024-02-02, 23:11:25 GMT +# Date: 2024-03-15, 22:19:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -228,7 +228,8 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1ACF..1AFF ; Cn # [49] .. +1ACF..1ADF ; Cn # [17] .. +1AEC..1AFF ; Cn # [20] .. 1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. @@ -747,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819531 +# Total code points: 819519 # ================================================ @@ -2857,6 +2858,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; Mn # [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; Mn # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3072,7 +3074,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2020 +# Total code points: 2032 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 6407d5c0d..f3c809038 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2024-02-02, 23:11:26 GMT +# Date: 2024-03-15, 22:19:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -350,6 +350,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; T # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEB ; T # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -581,6 +582,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2185 +# Total code points: 2197 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 3a3ecf9c1..b5e54561b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-02-02, 23:11:26 GMT +# Date: 2024-03-15, 22:19:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757651 code points not listed here. -# Total code points: 895119 +# The above property value applies to 757639 code points not listed here. +# Total code points: 895107 # ================================================ @@ -316,6 +316,7 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 0F12 ; GL # Po TIBETAN MARK RGYA GRAM SHAD 0FD9..0FDA ; GL # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS 180E ; GL # Cf MONGOLIAN VOWEL SEPARATOR +1AEB ; GL # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1DCD ; GL # Mn COMBINING DOUBLE CIRCUMFLEX ABOVE 1DFC ; GL # Mn COMBINING DOUBLE INVERTED BREVE BELOW 2007 ; GL # Zs FIGURE SPACE @@ -326,7 +327,7 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 13439..1343B ; GL # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER -# Total code points: 32 +# Total code points: 33 # ================================================ @@ -2056,6 +2057,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AE0..1AEA ; CM # Mn [11] COMBINING LEFT TACK ABOVE..COMBINING UPWARDS ARROW ABOVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2382,7 +2384,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2479 +# Total code points: 2490 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 4778d471e..490c91cb0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-02-02, 23:11:26 GMT +# Date: 2024-03-15, 22:19:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -6114,6 +6114,18 @@ 1ACC ; COMBINING LATIN SMALL LETTER INSULAR G 1ACD ; COMBINING LATIN SMALL LETTER INSULAR R 1ACE ; COMBINING LATIN SMALL LETTER INSULAR T +1AE0 ; COMBINING LEFT TACK ABOVE +1AE1 ; COMBINING RIGHT TACK ABOVE +1AE2 ; COMBINING MINUS SIGN ABOVE +1AE3 ; COMBINING INVERTED BRIDGE ABOVE +1AE4 ; COMBINING SQUARE ABOVE +1AE5 ; COMBINING SEAGULL ABOVE +1AE6 ; COMBINING DOUBLE ARCH BELOW +1AE7 ; COMBINING DOUBLE ARCH ABOVE +1AE8 ; COMBINING EQUALS SIGN ABOVE +1AE9 ; COMBINING LEFT ANGLE CENTERED ABOVE +1AEA ; COMBINING UPWARDS ARROW ABOVE +1AEB ; COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -45369,6 +45381,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155000 +# Total code points: 155012 # EOF From 51a9db3f609fc1ccd40083d3964fdd1781885a66 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 28 Aug 2024 16:15:42 +0200 Subject: [PATCH 6/8] en-GB-oxendict --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 458cf7ccf..446e94b7a 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -6146,7 +6146,7 @@ 1AE6;COMBINING DOUBLE ARCH BELOW;Mn;220;NSM;;;;;N;;;;; 1AE7;COMBINING DOUBLE ARCH ABOVE;Mn;230;NSM;;;;;N;;;;; 1AE8;COMBINING EQUALS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; -1AE9;COMBINING LEFT ANGLE CENTERED ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE9;COMBINING LEFT ANGLE CENTRED ABOVE;Mn;230;NSM;;;;;N;;;;; 1AEA;COMBINING UPWARDS ARROW ABOVE;Mn;230;NSM;;;;;N;;;;; 1AEB;COMBINING DOUBLE RIGHTWARDS ARROW ABOVE;Mn;234;NSM;;;;;N;;;;; 1B00;BALINESE SIGN ULU RICEM;Mn;0;NSM;;;;;N;;;;; From 9ac6d046daa0f876c64a0a78e001b6bbf3be1dcb Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 28 Aug 2024 16:20:13 +0200 Subject: [PATCH 7/8] Regenerate UCD --- unicodetools/data/ucd/dev/NormalizationTest.txt | 6 +++--- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index d8805c906..1c4f618a3 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2024-06-06, 10:12:52 GMT +# Date: 2024-08-28, 14:19:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -18116,8 +18116,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 1AE7 0315 0300 05AE 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062; # (a◌᫧◌̕◌̀◌֮b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOUBLE ARCH ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1AE8 0062;00E0 05AE 1AE8 0315 0062;0061 05AE 0300 1AE8 0315 0062;00E0 05AE 1AE8 0315 0062;0061 05AE 0300 1AE8 0315 0062; # (a◌̕◌̀◌֮◌᫨b; à◌֮◌᫨◌̕b; a◌֮◌̀◌᫨◌̕b; à◌֮◌᫨◌̕b; a◌֮◌̀◌᫨◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING EQUALS SIGN ABOVE, LATIN SMALL LETTER B 0061 1AE8 0315 0300 05AE 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062; # (a◌᫨◌̕◌̀◌֮b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING EQUALS SIGN ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B -0061 0315 0300 05AE 1AE9 0062;00E0 05AE 1AE9 0315 0062;0061 05AE 0300 1AE9 0315 0062;00E0 05AE 1AE9 0315 0062;0061 05AE 0300 1AE9 0315 0062; # (a◌̕◌̀◌֮◌᫩b; à◌֮◌᫩◌̕b; a◌֮◌̀◌᫩◌̕b; à◌֮◌᫩◌̕b; a◌֮◌̀◌᫩◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LEFT ANGLE CENTERED ABOVE, LATIN SMALL LETTER B -0061 1AE9 0315 0300 05AE 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062; # (a◌᫩◌̕◌̀◌֮b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LEFT ANGLE CENTERED ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE9 0062;00E0 05AE 1AE9 0315 0062;0061 05AE 0300 1AE9 0315 0062;00E0 05AE 1AE9 0315 0062;0061 05AE 0300 1AE9 0315 0062; # (a◌̕◌̀◌֮◌᫩b; à◌֮◌᫩◌̕b; a◌֮◌̀◌᫩◌̕b; à◌֮◌᫩◌̕b; a◌֮◌̀◌᫩◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LEFT ANGLE CENTRED ABOVE, LATIN SMALL LETTER B +0061 1AE9 0315 0300 05AE 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062; # (a◌᫩◌̕◌̀◌֮b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LEFT ANGLE CENTRED ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1AEA 0062;00E0 05AE 1AEA 0315 0062;0061 05AE 0300 1AEA 0315 0062;00E0 05AE 1AEA 0315 0062;0061 05AE 0300 1AEA 0315 0062; # (a◌̕◌̀◌֮◌᫪b; à◌֮◌᫪◌̕b; a◌֮◌̀◌᫪◌̕b; à◌֮◌᫪◌̕b; a◌֮◌̀◌᫪◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING UPWARDS ARROW ABOVE, LATIN SMALL LETTER B 0061 1AEA 0315 0300 05AE 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062; # (a◌᫪◌̕◌̀◌֮b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING UPWARDS ARROW ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0345 035D 035C 1AEB 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062; # (a◌ͅ◌͝◌͜◌᫫b; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; ) LATIN SMALL LETTER A, COMBINING GREEK YPOGEGRAMMENI, COMBINING DOUBLE BREVE, COMBINING DOUBLE BREVE BELOW, COMBINING DOUBLE RIGHTWARDS ARROW ABOVE, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index e467ec6c7..1c870e92d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-06-06, 10:12:45 GMT +# Date: 2024-08-28, 14:19:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -6121,7 +6121,7 @@ 1AE6 ; COMBINING DOUBLE ARCH BELOW 1AE7 ; COMBINING DOUBLE ARCH ABOVE 1AE8 ; COMBINING EQUALS SIGN ABOVE -1AE9 ; COMBINING LEFT ANGLE CENTERED ABOVE +1AE9 ; COMBINING LEFT ANGLE CENTRED ABOVE 1AEA ; COMBINING UPWARDS ARROW ABOVE 1AEB ; COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00 ; BALINESE SIGN ULU RICEM From 867b8afb08c7b92415b63b3f0aeabf5d9b9db580 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 16 Oct 2024 22:36:38 +0200 Subject: [PATCH 8/8] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 35ef236f7..ba062a72d 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ -# DerivedAge-16.0.0.txt -# Date: 2024-08-28, 14:32:04 GMT +# DerivedAge-17.0.0.txt +# Date: 2024-10-16, 20:35:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2010,7 +2010,6 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT # Newly assigned in Unicode 16.0.0 (September, 2024) 0897 ; 16.0 # ARABIC PEPET -1AE0..1AEB ; 16.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B4E..1B4F ; 16.0 # [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN 1B7F ; 16.0 # BALINESE PANTI BAWAK 1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE @@ -2058,6 +2057,16 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5197 +# Total code points: 5185 + +# ================================================ + +# Age=V17_0 + +# Newly assigned in Unicode 17.0.0 (September, 2025) + +1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE + +# Total code points: 12 # EOF