From a3e374000ae31f2139a96a22584b6f6dad9b9238 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 17:02:26 +0100 Subject: [PATCH 01/38] ARABIC LETTER NOON WITH RING ABOVE (#703) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * UnicodeData.txt from the proposal * lb=AL like nearby noons * Script=Arabic * ArabicShaping.txt * Regenerate UCD * Comparison with other نs * New syntax * Ignore Unicode_1_Name * Ignore Block too * Expect what the comment says we expect. --- unicodetools/data/ucd/dev/ArabicShaping.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 14 +++++++-- .../data/ucd/dev/DerivedCoreProperties.txt | 28 ++++++++--------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 6 ++-- unicodetools/data/ucd/dev/Scripts.txt | 6 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + .../data/ucd/dev/VerticalOrientation.txt | 4 +-- .../dev/auxiliary/SentenceBreakProperty.txt | 8 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 8 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 8 ++--- .../dev/extracted/DerivedCombiningClass.txt | 8 ++--- .../dev/extracted/DerivedEastAsianWidth.txt | 8 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 11 ++++--- .../ucd/dev/extracted/DerivedJoiningGroup.txt | 7 +++-- .../ucd/dev/extracted/DerivedJoiningType.txt | 7 +++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 12 ++++---- .../data/ucd/dev/extracted/DerivedName.txt | 7 +++-- .../text/UCD/AdditionComparisons/118.txt | 30 +++++++++++++++++++ 19 files changed, 111 insertions(+), 67 deletions(-) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt diff --git a/unicodetools/data/ucd/dev/ArabicShaping.txt b/unicodetools/data/ucd/dev/ArabicShaping.txt index 3c9e0ca80..20d103cd5 100644 --- a/unicodetools/data/ucd/dev/ArabicShaping.txt +++ b/unicodetools/data/ucd/dev/ArabicShaping.txt @@ -482,6 +482,7 @@ 088C; TAH WITH 3 DOTS BELOW; D; TAH 088D; KEHEH WITH VERTICAL 2 DOTS BELOW; D; GAF 088E; VERTICAL TAIL; R; VERTICAL TAIL +088F; DOTLESS NOON WITH SEPARATE RING ABOVE; D; NOON 0890; ARABIC POUND MARK ABOVE; U; No_Joining_Group 0891; ARABIC PIASTRE MARK ABOVE; U; No_Joining_Group diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e48..a94174a98 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ -# DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# DerivedAge-17.0.0.txt +# Date: 2024-10-15, 12:06:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2059,4 +2059,14 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Total code points: 5185 +# ================================================ + +# Age=V17_0 + +# Newly assigned in Unicode 17.0.0 (September, 2025) + +088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE + +# Total code points: 1 + # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1075638f1..952f65edf 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ -# DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-31, 18:09:32 GMT +# DerivedCoreProperties-17.0.0.txt +# Date: 2024-10-15, 12:06:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -344,7 +344,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Alphabetic # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0897 ; Alphabetic # Mn ARABIC PEPET 08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH @@ -1441,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142759 +# Total code points: 142760 # ================================================ @@ -6259,7 +6259,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; ID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; ID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ID_Start # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; ID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ID_Start # Lm ARABIC SMALL FARSI YEH 0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -6962,7 +6962,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141270 # ================================================ @@ -7068,7 +7068,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ID_Continue # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0897..089F ; ID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH @@ -8370,7 +8370,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144542 # ================================================ @@ -8441,7 +8441,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; XID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; XID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; XID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; XID_Start # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; XID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; XID_Start # Lm ARABIC SMALL FARSI YEH 0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -9148,7 +9148,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141247 # ================================================ @@ -9250,7 +9250,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; XID_Continue # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0897..089F ; XID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH @@ -10557,7 +10557,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144523 # ================================================ @@ -11190,7 +11190,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0860..086A ; Grapheme_Base # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Grapheme_Base # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; Grapheme_Base # Sk ARABIC RAISED ROUND DOT -0889..088E ; Grapheme_Base # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Grapheme_Base # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; Grapheme_Base # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; Grapheme_Base # Lm ARABIC SMALL FARSI YEH 0903 ; Grapheme_Base # Mc DEVANAGARI SIGN VISARGA @@ -12812,7 +12812,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152730 +# Total code points: 152731 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea..a86664d64 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# Date: 2024-05-31, 18:59:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -332,7 +332,7 @@ 0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; N # Sk ARABIC RAISED ROUND DOT -0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; N # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3ce258217..174be204c 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:55 GMT +# LineBreak-17.0.0.txt +# Date: 2024-10-15, 12:06:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -278,7 +278,7 @@ 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; NU # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd..fd898bef9 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Date: 2024-05-31, 19:00:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -869,7 +869,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE 0870..0887 ; Arabic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; Arabic # Sk ARABIC RAISED ROUND DOT -0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Arabic # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; Arabic # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -926,7 +926,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1373 +# Total code points: 1374 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a373..f323b4ad6 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -2121,6 +2121,7 @@ 088C;ARABIC LETTER TAH WITH THREE DOTS BELOW;Lo;0;AL;;;;;N;;;;; 088D;ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 088E;ARABIC VERTICAL TAIL;Lo;0;AL;;;;;N;;;;; +088F;ARABIC LETTER NOON WITH RING ABOVE;Lo;0;AL;;;;;N;;;;; 0890;ARABIC POUND MARK ABOVE;Cf;0;AN;;;;;N;;;;; 0891;ARABIC PIASTRE MARK ABOVE;Cf;0;AN;;;;;N;;;;; 0897;ARABIC PEPET;Mn;230;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd7228..82f24ccc7 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# Date: 2024-05-31, 19:00:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -266,7 +266,7 @@ 0860..086A ; R # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; R # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; R # Sk ARABIC RAISED ROUND DOT -0889..088E ; R # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; R # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; R # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; R # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; R # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index ca3689e6b..3de833156 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ -# SentenceBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:32 GMT +# SentenceBreakProperty-17.0.0.txt +# Date: 2024-10-15, 12:07:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2022,7 +2022,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; OLetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; OLetter # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; OLetter # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; OLetter # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; OLetter # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; OLetter # Lm ARABIC SMALL FARSI YEH 0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -2585,7 +2585,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136908 +# Total code points: 136909 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e557c3d0d..c99a5cc9b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ -# WordBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:36 GMT +# WordBreakProperty-17.0.0.txt +# Date: 2024-10-15, 12:07:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -745,7 +745,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; ALetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ALetter # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; ALetter # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ALetter # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; ALetter # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ALetter # Lm ARABIC SMALL FARSI YEH 0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -1355,7 +1355,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33792 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa..fe077592f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ -# DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# DerivedBidiClass-17.0.0.txt +# Date: 2024-10-15, 12:06:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2442,7 +2442,7 @@ E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; AL # Lm ARABIC SMALL FARSI YEH FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM @@ -2501,7 +2501,7 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 293 code points not listed here. +# The above property value applies to 292 code points not listed here. # Total code points: 1767 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96..a8f2afcb0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ -# DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# DerivedCombiningClass-17.0.0.txt +# Date: 2024-10-15, 12:06:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -182,7 +182,7 @@ 0860..086A ; 0 # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; 0 # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; 0 # Sk ARABIC RAISED ROUND DOT -0889..088E ; 0 # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; 0 # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; 0 # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 08A0..08C8 ; 0 # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; 0 # Lm ARABIC SMALL FARSI YEH @@ -2060,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. +# The above property value applies to 821580 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaa..357665e7f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ -# DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedEastAsianWidth-17.0.0.txt +# Date: 2024-10-15, 12:06:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -223,7 +223,7 @@ 0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; N # Sk ARABIC RAISED ROUND DOT -0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; N # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -2103,7 +2103,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. +# The above property value applies to 761098 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca9..aa74b7e2e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ -# DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedGeneralCategory-17.0.0.txt +# Date: 2024-10-15, 12:06:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -36,7 +36,6 @@ 085C..085D ; Cn # [2] .. 085F ; Cn # 086B..086F ; Cn # [5] .. -088F ; Cn # 0892..0896 ; Cn # [5] .. 0984 ; Cn # 098D..098E ; Cn # [2] .. @@ -747,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819533 +# Total code points: 819532 # ================================================ @@ -2203,7 +2202,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0840..0858 ; Lo # [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; Lo # [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Lo # [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; Lo # [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Lo # [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; Lo # [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 0904..0939 ; Lo # [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; Lo # DEVANAGARI SIGN AVAGRAHA @@ -2708,7 +2707,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136477 +# Total code points: 136478 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt index 17778a8a0..d2c5547ed 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt @@ -1,5 +1,5 @@ -# DerivedJoiningGroup-16.0.0.txt -# Date: 2024-07-30, 21:15:55 GMT +# DerivedJoiningGroup-17.0.0.txt +# Date: 2024-10-15, 12:06:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -231,8 +231,9 @@ 06B9..06BC ; Noon # Lo [4] ARABIC LETTER NOON WITH DOT BELOW..ARABIC LETTER NOON WITH RING 0767..0769 ; Noon # Lo [3] ARABIC LETTER NOON WITH TWO DOTS BELOW..ARABIC LETTER NOON WITH SMALL V 0889 ; Noon # Lo ARABIC LETTER NOON WITH INVERTED SMALL V +088F ; Noon # Lo ARABIC LETTER NOON WITH RING ABOVE -# Total code points: 9 +# Total code points: 10 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 3841a92cc..d08bfdd7e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ -# DerivedJoiningType-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedJoiningType-17.0.0.txt +# Date: 2024-10-15, 12:06:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -70,6 +70,7 @@ 0868 ; D # Lo SYRIAC LETTER MALAYALAM LLA 0886 ; D # Lo ARABIC LETTER THIN YEH 0889..088D ; D # Lo [5] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW +088F ; D # Lo ARABIC LETTER NOON WITH RING ABOVE 08A0..08A9 ; D # Lo [10] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE 08AF..08B0 ; D # Lo [2] ARABIC LETTER SAD WITH THREE DOTS BELOW..ARABIC LETTER GAF WITH INVERTED STROKE 08B3..08B8 ; D # Lo [6] ARABIC LETTER AIN WITH THREE DOTS BELOW..ARABIC LETTER TEH WITH SMALL TEH ABOVE @@ -111,7 +112,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10FCA ; D # No CHORASMIAN NUMBER TWENTY 1E900..1E943 ; D # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 612 +# Total code points: 613 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 31d143e92..d368bd0ba 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ -# DerivedLineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:50 GMT +# DerivedLineBreak-17.0.0.txt +# Date: 2024-10-15, 12:06:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757652 code points not listed here. +# Total code points: 895120 # ================================================ @@ -665,7 +665,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; AL # Lm ARABIC SMALL FARSI YEH 0904..0939 ; AL # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -1613,7 +1613,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26679 +# Total code points: 26680 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b5..17697332e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ -# DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedName-17.0.0.txt +# Date: 2024-10-15, 12:06:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2096,6 +2096,7 @@ 088C ; ARABIC LETTER TAH WITH THREE DOTS BELOW 088D ; ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW 088E ; ARABIC VERTICAL TAIL +088F ; ARABIC LETTER NOON WITH RING ABOVE 0890 ; ARABIC POUND MARK ABOVE 0891 ; ARABIC PIASTRE MARK ABOVE 0897 ; ARABIC PEPET @@ -45367,6 +45368,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 154999 # EOF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt new file mode 100644 index 000000000..58662bbb8 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt @@ -0,0 +1,30 @@ +# ARABIC LETTER NOON WITH RING ABOVE (088F) +# https://github.com/unicode-org/utc-release-management/issues/118 + +Let $OldNoons := [ ن ڹ ں ڻ ڼ ڽ ݧ ݨ ݩ ࢉ ] + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring Block Unicode_1_Name: + +# « Another ن, propertywise like the others. ». + +# Differs from ڽ (with three dots above) in Joining_Group, +# ڽ being jg=Nya vs. jg=Noon for the others; see +# https://www.unicode.org/versions/latest/ch09.pdf#G39824. +Ignoring Joining_Group: +Propertywise [ $OldNoons \N{ARABIC LETTER NOON WITH RING ABOVE} ] AreAlike +end Ignoring; + +Propertywise [ $OldNoons - [ڽ] \N{ARABIC LETTER NOON WITH RING ABOVE} ] AreAlike + +end Ignoring; + +end Ignoring; + +end Ignoring; From 7da5a74ba89954c233aa47a24e975d4914de556c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 17:17:50 +0100 Subject: [PATCH 02/38] Bengali Sanskrit BA (#758) * UnicodeData.txt line from L2/22-268R * lb=AL * script * InSC from proposal * Regenerate UCD * UTC-178-C38 Approve the name change for provisionally assigned character U+09FF from BENGALI LETTER ALTERNATE BA to BENGALI LETTER SANSKRIT BA. * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 23 ++++++++++++------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++-- .../data/ucd/dev/IndicSyllabicCategory.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- unicodetools/data/ucd/dev/Scripts.txt | 7 +++--- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + .../data/ucd/dev/VerticalOrientation.txt | 5 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 5 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 5 ++-- .../dev/extracted/DerivedCombiningClass.txt | 5 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 5 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 9 ++++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 9 ++++---- .../data/ucd/dev/extracted/DerivedName.txt | 5 ++-- 16 files changed, 61 insertions(+), 39 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index a94174a98..2bfaba38d 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-10-15, 12:06:14 GMT +# Date: 2024-11-13, 16:03:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2066,7 +2066,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Newly assigned in Unicode 17.0.0 (September, 2025) 088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE +09FF ; 17.0 # BENGALI LETTER SANSKRIT BA -# Total code points: 1 +# Total code points: 2 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 952f65edf..25aac7794 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-10-15, 12:06:47 GMT +# Date: 2024-11-13, 16:03:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -386,6 +386,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 09E2..09E3 ; Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL 09F0..09F1 ; Alphabetic # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; Alphabetic # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; Alphabetic # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; Alphabetic # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; Alphabetic # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -1441,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142760 +# Total code points: 142761 # ================================================ @@ -6280,6 +6281,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 09DF..09E1 ; ID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; ID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; ID_Start # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; ID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; ID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; ID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -6962,7 +6964,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141270 +# Total code points: 141271 # ================================================ @@ -7116,6 +7118,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 09F0..09F1 ; ID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA 09FE ; ID_Continue # Mn BENGALI SANDHI MARK +09FF ; ID_Continue # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; ID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; ID_Continue # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; ID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -8370,7 +8373,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144542 +# Total code points: 144543 # ================================================ @@ -8462,6 +8465,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 09DF..09E1 ; XID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; XID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; XID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; XID_Start # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; XID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; XID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; XID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -9148,7 +9152,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141247 +# Total code points: 141248 # ================================================ @@ -9298,6 +9302,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 09F0..09F1 ; XID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; XID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA 09FE ; XID_Continue # Mn BENGALI SANDHI MARK +09FF ; XID_Continue # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; XID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; XID_Continue # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; XID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -10557,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144523 +# Total code points: 144524 # ================================================ @@ -11229,6 +11234,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 09FB ; Grapheme_Base # Sc BENGALI GANDA MARK 09FC ; Grapheme_Base # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; Grapheme_Base # Po BENGALI ABBREVIATION SIGN +09FF ; Grapheme_Base # Lo BENGALI LETTER SANSKRIT BA 0A03 ; Grapheme_Base # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; Grapheme_Base # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; Grapheme_Base # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI @@ -12812,7 +12818,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152731 +# Total code points: 152732 # ================================================ @@ -12926,6 +12932,7 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK 09DC..09DD ; InCB; Consonant # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF ; InCB; Consonant # Lo BENGALI LETTER YYA 09F0..09F1 ; InCB; Consonant # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FF ; InCB; Consonant # Lo BENGALI LETTER SANSKRIT BA 0A95..0AA8 ; InCB; Consonant # Lo [20] GUJARATI LETTER KA..GUJARATI LETTER NA 0AAA..0AB0 ; InCB; Consonant # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA 0AB2..0AB3 ; InCB; Consonant # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA @@ -12943,7 +12950,7 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK 0C58..0C5A ; InCB; Consonant # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA 0D15..0D3A ; InCB; Consonant # Lo [38] MALAYALAM LETTER KA..MALAYALAM LETTER TTTA -# Total code points: 240 +# Total code points: 241 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a86664d64..a6456503f 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-16.0.0.txt -# Date: 2024-05-31, 18:59:23 GMT +# EastAsianWidth-17.0.0.txt +# Date: 2024-11-13, 16:04:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -391,6 +391,7 @@ 09FC ; N # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; N # Po BENGALI ABBREVIATION SIGN 09FE ; N # Mn BENGALI SANDHI MARK +09FF ; N # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; N # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index dc0760462..b5ac9d2a7 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:21 GMT +# Date: 2024-06-06, 09:53:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -814,6 +814,7 @@ AA74..AA76 ; Consonant_Placeholder # Lo [3] MYANMAR LOGOGRAM KHAMTI OAY..MY 09DC..09DD ; Consonant # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF ; Consonant # Lo BENGALI LETTER YYA 09F0..09F1 ; Consonant # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FF ; Consonant # Lo BENGALI LETTER SANSKRIT BA 0A15..0A28 ; Consonant # Lo [20] GURMUKHI LETTER KA..GURMUKHI LETTER NA 0A2A..0A30 ; Consonant # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA 0A32..0A33 ; Consonant # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 174be204c..1fb5157e3 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-10-15, 12:06:57 GMT +# Date: 2024-11-13, 16:04:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -338,6 +338,7 @@ 09FC ; AL # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; AL # Po BENGALI ABBREVIATION SIGN 09FE ; CM # Mn BENGALI SANDHI MARK +09FF ; AL # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; CM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; CM # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; AL # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index fd898bef9..31fa7a356 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ -# Scripts-16.0.0.txt -# Date: 2024-05-31, 19:00:14 GMT +# Scripts-17.0.0.txt +# Date: 2024-11-13, 16:04:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1014,8 +1014,9 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 09FC ; Bengali # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; Bengali # Po BENGALI ABBREVIATION SIGN 09FE ; Bengali # Mn BENGALI SANDHI MARK +09FF ; Bengali # Lo BENGALI LETTER SANSKRIT BA -# Total code points: 96 +# Total code points: 97 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f323b4ad6..6644d363f 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -2453,6 +2453,7 @@ 09FC;BENGALI LETTER VEDIC ANUSVARA;Lo;0;L;;;;;N;;;;; 09FD;BENGALI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; 09FE;BENGALI SANDHI MARK;Mn;230;NSM;;;;;N;;;;; +09FF;BENGALI LETTER SANSKRIT BA;Lo;0;L;;;;;N;;;;; 0A01;GURMUKHI SIGN ADAK BINDI;Mn;0;NSM;;;;;N;;;;; 0A02;GURMUKHI SIGN BINDI;Mn;0;NSM;;;;;N;;;;; 0A03;GURMUKHI SIGN VISARGA;Mc;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 82f24ccc7..b7b77d6b4 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-16.0.0.txt -# Date: 2024-05-31, 19:00:21 GMT +# VerticalOrientation-17.0.0.txt +# Date: 2024-11-13, 16:04:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -325,6 +325,7 @@ 09FC ; R # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; R # Po BENGALI ABBREVIATION SIGN 09FE ; R # Mn BENGALI SANDHI MARK +09FF ; R # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; R # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; R # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; R # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 3de833156..74787a3b9 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-10-15, 12:07:32 GMT +# Date: 2024-11-13, 16:04:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2043,6 +2043,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 09DF..09E1 ; OLetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; OLetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; OLetter # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; OLetter # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; OLetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; OLetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; OLetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -2585,7 +2586,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136909 +# Total code points: 136910 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index c99a5cc9b..7418faa9a 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-10-15, 12:07:36 GMT +# Date: 2024-11-13, 16:04:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -766,6 +766,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ALetter # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; ALetter # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -1355,7 +1356,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33792 +# Total code points: 33793 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index fe077592f..286c768bc 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-10-15, 12:06:44 GMT +# Date: 2024-11-13, 16:03:53 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -198,6 +198,7 @@ 09FA ; L # So BENGALI ISSHAR 09FC ; L # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; L # Po BENGALI ABBREVIATION SIGN +09FF ; L # Lo BENGALI LETTER SANSKRIT BA 0A03 ; L # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; L # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; L # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI @@ -1214,7 +1215,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815351 code points not listed here. +# The above property value applies to 815350 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a8f2afcb0..b0cf0ca6c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-10-15, 12:06:46 GMT +# Date: 2024-11-13, 16:03:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -232,6 +232,7 @@ 09FB ; 0 # Sc BENGALI GANDA MARK 09FC ; 0 # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; 0 # Po BENGALI ABBREVIATION SIGN +09FF ; 0 # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; 0 # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; 0 # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; 0 # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -2060,7 +2061,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821580 code points not listed here. +# The above property value applies to 821579 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 357665e7f..2eb9bc962 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-10-15, 12:06:49 GMT +# Date: 2024-11-13, 16:03:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -280,6 +280,7 @@ 09FC ; N # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; N # Po BENGALI ABBREVIATION SIGN 09FE ; N # Mn BENGALI SANDHI MARK +09FF ; N # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; N # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -2103,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761098 code points not listed here. +# The above property value applies to 761097 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index aa74b7e2e..037c06b31 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-10-15, 12:06:50 GMT +# Date: 2024-11-13, 16:03:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -50,7 +50,7 @@ 09D8..09DB ; Cn # [4] .. 09DE ; Cn # 09E4..09E5 ; Cn # [2] .. -09FF..0A00 ; Cn # [2] .. +0A00 ; Cn # 0A04 ; Cn # 0A0B..0A0E ; Cn # [4] .. 0A11..0A12 ; Cn # [2] .. @@ -746,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819532 +# Total code points: 819531 # ================================================ @@ -2221,6 +2221,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 09DF..09E1 ; Lo # [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; Lo # [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; Lo # BENGALI LETTER VEDIC ANUSVARA +09FF ; Lo # BENGALI LETTER SANSKRIT BA 0A05..0A0A ; Lo # [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; Lo # [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; Lo # [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -2707,7 +2708,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136478 +# Total code points: 136479 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index d368bd0ba..931ee1f27 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-10-15, 12:06:52 GMT +# Date: 2024-11-13, 16:03:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757652 code points not listed here. -# Total code points: 895120 +# The above property value applies to 757651 code points not listed here. +# Total code points: 895119 # ================================================ @@ -690,6 +690,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 09FA ; AL # So BENGALI ISSHAR 09FC ; AL # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; AL # Po BENGALI ABBREVIATION SIGN +09FF ; AL # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; AL # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; AL # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; AL # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -1613,7 +1614,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26680 +# Total code points: 26681 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 17697332e..3b46c52f1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-10-15, 12:06:52 GMT +# Date: 2024-11-13, 16:03:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2428,6 +2428,7 @@ 09FC ; BENGALI LETTER VEDIC ANUSVARA 09FD ; BENGALI ABBREVIATION SIGN 09FE ; BENGALI SANDHI MARK +09FF ; BENGALI LETTER SANSKRIT BA 0A01 ; GURMUKHI SIGN ADAK BINDI 0A02 ; GURMUKHI SIGN BINDI 0A03 ; GURMUKHI SIGN VISARGA @@ -45368,6 +45369,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154999 +# Total code points: 155000 # EOF From 97d58bf3aee06fc64f2aff9c8e7b4372d1611112 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 17:38:02 +0100 Subject: [PATCH 03/38] Oriya dots above (#781) * UnicodeData.txt lines from L2/24-106 * lb=CM * Oriya * IndicMeowCategory from L2/24-106 * Both diacritics and the double dot an extender * Regenerate UCD * more invariant exceptions * oops * Typo in ORIYA --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- .../data/ucd/dev/DerivedCoreProperties.txt | 22 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 ++-- .../data/ucd/dev/IndicPositionalCategory.txt | 4 ++-- .../data/ucd/dev/IndicSyllabicCategory.txt | 6 ++--- unicodetools/data/ucd/dev/LineBreak.txt | 4 ++-- unicodetools/data/ucd/dev/PropList.txt | 12 +++++----- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 2 ++ .../data/ucd/dev/VerticalOrientation.txt | 4 ++-- .../dev/auxiliary/GraphemeBreakProperty.txt | 8 +++---- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++++----- .../dev/extracted/DerivedCombiningClass.txt | 6 ++--- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++----- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 ++++----- .../data/ucd/dev/extracted/DerivedName.txt | 6 +++-- .../unicode/text/UCD/UnicodeInvariantTest.txt | 3 ++- 21 files changed, 76 insertions(+), 70 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 2bfaba38d..ef061a1d3 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 16:03:37 GMT +# Date: 2024-11-13, 16:21:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2067,7 +2067,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE 09FF ; 17.0 # BENGALI LETTER SANSKRIT BA +0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE -# Total code points: 2 +# Total code points: 4 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 25aac7794..915d290db 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 16:03:55 GMT +# Date: 2024-11-13, 16:21:56 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3104,7 +3104,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0B3F ; Case_Ignorable # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; Case_Ignorable # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Case_Ignorable # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Case_Ignorable # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Case_Ignorable # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; Case_Ignorable # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Case_Ignorable # Mn TAMIL SIGN ANUSVARA 0BC0 ; Case_Ignorable # Mn TAMIL VOWEL SIGN II @@ -3506,7 +3506,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2751 # ================================================ @@ -7179,7 +7179,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0B47..0B48 ; ID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; ID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; ID_Continue # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; ID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; ID_Continue # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; ID_Continue # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; ID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; ID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -8373,7 +8373,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144543 +# Total code points: 144545 # ================================================ @@ -9363,7 +9363,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0B47..0B48 ; XID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; XID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; XID_Continue # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; XID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; XID_Continue # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; XID_Continue # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; XID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; XID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -10562,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144524 +# Total code points: 144526 # ================================================ @@ -10685,7 +10685,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .... 0B3F ; Extend # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -495,7 +495,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2198 +# Total code points: 2200 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 74787a3b9..010e2eaea 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 16:04:21 GMT +# Date: 2024-11-13, 16:22:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -111,7 +111,7 @@ 0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -586,7 +586,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2601 +# Total code points: 2603 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 7418faa9a..2d3d10c79 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 16:04:23 GMT +# Date: 2024-11-13, 16:22:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -147,7 +147,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -623,7 +623,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2605 +# Total code points: 2607 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 286c768bc..de30f8204 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 16:03:53 GMT +# Date: 2024-11-13, 16:21:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1215,8 +1215,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815350 code points not listed here. -# Total code points: 1095513 +# The above property value applies to 815348 code points not listed here. +# Total code points: 1095511 # ================================================ @@ -2108,7 +2108,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 0B3F ; NSM # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; NSM # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; NSM # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; NSM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; NSM # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; NSM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; NSM # Mn TAMIL SIGN ANUSVARA 0BC0 ; NSM # Mn TAMIL VOWEL SIGN II @@ -2409,7 +2409,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2028 +# Total code points: 2030 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index b0cf0ca6c..a67aa3100 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 16:03:55 GMT +# Date: 2024-11-13, 16:21:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -291,7 +291,7 @@ 0B41..0B44 ; 0 # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B47..0B48 ; 0 # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; 0 # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU -0B55..0B56 ; 0 # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; 0 # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; 0 # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; 0 # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; 0 # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -2061,7 +2061,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821579 code points not listed here. +# The above property value applies to 821577 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 2eb9bc962..d3efc8f0b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 16:03:57 GMT +# Date: 2024-11-13, 16:21:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -344,7 +344,7 @@ 0B47..0B48 ; N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; N # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; N # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; N # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -2104,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761097 code points not listed here. +# The above property value applies to 761095 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 037c06b31..4e2501104 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 16:03:57 GMT +# Date: 2024-11-13, 16:21:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -90,7 +90,7 @@ 0B3A..0B3B ; Cn # [2] .. 0B45..0B46 ; Cn # [2] .. 0B49..0B4A ; Cn # [2] .. -0B4E..0B54 ; Cn # [7] .. +0B4E..0B52 ; Cn # [5] .. 0B58..0B5B ; Cn # [4] .. 0B5E ; Cn # 0B64..0B65 ; Cn # [2] .. @@ -746,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819531 +# Total code points: 819529 # ================================================ @@ -2773,7 +2773,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 0B3F ; Mn # ORIYA VOWEL SIGN I 0B41..0B44 ; Mn # [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Mn # ORIYA SIGN VIRAMA -0B55..0B56 ; Mn # [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Mn # [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; Mn # [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Mn # TAMIL SIGN ANUSVARA 0BC0 ; Mn # TAMIL VOWEL SIGN II @@ -3072,7 +3072,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2020 +# Total code points: 2022 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index d08bfdd7e..443125089 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-10-15, 12:06:51 GMT +# Date: 2024-11-13, 16:21:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -266,7 +266,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 0B3F ; T # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; T # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; T # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; T # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; T # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; T # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; T # Mn TAMIL SIGN ANUSVARA 0BC0 ; T # Mn TAMIL VOWEL SIGN II @@ -582,6 +582,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2185 +# Total code points: 2187 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 931ee1f27..3c482e58d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 16:03:58 GMT +# Date: 2024-11-13, 16:21:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757651 code points not listed here. -# Total code points: 895119 +# The above property value applies to 757649 code points not listed here. +# Total code points: 895117 # ================================================ @@ -1967,7 +1967,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 0B47..0B48 ; CM # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; CM # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; CM # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; CM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; CM # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; CM # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; CM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; CM # Mn TAMIL SIGN ANUSVARA @@ -2388,7 +2388,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2470 +# Total code points: 2472 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 3b46c52f1..253d43288 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 16:03:58 GMT +# Date: 2024-11-13, 16:21:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2663,6 +2663,8 @@ 0B4B ; ORIYA VOWEL SIGN O 0B4C ; ORIYA VOWEL SIGN AU 0B4D ; ORIYA SIGN VIRAMA +0B53 ; ORIYA SIGN DOT ABOVE +0B54 ; ORIYA SIGN DOUBLE DOT ABOVE 0B55 ; ORIYA SIGN OVERLINE 0B56 ; ORIYA AI LENGTH MARK 0B57 ; ORIYA AU LENGTH MARK @@ -45369,6 +45371,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155000 +# Total code points: 155002 # EOF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 570de2e90..4e4685029 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -700,6 +700,7 @@ Let $nonAlphabeticBindus := [] Let $nonAlphabeticDependentVowels := [ \N{ORIYA SIGN OVERLINE} + \p{Name=/^ORIYA SIGN (DOUBLE )?DOT ABOVE$/} # L2/24-106R, related to the overline. \N{THAI CHARACTER MAITAIKHU} \N{LIMBU SIGN KEMPHRENG} \N{SHARADA VOWEL MODIFIER MARK} @@ -1379,4 +1380,4 @@ Ignoring Unicode_1_Name Confusable_MA: end Ignoring; -end Ignoring; \ No newline at end of file +end Ignoring; From 5e40d67f7a45c1496ed4c345d0cbf90649734cc8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 18:21:21 +0100 Subject: [PATCH 04/38] Kannada and Telugu: Archaic Ligature SHRII (#862) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "No archaic shriis in 16 (#786)" This reverts commit aeefbb8fda1e74a0e959e0cf5d69ff8316121d25. * Don’t try to bring back the IDNA mapping table --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 38 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 +-- unicodetools/data/ucd/dev/LineBreak.txt | 6 +-- unicodetools/data/ucd/dev/Scripts.txt | 10 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 2 + .../data/ucd/dev/VerticalOrientation.txt | 6 +-- .../dev/auxiliary/SentenceBreakProperty.txt | 8 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 8 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 8 ++-- .../dev/extracted/DerivedCombiningClass.txt | 8 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 8 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 14 +++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 12 +++--- .../data/ucd/dev/extracted/DerivedName.txt | 6 ++- 15 files changed, 76 insertions(+), 70 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index ef061a1d3..361ae7087 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 16:21:36 GMT +# Date: 2024-11-13, 16:38:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2068,7 +2068,9 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE 09FF ; 17.0 # BENGALI LETTER SANSKRIT BA 0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE +0C5C ; 17.0 # TELUGU ARCHAIC SHRII +0CDC ; 17.0 # KANNADA ARCHAIC SHRII -# Total code points: 4 +# Total code points: 6 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 915d290db..28640c632 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 16:21:56 GMT +# Date: 2024-11-13, 16:38:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -478,7 +478,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0C4A..0C4C ; Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU 0C55..0C56 ; Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; Alphabetic # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Alphabetic # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Alphabetic # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Alphabetic # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C80 ; Alphabetic # Lo KANNADA SIGN SPACING CANDRABINDU @@ -498,7 +498,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0CCA..0CCB ; Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC ; Alphabetic # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; Alphabetic # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Alphabetic # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Alphabetic # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CF1..0CF2 ; Alphabetic # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -1442,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142761 +# Total code points: 142763 # ================================================ @@ -6329,7 +6329,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0C2A..0C39 ; ID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; ID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; ID_Start # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; ID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -6338,7 +6338,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0CAA..0CB3 ; ID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; ID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; ID_Start # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; ID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; ID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; ID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -6964,7 +6964,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141271 +# Total code points: 141273 # ================================================ @@ -7221,7 +7221,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0C4A..0C4D ; ID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; ID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; ID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; ID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; ID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -7243,7 +7243,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0CCA..0CCB ; ID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; ID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; ID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; ID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; ID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; ID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -8373,7 +8373,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144545 +# Total code points: 144547 # ================================================ @@ -8513,7 +8513,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0C2A..0C39 ; XID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; XID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; XID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; XID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; XID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; XID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; XID_Start # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; XID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -8522,7 +8522,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0CAA..0CB3 ; XID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; XID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; XID_Start # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; XID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; XID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; XID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; XID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; XID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -9152,7 +9152,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141248 +# Total code points: 141250 # ================================================ @@ -9405,7 +9405,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0C4A..0C4D ; XID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; XID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; XID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; XID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; XID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; XID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; XID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; XID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -9427,7 +9427,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0CCA..0CCB ; XID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; XID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; XID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; XID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; XID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; XID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -10562,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144526 +# Total code points: 144528 # ================================================ @@ -11311,7 +11311,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0C3D ; Grapheme_Base # Lo TELUGU SIGN AVAGRAHA 0C41..0C44 ; Grapheme_Base # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C58..0C5A ; Grapheme_Base # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Grapheme_Base # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Grapheme_Base # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Grapheme_Base # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C66..0C6F ; Grapheme_Base # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 0C77 ; Grapheme_Base # Po TELUGU SIGN SIDDHAM @@ -11329,7 +11329,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0CBE ; Grapheme_Base # Mc KANNADA VOWEL SIGN AA 0CC1 ; Grapheme_Base # Mc KANNADA VOWEL SIGN U 0CC3..0CC4 ; Grapheme_Base # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR -0CDD..0CDE ; Grapheme_Base # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Grapheme_Base # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Grapheme_Base # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE6..0CEF ; Grapheme_Base # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; Grapheme_Base # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -12818,7 +12818,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152732 +# Total code points: 152734 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index e5f391ab3..9cf4406bf 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 16:22:01 GMT +# Date: 2024-11-13, 16:38:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -503,7 +503,7 @@ 0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -529,7 +529,7 @@ 0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index bd7bb5294..0adaea260 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-13, 16:22:02 GMT +# Date: 2024-11-13, 16:38:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -450,7 +450,7 @@ 0C4A..0C4D ; CM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; CM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; CM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; NU # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -476,7 +476,7 @@ 0CCA..0CCB ; CM # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; CM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; CM # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; CM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; NU # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 7aaea2a1a..05817b648 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-13, 16:22:25 GMT +# Date: 2024-11-13, 16:39:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1156,7 +1156,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; Telugu # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Telugu # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Telugu # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -1164,7 +1164,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F ; Telugu # So TELUGU SIGN TUUMU -# Total code points: 100 +# Total code points: 101 # ================================================ @@ -1187,14 +1187,14 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; Kannada # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Kannada # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0CF3 ; Kannada # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT -# Total code points: 91 +# Total code points: 92 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 053dba6c8..1a933a3b1 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -2866,6 +2866,7 @@ 0C58;TELUGU LETTER TSA;Lo;0;L;;;;;N;;;;; 0C59;TELUGU LETTER DZA;Lo;0;L;;;;;N;;;;; 0C5A;TELUGU LETTER RRRA;Lo;0;L;;;;;N;;;;; +0C5C;TELUGU ARCHAIC SHRII;Lo;0;L;;;;;N;;;;; 0C5D;TELUGU LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;; 0C60;TELUGU LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; 0C61;TELUGU LETTER VOCALIC LL;Lo;0;L;;;;;N;;;;; @@ -2962,6 +2963,7 @@ 0CCD;KANNADA SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; 0CD5;KANNADA LENGTH MARK;Mc;0;L;;;;;N;;;;; 0CD6;KANNADA AI LENGTH MARK;Mc;0;L;;;;;N;;;;; +0CDC;KANNADA ARCHAIC SHRII;Lo;0;L;;;;;N;;;;; 0CDD;KANNADA LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;; 0CDE;KANNADA LETTER FA;Lo;0;L;;;;;N;;;;; 0CE0;KANNADA LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index e2b88a7cc..2a8fcb64d 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-13, 16:22:28 GMT +# Date: 2024-11-13, 16:39:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -437,7 +437,7 @@ 0C4A..0C4D ; R # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; R # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; R # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; R # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; R # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; R # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; R # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; R # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -463,7 +463,7 @@ 0CCA..0CCB ; R # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; R # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; R # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; R # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; R # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; R # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; R # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; R # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 010e2eaea..1185503c0 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 16:22:26 GMT +# Date: 2024-11-13, 16:39:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2091,7 +2091,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0C2A..0C39 ; OLetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; OLetter # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; OLetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -2100,7 +2100,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0CAA..0CB3 ; OLetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; OLetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; OLetter # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; OLetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; OLetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; OLetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; OLetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; OLetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2586,7 +2586,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136910 +# Total code points: 136912 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 2d3d10c79..f99235a6b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 16:22:28 GMT +# Date: 2024-11-13, 16:39:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -814,7 +814,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0C2A..0C39 ; ALetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ALetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ALetter # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ALetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; ALetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -823,7 +823,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; ALetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ALetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; ALetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; ALetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -1356,7 +1356,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33793 +# Total code points: 33795 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index de30f8204..aecb29e0e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 16:21:54 GMT +# Date: 2024-11-13, 16:38:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -274,7 +274,7 @@ 0C3D ; L # Lo TELUGU SIGN AVAGRAHA 0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; L # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; L # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 0C77 ; L # Po TELUGU SIGN SIDDHAM @@ -295,7 +295,7 @@ 0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI 0CCA..0CCB ; L # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; L # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; L # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -1215,7 +1215,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815348 code points not listed here. +# The above property value applies to 815346 code points not listed here. # Total code points: 1095511 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a67aa3100..3469c47c7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 16:21:55 GMT +# Date: 2024-11-13, 16:38:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -336,7 +336,7 @@ 0C46..0C48 ; 0 # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI 0C4A..0C4C ; 0 # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU 0C58..0C5A ; 0 # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; 0 # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; 0 # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; 0 # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; 0 # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; 0 # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -361,7 +361,7 @@ 0CCA..0CCB ; 0 # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC ; 0 # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; 0 # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; 0 # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; 0 # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; 0 # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; 0 # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; 0 # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -2061,7 +2061,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821577 code points not listed here. +# The above property value applies to 821575 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index d3efc8f0b..e24073197 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 16:21:57 GMT +# Date: 2024-11-13, 16:38:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -392,7 +392,7 @@ 0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -418,7 +418,7 @@ 0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -2104,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761095 code points not listed here. +# The above property value applies to 761093 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 4e2501104..f14ea8635 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 16:21:57 GMT +# Date: 2024-11-13, 16:38:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -119,7 +119,7 @@ 0C49 ; Cn # 0C4E..0C54 ; Cn # [7] .. 0C57 ; Cn # -0C5B..0C5C ; Cn # [2] .. +0C5B ; Cn # 0C5E..0C5F ; Cn # [2] .. 0C64..0C65 ; Cn # [2] .. 0C70..0C76 ; Cn # [7] .. @@ -131,7 +131,7 @@ 0CC5 ; Cn # 0CC9 ; Cn # 0CCE..0CD4 ; Cn # [7] .. -0CD7..0CDC ; Cn # [6] .. +0CD7..0CDB ; Cn # [5] .. 0CDF ; Cn # 0CE4..0CE5 ; Cn # [2] .. 0CF0 ; Cn # @@ -746,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819529 +# Total code points: 819527 # ================================================ @@ -2269,7 +2269,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0C2A..0C39 ; Lo # [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; Lo # TELUGU SIGN AVAGRAHA 0C58..0C5A ; Lo # [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Lo # TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Lo # [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Lo # [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; Lo # KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; Lo # [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -2278,7 +2278,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0CAA..0CB3 ; Lo # [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; Lo # [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; Lo # KANNADA SIGN AVAGRAHA -0CDD..0CDE ; Lo # [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Lo # [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Lo # [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; Lo # [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; Lo # [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2708,7 +2708,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136479 +# Total code points: 136481 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 3c482e58d..2967c392c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 16:21:59 GMT +# Date: 2024-11-13, 16:38:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757649 code points not listed here. -# Total code points: 895117 +# The above property value applies to 757647 code points not listed here. +# Total code points: 895115 # ================================================ @@ -745,7 +745,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0C2A..0C39 ; AL # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; AL # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C78..0C7E ; AL # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F ; AL # So TELUGU SIGN TUUMU @@ -756,7 +756,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0CAA..0CB3 ; AL # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; AL # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; AL # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; AL # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; AL # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -1614,7 +1614,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26681 +# Total code points: 26683 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 253d43288..20d056f93 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 16:21:59 GMT +# Date: 2024-11-13, 16:38:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2841,6 +2841,7 @@ 0C58 ; TELUGU LETTER TSA 0C59 ; TELUGU LETTER DZA 0C5A ; TELUGU LETTER RRRA +0C5C ; TELUGU ARCHAIC SHRII 0C5D ; TELUGU LETTER NAKAARA POLLU 0C60 ; TELUGU LETTER VOCALIC RR 0C61 ; TELUGU LETTER VOCALIC LL @@ -2937,6 +2938,7 @@ 0CCD ; KANNADA SIGN VIRAMA 0CD5 ; KANNADA LENGTH MARK 0CD6 ; KANNADA AI LENGTH MARK +0CDC ; KANNADA ARCHAIC SHRII 0CDD ; KANNADA LETTER NAKAARA POLLU 0CDE ; KANNADA LETTER FA 0CE0 ; KANNADA LETTER VOCALIC RR @@ -45371,6 +45373,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155002 +# Total code points: 155004 # EOF From 617b9d193ffc5c6814b39b87b3cbd5ac70750620 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 18:49:44 +0100 Subject: [PATCH 05/38] Combining double caron (#779) * UnicodeData.txt line from L2/24-105 * lb=CM * Inherited * Diacritic * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- .../data/ucd/dev/DerivedCoreProperties.txt | 22 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 ++-- unicodetools/data/ucd/dev/LineBreak.txt | 4 ++-- .../data/ucd/dev/NormalizationTest.txt | 4 +++- unicodetools/data/ucd/dev/PropList.txt | 5 +++-- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + .../data/ucd/dev/VerticalOrientation.txt | 4 ++-- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++++----- .../dev/extracted/DerivedCombiningClass.txt | 10 ++++----- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++----- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 ++++----- .../data/ucd/dev/extracted/DerivedName.txt | 5 +++-- 19 files changed, 68 insertions(+), 62 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 361ae7087..fdc63ca6f 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 16:38:26 GMT +# Date: 2024-11-13, 17:23:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2070,7 +2070,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE 0C5C ; 17.0 # TELUGU ARCHAIC SHRII 0CDC ; 17.0 # KANNADA ARCHAIC SHRII +1ACF ; 17.0 # COMBINING DOUBLE CARON -# Total code points: 6 +# Total code points: 7 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 28640c632..3a330db8e 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 16:38:43 GMT +# Date: 2024-11-13, 17:23:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3195,7 +3195,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Case_Ignorable # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; Case_Ignorable # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3506,7 +3506,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2751 +# Total code points: 2752 # ================================================ @@ -7460,7 +7460,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; ID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; ID_Continue # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -8373,7 +8373,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144547 +# Total code points: 144548 # ================================================ @@ -9644,7 +9644,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; XID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; XID_Continue # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -10562,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144528 +# Total code points: 144529 # ================================================ @@ -10783,7 +10783,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .... 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; Extend # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -495,7 +495,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2200 +# Total code points: 2201 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 1185503c0..6abb2f7d2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 16:39:08 GMT +# Date: 2024-11-13, 17:23:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -247,7 +247,7 @@ 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; Extend # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -586,7 +586,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2603 +# Total code points: 2604 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index f99235a6b..96a1fe880 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 16:39:10 GMT +# Date: 2024-11-13, 17:23:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -283,7 +283,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; Extend # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -623,7 +623,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2607 +# Total code points: 2608 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index aecb29e0e..a0f66966d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 16:38:41 GMT +# Date: 2024-11-13, 17:23:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1215,8 +1215,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815346 code points not listed here. -# Total code points: 1095511 +# The above property value applies to 815345 code points not listed here. +# Total code points: 1095510 # ================================================ @@ -2190,7 +2190,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; NSM # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2409,7 +2409,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2030 +# Total code points: 2031 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3469c47c7..55d320c71 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 16:38:42 GMT +# Date: 2024-11-13, 17:23:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2061,8 +2061,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821575 code points not listed here. -# Total code points: 1113178 +# The above property value applies to 821574 code points not listed here. +# Total code points: 1113177 # ================================================ @@ -2737,7 +2737,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1ABB..1ABC ; 230 # Mn [2] COMBINING PARENTHESES ABOVE..COMBINING DOUBLE PARENTHESES ABOVE 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE -1ACB..1ACE ; 230 # Mn [4] COMBINING TRIPLE ACUTE ACCENT..COMBINING LATIN SMALL LETTER INSULAR T +1ACB..1ACF ; 230 # Mn [5] COMBINING TRIPLE ACUTE ACCENT..COMBINING DOUBLE CARON 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2804,7 +2804,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 517 +# Total code points: 518 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index e24073197..84155dc38 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 16:38:44 GMT +# Date: 2024-11-13, 17:23:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -692,7 +692,7 @@ 1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; N # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -2104,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761093 code points not listed here. +# The above property value applies to 761092 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index f14ea8635..29de338a3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 16:38:44 GMT +# Date: 2024-11-13, 17:23:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -227,7 +227,7 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1ACF..1AFF ; Cn # [49] .. +1AD0..1AFF ; Cn # [48] .. 1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. @@ -746,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819527 +# Total code points: 819526 # ================================================ @@ -2856,7 +2856,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; Mn # [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; Mn # [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3072,7 +3072,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2022 +# Total code points: 2023 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 443125089..6986f8e7f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-13, 16:21:58 GMT +# Date: 2024-11-13, 17:23:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -350,7 +350,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; T # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; T # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -582,6 +582,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2187 +# Total code points: 2188 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 2967c392c..15cacb25b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 16:38:45 GMT +# Date: 2024-11-13, 17:23:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757647 code points not listed here. -# Total code points: 895115 +# The above property value applies to 757646 code points not listed here. +# Total code points: 895114 # ================================================ @@ -2055,7 +2055,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1A7F ; CM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ACF ; CM # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2388,7 +2388,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2472 +# Total code points: 2473 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 20d056f93..29f7e5ae5 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 16:38:46 GMT +# Date: 2024-11-13, 17:23:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -6118,6 +6118,7 @@ 1ACC ; COMBINING LATIN SMALL LETTER INSULAR G 1ACD ; COMBINING LATIN SMALL LETTER INSULAR R 1ACE ; COMBINING LATIN SMALL LETTER INSULAR T +1ACF ; COMBINING DOUBLE CARON 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -45373,6 +45374,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155004 +# Total code points: 155005 # EOF From 7a3737c56cd9d9698cf5986e3b9d5b5261122e13 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Wed, 13 Nov 2024 11:27:48 -0800 Subject: [PATCH 06/38] UTC-176-C35 Six compound tone diacritics (#637) * UTC-176-C35 Six compound tone diacritics * Missed one file * Diacritics are, in fact, diacritics. * Regenerate UCD --------- Co-authored-by: Robin Leroy --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++--- .../data/ucd/dev/DerivedCoreProperties.txt | 22 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 ++-- unicodetools/data/ucd/dev/LineBreak.txt | 4 ++-- .../data/ucd/dev/NormalizationTest.txt | 16 ++++++++++++-- unicodetools/data/ucd/dev/PropList.txt | 6 ++--- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 6 +++++ .../data/ucd/dev/VerticalOrientation.txt | 4 ++-- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++++----- .../dev/extracted/DerivedCombiningClass.txt | 10 ++++----- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++----- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 ++++----- .../data/ucd/dev/extracted/DerivedName.txt | 10 +++++++-- 19 files changed, 89 insertions(+), 65 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index fdc63ca6f..9c57f526b 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 17:23:03 GMT +# Date: 2024-11-13, 17:50:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2070,8 +2070,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE 0C5C ; 17.0 # TELUGU ARCHAIC SHRII 0CDC ; 17.0 # KANNADA ARCHAIC SHRII -1ACF ; 17.0 # COMBINING DOUBLE CARON +1ACF..1AD5 ; 17.0 # [7] COMBINING DOUBLE CARON..COMBINING MACRON-VERTICAL-LINE -# Total code points: 7 +# Total code points: 13 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 3a330db8e..531cc7816 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 17:23:21 GMT +# Date: 2024-11-13, 17:50:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3195,7 +3195,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACF ; Case_Ignorable # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; Case_Ignorable # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3506,7 +3506,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2752 +# Total code points: 2758 # ================================================ @@ -7460,7 +7460,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACF ; ID_Continue # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; ID_Continue # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -8373,7 +8373,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144548 +# Total code points: 144554 # ================================================ @@ -9644,7 +9644,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACF ; XID_Continue # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; XID_Continue # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -10562,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144529 +# Total code points: 144535 # ================================================ @@ -10783,7 +10783,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .... 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACF ; Extend # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; Extend # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -495,7 +495,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2201 +# Total code points: 2207 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 6abb2f7d2..1a242c881 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 17:23:48 GMT +# Date: 2024-11-13, 17:50:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -247,7 +247,7 @@ 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACF ; Extend # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; Extend # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -586,7 +586,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2604 +# Total code points: 2610 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 96a1fe880..1120cd60a 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 17:23:49 GMT +# Date: 2024-11-13, 17:51:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -283,7 +283,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACF ; Extend # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; Extend # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -623,7 +623,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2608 +# Total code points: 2614 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index a0f66966d..dc9976e2c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 17:23:19 GMT +# Date: 2024-11-13, 17:50:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1215,8 +1215,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815345 code points not listed here. -# Total code points: 1095510 +# The above property value applies to 815339 code points not listed here. +# Total code points: 1095504 # ================================================ @@ -2190,7 +2190,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACF ; NSM # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; NSM # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2409,7 +2409,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2031 +# Total code points: 2037 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 55d320c71..f544c79e5 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 17:23:20 GMT +# Date: 2024-11-13, 17:50:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2061,8 +2061,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821574 code points not listed here. -# Total code points: 1113177 +# The above property value applies to 821568 code points not listed here. +# Total code points: 1113171 # ================================================ @@ -2737,7 +2737,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1ABB..1ABC ; 230 # Mn [2] COMBINING PARENTHESES ABOVE..COMBINING DOUBLE PARENTHESES ABOVE 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE -1ACB..1ACF ; 230 # Mn [5] COMBINING TRIPLE ACUTE ACCENT..COMBINING DOUBLE CARON +1ACB..1AD5 ; 230 # Mn [11] COMBINING TRIPLE ACUTE ACCENT..COMBINING MACRON-VERTICAL-LINE 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2804,7 +2804,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 518 +# Total code points: 524 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 84155dc38..535c77663 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 17:23:22 GMT +# Date: 2024-11-13, 17:50:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -692,7 +692,7 @@ 1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACF ; N # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; N # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -2104,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761092 code points not listed here. +# The above property value applies to 761086 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 29de338a3..6fbbecc6f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 17:23:23 GMT +# Date: 2024-11-13, 17:50:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -227,7 +227,7 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1AD0..1AFF ; Cn # [48] .. +1AD6..1AFF ; Cn # [42] .. 1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. @@ -746,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819526 +# Total code points: 819520 # ================================================ @@ -2856,7 +2856,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACF ; Mn # [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; Mn # [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3072,7 +3072,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2023 +# Total code points: 2029 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 6986f8e7f..1963202c1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-13, 17:23:24 GMT +# Date: 2024-11-13, 17:50:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -350,7 +350,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACF ; T # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; T # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -582,6 +582,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2188 +# Total code points: 2194 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 15cacb25b..c51ee0406 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 17:23:24 GMT +# Date: 2024-11-13, 17:50:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757646 code points not listed here. -# Total code points: 895114 +# The above property value applies to 757640 code points not listed here. +# Total code points: 895108 # ================================================ @@ -2055,7 +2055,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1A7F ; CM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACF ; CM # Mn [17] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE CARON +1ABF..1AD5 ; CM # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2388,7 +2388,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2473 +# Total code points: 2479 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 29f7e5ae5..f7a36b3fb 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 17:23:24 GMT +# Date: 2024-11-13, 17:50:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -6119,6 +6119,12 @@ 1ACD ; COMBINING LATIN SMALL LETTER INSULAR R 1ACE ; COMBINING LATIN SMALL LETTER INSULAR T 1ACF ; COMBINING DOUBLE CARON +1AD0 ; COMBINING VERTICAL-LINE-ACUTE +1AD1 ; COMBINING GRAVE-VERTICAL-LINE +1AD2 ; COMBINING VERTICAL-LINE-GRAVE +1AD3 ; COMBINING ACUTE-VERTICAL-LINE +1AD4 ; COMBINING VERTICAL-LINE-MACRON +1AD5 ; COMBINING MACRON-VERTICAL-LINE 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -45374,6 +45380,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155005 +# Total code points: 155011 # EOF From 6a6f6a5f76044a0ea8ab4b6bf2667cb44d98611a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 20:42:47 +0100 Subject: [PATCH 07/38] Remove expired canned rules (#964) * Remove expired canned rules * spots --- .../java/org/unicode/tools/Segmenter.java | 526 ------------------ 1 file changed, 526 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/tools/Segmenter.java b/unicodetools/src/main/java/org/unicode/tools/Segmenter.java index 994a79e7d..2b3644b85 100644 --- a/unicodetools/src/main/java/org/unicode/tools/Segmenter.java +++ b/unicodetools/src/main/java/org/unicode/tools/Segmenter.java @@ -20,8 +20,6 @@ import com.ibm.icu.text.UnicodeSet.XSymbolTable; import com.ibm.icu.text.UnicodeSetIterator; import com.ibm.icu.util.ULocale; -import java.io.IOException; -import java.io.PrintWriter; import java.text.ParsePosition; import java.util.ArrayList; import java.util.Collection; @@ -40,9 +38,6 @@ import org.unicode.cldr.util.RegexUtilities; import org.unicode.cldr.util.TransliteratorUtilities; import org.unicode.props.UnicodeProperty; -import org.unicode.text.UCD.Default; -import org.unicode.text.UCD.ToolUnicodePropertySource; -import org.unicode.text.utility.Settings; import org.unicode.tools.Segmenter.SegmentationRule.Breaks; /** Ordered list of rules, with variables resolved before building. Use Builder to make. */ @@ -1004,525 +999,4 @@ public Map getOriginalVariables() { public UnicodeMap getSamples() { return samples; } - - // TODO: delete? move elsewhere? - // Only used in main() to write to some files. Out of sync with SegmenterDefault.txt. - private static final String[][] cannedRules = { - { - "GraphemeClusterBreak", - "$CR=\\p{Grapheme_Cluster_Break=CR}", - "$LF=\\p{Grapheme_Cluster_Break=LF}", - "$Control=\\p{Grapheme_Cluster_Break=Control}", - "$Extend=\\p{Grapheme_Cluster_Break=Extend}", - "$ZWJ=\\p{Grapheme_Cluster_Break=ZWJ}", - "$RI=\\p{Grapheme_Cluster_Break=Regional_Indicator}", - "$Prepend=\\p{Grapheme_Cluster_Break=Prepend}", - "$SpacingMark=\\p{Grapheme_Cluster_Break=SpacingMark}", - "$L=\\p{Grapheme_Cluster_Break=L}", - "$V=\\p{Grapheme_Cluster_Break=V}", - "$T=\\p{Grapheme_Cluster_Break=T}", - "$LV=\\p{Grapheme_Cluster_Break=LV}", - "$LVT=\\p{Grapheme_Cluster_Break=LVT}", - "$Virama=[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&\\p{Indic_Syllabic_Category=Virama}]", - "$LinkingConsonant=[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&\\p{Indic_Syllabic_Category=Consonant}]", - - // "$E_Base=\\p{Grapheme_Cluster_Break=E_Base}", - // "$E_Modifier=\\p{Grapheme_Cluster_Break=E_Modifier}", - - "$ExtPict=\\p{Extended_Pictographic}", - "$ExtCccZwj=[[$Extend-\\p{ccc=0}] $ZWJ]", - // "$EBG=\\p{Grapheme_Cluster_Break=E_Base_GAZ}", - // "$Glue_After_Zwj=\\p{Grapheme_Cluster_Break=Glue_After_Zwj}", - - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break between a CR and LF. Otherwise, break before and after controls.", - "3) $CR \u00D7 $LF", - "4) ( $Control | $CR | $LF ) \u00F7", - "5) \u00F7 ( $Control | $CR | $LF )", - "# Do not break Hangul syllable sequences.", - "6) $L \u00D7 ( $L | $V | $LV | $LVT )", - "7) ( $LV | $V ) \u00D7 ( $V | $T )", - "8) ( $LVT | $T) \u00D7 $T", - "# Do not break before extending characters or ZWJ.", - // "9) \u00D7 ($Extend | $ZWJ | $Virama)", - "9) \u00D7 ($Extend | $ZWJ)", - "# Only for extended grapheme clusters: Do not break before SpacingMarks, or after Prepend characters.", - "9.1) \u00D7 $SpacingMark", - "9.2) $Prepend \u00D7", - "9.3) $LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* \u00D7 $LinkingConsonant", - "# Do not break within emoji modifier sequences or emoji zwj sequences.", - // "10) $E_Base $Extend* × $E_Modifier", - "11) $ExtPict $Extend* $ZWJ × $ExtPict", - "# Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.", - "12) ^ ($RI $RI)* $RI × $RI", - "13) [^$RI] ($RI $RI)* $RI × $RI", - "# Otherwise, break everywhere.", - }, - { - "LineBreak", - "# Variables", - "$AI=\\p{Line_Break=Ambiguous}", - "$AL=\\p{Line_Break=Alphabetic}", - "$B2=\\p{Line_Break=Break_Both}", - "$BA=\\p{Line_Break=Break_After}", - "$BB=\\p{Line_Break=Break_Before}", - "$BK=\\p{Line_Break=Mandatory_Break}", - "$CB=\\p{Line_Break=Contingent_Break}", - "$CL=\\p{Line_Break=Close_Punctuation}", - "$CP=\\p{Line_Break=CP}", - "$CM1=\\p{Line_Break=Combining_Mark}", - "$CR=\\p{Line_Break=Carriage_Return}", - "$EX=\\p{Line_Break=Exclamation}", - "$GL=\\p{Line_Break=Glue}", - "$H2=\\p{Line_Break=H2}", - "$H3=\\p{Line_Break=H3}", - "$HL=\\p{Line_Break=HL}", - "$HY=\\p{Line_Break=Hyphen}", - "$ID=\\p{Line_Break=Ideographic}", - "$IN=\\p{Line_Break=Inseparable}", - "$IS=\\p{Line_Break=Infix_Numeric}", - "$JL=\\p{Line_Break=JL}", - "$JT=\\p{Line_Break=JT}", - "$JV=\\p{Line_Break=JV}", - "$LF=\\p{Line_Break=Line_Feed}", - "$NL=\\p{Line_Break=Next_Line}", - "$NS=\\p{Line_Break=Nonstarter}", - "$NU=\\p{Line_Break=Numeric}", - "$OP=\\p{Line_Break=Open_Punctuation}", - "$PO=\\p{Line_Break=Postfix_Numeric}", - "$PR=\\p{Line_Break=Prefix_Numeric}", - "$QU=\\p{Line_Break=Quotation}", - "$SA=\\p{Line_Break=Complex_Context}", - "$SG=\\p{Line_Break=Surrogate}", - "$SP=\\p{Line_Break=Space}", - "$SY=\\p{Line_Break=Break_Symbols}", - "$WJ=\\p{Line_Break=Word_Joiner}", - "$XX=\\p{Line_Break=Unknown}", - "$ZW=\\p{Line_Break=ZWSpace}", - "$CJ=\\p{Line_Break=Conditional_Japanese_Starter}", - "$RI=\\p{Line_Break=Regional_Indicator}", - "$EB=\\p{Line_Break=E_Base}", - "$EM=\\p{Line_Break=E_Modifier}", - "$ZWJ_O=\\p{Line_Break=ZWJ}", - "$ZWJ=\\p{Line_Break=ZWJ}", - "# Macros", - "$CM=[$CM1 $ZWJ]", - "# LB 1 Assign a line breaking class to each code point of the input. ", - "# Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm.", - "# NOTE: CB is ok to fall through, but must handle others here.", - // "show $AL", - "$AL=[$AI $AL $SG $XX $SA]", - "$NS=[$NS $CJ]", - // "show $AL", - // "$oldAL=$AL", // for debugging - "# WARNING: Fixes for Rule 9", - "# Treat X (CM|ZWJ* as if it were X.", - "# Where X is any line break class except SP, BK, CR, LF, NL or ZW.", - "$X=$CM*", - "# Macros", - "$Spec1_=[$SP $BK $CR $LF $NL $ZW]", - "$Spec2_=[^ $SP $BK $CR $LF $NL $ZW]", - "$Spec3a_=[^ $SP $BA $HY $CM]", - "$Spec3b_=[^ $BA $HY $CM]", - "$Spec4_=[^ $NU $CM]", - "$AI=($AI $X)", - "$AL=($AL $X)", - "$B2=($B2 $X)", - "$BA=($BA $X)", - "$BB=($BB $X)", - "$CB=($CB $X)", - "$CL=($CL $X)", - "$CP=($CP $X)", - "$CM=($CM $X)", - // "$CM=($CM $X)", - "$EX=($EX $X)", - "$GL=($GL $X)", - "$H2=($H2 $X)", - "$H3=($H3 $X)", - "$HL=($HL $X)", - "$HY=($HY $X)", - "$ID=($ID $X)", - "$IN=($IN $X)", - "$IS=($IS $X)", - "$JL=($JL $X)", - "$JT=($JT $X)", - "$JV=($JV $X)", - "$NS=($NS $X)", - "$NU=($NU $X)", - "$OP=($OP $X)", - "$PO=($PO $X)", - "$PR=($PR $X)", - "$QU=($QU $X)", - "$SA=($SA $X)", - "$SG=($SG $X)", - "$SY=($SY $X)", - "$WJ=($WJ $X)", - "$XX=($XX $X)", - "$RI=($RI $X)", - "$EB=($EB $X)", - "$EM=($EM $X)", - "$ZWJ=($ZWJ $X)", - "# OUT OF ORDER ON PURPOSE", - "# LB 10 Treat any remaining combining mark as AL.", - "$AL=($AL | ^ $CM | (?<=$Spec1_) $CM)", - "# Rules", - "# LB 4 Always break after hard line breaks (but never between CR and LF).", - "4) $BK \u00F7", - "# LB 5 Treat CR followed by LF, as well as CR, LF and NL as hard line breaks.", - "5.01) $CR \u00D7 $LF", - "5.02) $CR \u00F7", - "5.03) $LF \u00F7", - "5.04) $NL \u00F7", - "# LB 6 Do not break before hard line breaks.", - "6) \u00D7 ( $BK | $CR | $LF | $NL )", - "# LB 7 Do not break before spaces or zero-width space.", - "7.01) \u00D7 $SP", - "7.02) \u00D7 $ZW", - "# LB 8 Break before any character following a zero-width space, even if one or more spaces intervene.", - "8) $ZW $SP* \u00F7", - "# LB 8a Don't break between ZWJ and IDs (for use in Emoji ZWJ sequences)", - "8.1) $ZWJ_O \u00D7", - "# LB 9 Do not break a combining character sequence; treat it as if it has the LB class of the base character", - "# in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.)", - "9) $Spec2_ \u00D7 $CM", - "#WARNING: this is done by modifying the variable values for all but SP.... That is, $AL is really ($AI $CM*)!", - "# LB 11 Do not break before or after WORD JOINER and related characters.", - "11.01) \u00D7 $WJ", - "11.02) $WJ \u00D7", - "# LB 12 Do not break after NBSP and related characters.", - // "12.01) [^$SP] \u00D7 $GL", - "12) $GL \u00D7", - "12.1) $Spec3a_ \u00D7 $GL", - "12.2) $Spec3b_ $CM+ \u00D7 $GL", - "12.3) ^ $CM+ \u00D7 $GL", - "# LB 13 Do not break before \u2018]\u2019 or \u2018!\u2019 or \u2018;\u2019 or \u2018/\u2019, even after spaces.", - "# Using customization 7.", - "13.01) \u00D7 $EX", - "13.02) $Spec4_ \u00D7 ($CL | $CP | $IS | $SY)", - "13.03) $Spec4_ $CM+ \u00D7 ($CL | $CP | $IS | $SY)", - "13.04) ^ $CM+ \u00D7 ($CL | $CP | $IS | $SY)", - // "13.03) $Spec4_ \u00D7 $IS", - // "13.04) $Spec4_ \u00D7 $SY", - "#LB 14 Do not break after \u2018[\u2019, even after spaces.", - "14) $OP $SP* \u00D7", - "# LB 15 Do not break within \u2018\"[\u2019, even with intervening spaces.", - "15) $QU $SP* \u00D7 $OP", - "# LB 16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces.", - "16) ($CL | $CP) $SP* \u00D7 $NS", - "# LB 17 Do not break within \u2018\u2014\u2014\u2019, even with intervening spaces.", - "17) $B2 $SP* \u00D7 $B2", - "# LB 18 Break after spaces.", - "18) $SP \u00F7", - "# LB 19 Do not break before or after \u2018\"\u2019.", - "19.01) \u00D7 $QU", - "19.02) $QU \u00D7", - "# LB 20 Break before and after unresolved CB.", - "20.01) \u00F7 $CB", - "20.02) $CB \u00F7", - "# LB 21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents.", - "21.01) \u00D7 $BA", - "21.02) \u00D7 $HY", - "21.03) \u00D7 $NS", - "21.04) $BB \u00D7", - "# LB 21a Don't break after Hebrew + Hyphen.", - "21.1) $HL ($HY | $BA) \u00D7", - "# LB 21b Don’t break between Solidus and Hebrew letters.", - "21.2) $SY × $HL", - "# LB 22 Do not break between two ellipses, or between letters, numbers or exclamations and ellipsis.", - // "show $AL", - "22.01) ($AL | $HL) \u00D7 $IN", - "22.02) $EX \u00D7 $IN", - "22.03) ($ID | $EB | $EM) \u00D7 $IN", - "22.04) $IN \u00D7 $IN", - "22.05) $NU \u00D7 $IN", - "# LB 23 Do not break between digits and letters.", - // "23.01) ($ID | $EB | $EM) \u00D7 $PO", - "23.02) ($AL | $HL) \u00D7 $NU", - "23.03) $NU \u00D7 ($AL | $HL)", - "# LB 24 Do not break between prefix and letters or ideographs.", - "23.12) $PR \u00D7 ($ID | $EB | $EM)", - "23.13) ($ID | $EB | $EM) \u00D7 $PO", - "# LB24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix.", - "24.02) ($PR | $PO) \u00D7 ($AL | $HL)", - "24.03) ($AL | $HL) \u00D7 ($PR | $PO)", - "# Using customization 7", - "# LB Alternative: ( PR | PO) ? ( OP | HY ) ? NU (NU | SY | IS) * (CL | CP) ? ( PR | PO) ?", - "# Insert \u00D7 every place it could go. However, make sure that at least one thing is concrete, otherwise would cause $NU to not break before or after ", - "25.01) ($PR | $PO) \u00D7 ( $OP | $HY )? $NU", - "25.02) ( $OP | $HY ) \u00D7 $NU", - "25.03) $NU \u00D7 ($NU | $SY | $IS)", - "25.04) $NU ($NU | $SY | $IS)* \u00D7 ($NU | $SY | $IS | $CL | $CP)", - "25.05) $NU ($NU | $SY | $IS)* ($CL | $CP)? \u00D7 ($PO | $PR)", - "#LB 26 Do not break a Korean syllable.", - "26.01) $JL \u00D7 $JL | $JV | $H2 | $H3", - "26.02) $JV | $H2 \u00D7 $JV | $JT", - "26.03) $JT | $H3 \u00D7 $JT", - "# LB 27 Treat a Korean Syllable Block the same as ID.", - "27.01) $JL | $JV | $JT | $H2 | $H3 \u00D7 $PO", - "27.02) $PR \u00D7 $JL | $JV | $JT | $H2 | $H3", - "# LB 28 Do not break between alphabetics (\"at\").", - "28) ($AL | $HL) \u00D7 ($AL | $HL)", - "# LB 29 Do not break between numeric punctuation and alphabetics (\"e.g.\").", - "29) $IS \u00D7 ($AL | $HL)", - "# LB 30 Do not break between letters, numbers or ordinary symbols and opening or closing punctuation.", - "30.01) ($AL | $HL | $NU) \u00D7 $OP", - "30.02) $CP \u00D7 ($AL | $HL | $NU)", - "# LB 30a Break between two Regional Indicators if and only if there is an even number of them before the point being considered.", - "30.11) ^ ($RI $RI)* $RI × $RI", - "30.12) [^$RI] ($RI $RI)* $RI × $RI", - "30.13) $RI ÷ $RI", - "30.2) $EB × $EM", - }, - { - "SentenceBreak", - "$CR=\\p{Sentence_Break=CR}", - "$LF=\\p{Sentence_Break=LF}", - "$Extend=\\p{Sentence_Break=Extend}", - "$Format=\\p{Sentence_Break=Format}", - "$Sep=\\p{Sentence_Break=Sep}", - "$Sp=\\p{Sentence_Break=Sp}", - "$Lower=\\p{Sentence_Break=Lower}", - "$Upper=\\p{Sentence_Break=Upper}", - "$OLetter=\\p{Sentence_Break=OLetter}", - "$Numeric=\\p{Sentence_Break=Numeric}", - "$ATerm=\\p{Sentence_Break=ATerm}", - "$STerm=\\p{Sentence_Break=STerm}", - "$Close=\\p{Sentence_Break=Close}", - "$SContinue=\\p{Sentence_Break=SContinue}", - "$Any=.", - // "# subtract Format from Control, since we don't want to break before/after", - // "$Control=[$Control-$Format]", - "# Expresses the negation in rule 8; can't do this with normal regex, but works with UnicodeSet, which is all we need.", - // "$NotStuff=[^$OLetter $Upper $Lower $Sep]", - // "# $ATerm and $Sterm are temporary, to match ICU until UTC decides.", - - "# WARNING: For Rule 5, now add format and extend to everything but Sep, Format, and Extend", - "$FE=[$Format $Extend]", - "# Special rules", - "$NotPreLower_=[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]", - // "$NotSep_=[^ $Sep $CR $LF]", - - // "$FE=$Extend* $Format*", - "$Sp=($Sp $FE*)", - "$Lower=($Lower $FE*)", - "$Upper=($Upper $FE*)", - "$OLetter=($OLetter $FE*)", - "$Numeric=($Numeric $FE*)", - "$ATerm=($ATerm $FE*)", - "$STerm=($STerm $FE*)", - "$Close=($Close $FE*)", - "$SContinue=($SContinue $FE*)", - "# Macros", - "$ParaSep = ($Sep | $CR | $LF)", - "$SATerm = ($STerm | $ATerm)", - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break within CRLF.", - "3) $CR \u00D7 $LF", - "# Break after paragraph separators.", - "4) $ParaSep \u00F7", - // "3.4) ( $Control | $CR | $LF ) \u00F7", - // "3.5) \u00F7 ( $Control | $CR | $LF )", - "# Ignore Format and Extend characters, except after sot, ParaSep, and within CRLF. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend)", - "# WARNING: Implemented as don't break before format (except after linebreaks),", - "# AND add format and extend in all variables definitions that appear after this point!", - // "3.91) [^$Control | $CR | $LF] \u00D7 $Extend", - "5) \u00D7 [$Format $Extend]", - "# Do not break after full stop in certain contexts. [See note below.]", - "# Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter,", - "# is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase.", - "# For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence.", - "6) $ATerm \u00D7 $Numeric", - "7) ($Upper | $Lower) $ATerm \u00D7 $Upper", - "8) $ATerm $Close* $Sp* \u00D7 $NotPreLower_* $Lower", - "8.1) $SATerm $Close* $Sp* \u00D7 ($SContinue | $SATerm)", - "# Break after sentence terminators, but include closing punctuation, trailing spaces, and any paragraph separator. [See note below.] Include closing punctuation, trailing spaces, and (optionally) a paragraph separator.", - "9) $SATerm $Close* \u00D7 ( $Close | $Sp | $ParaSep )", - "# Note the fix to $Sp*, $Sep?", - "10) $SATerm $Close* $Sp* \u00D7 ( $Sp | $ParaSep )", - "11) $SATerm $Close* $Sp* $ParaSep? \u00F7", - "#Otherwise, do not break", - "998) \u00D7 $Any", - }, - { - "WordBreak", - "$CR=\\p{Word_Break=CR}", - "$LF=\\p{Word_Break=LF}", - "$Newline=\\p{Word_Break=Newline}", - // "$Control=\\p{Word_Break=Control}", - "$Extend=\\p{Word_Break=Extend}", - // "$NEWLINE=[$CR $LF \\u0085 \\u000B \\u000C \\u2028 \\u2029]", - // "$Sep=\\p{Sentence_Break=Sep}", - "# Now normal variables", - "$Format=\\p{Word_Break=Format}", - "$Katakana=\\p{Word_Break=Katakana}", - "$ALetter=\\p{Word_Break=ALetter}", - "$MidLetter=\\p{Word_Break=MidLetter}", - "$MidNum=\\p{Word_Break=MidNum}", - "$MidNumLet=\\p{Word_Break=MidNumLet}", - "$Numeric=\\p{Word_Break=Numeric}", - "$ExtendNumLet=\\p{Word_Break=ExtendNumLet}", - "$RI=\\p{Word_Break=Regional_Indicator}", - "$Hebrew_Letter=\\p{Word_Break=Hebrew_Letter}", - "$Double_Quote=\\p{Word_Break=Double_Quote}", - "$Single_Quote=\\p{Word_Break=Single_Quote}", - - // "$E_Base=\\p{Word_Break=E_Base}", - // "$E_Modifier=\\p{Word_Break=E_Modifier}", - "$ZWJ=\\p{Word_Break=ZWJ}", - "$ExtPict=\\p{Extended_Pictographic}", - - // "$EBG=\\p{Word_Break=E_Base_GAZ}", - // "$Glue_After_Zwj=\\p{Word_Break=Glue_After_Zwj}", - - "$WSegSpace=\\p{Word_Break=WSegSpace}", - "# Macros", - "$AHLetter=($ALetter | $Hebrew_Letter)", - "$MidNumLetQ=($MidNumLet | $Single_Quote)", - "# WARNING: For Rule 4: Fixes for GC, Format", - // "# Subtract Format from Control, since we don't want to break before/after", - // "$Control=[$Control-$Format]", - "# Add format and extend to everything", - "$FE=[$Format $Extend $ZWJ]", - "# Special rules", - "$NotBreak_=[^ $Newline $CR $LF ]", - // "$FE= ($Extend | $Format)*", - "$Katakana=($Katakana $FE*)", - "$ALetter=($ALetter $FE*)", - "$MidLetter=($MidLetter $FE*)", - "$MidNum=($MidNum $FE*)", - "$MidNumLet=($MidNumLet $FE*)", - "$Numeric=($Numeric $FE*)", - "$ExtendNumLet=($ExtendNumLet $FE*)", - "$RI=($RI $FE*)", - "$Hebrew_Letter=($Hebrew_Letter $FE*)", - "$Double_Quote=($Double_Quote $FE*)", - "$Single_Quote=($Single_Quote $FE*)", - - // "$E_Base=($E_Base $FE*)", - // "$E_Modifier=($E_Modifier $FE*)", - // "$ZWJ=($ZWJ $FE*)", don't do this one! - // "$Glue_After_Zwj=($Glue_After_Zwj $FE*)", - // "$EBG=($EBG $FE*)", - - "$AHLetter=($AHLetter $FE*)", - "$MidNumLetQ=($MidNumLetQ $FE*)", - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break within CRLF.", - "3) $CR \u00D7 $LF", - "# Otherwise break before and after Newlines (including CR and LF)", - "3.1) ($Newline | $CR | $LF) \u00F7", - "3.2) \u00F7 ($Newline | $CR | $LF)", - "# Do not break within emoji zwj sequences.", - "3.3) $ZWJ × $ExtPict", - "3.4) $WSegSpace × $WSegSpace", - - // "3.4) ( $Control | $CR | $LF ) \u00F7", - // "3.5) \u00F7 ( $Control | $CR | $LF )", - // "3.9) \u00D7 $Extend", - // "3.91) [^$Control | $CR | $LF] \u00D7 $Extend", - "# Ignore Format and Extend characters, except after sot, CR, LF, and Newline. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend)", - "# WARNING: Implemented as don't break before format (except after linebreaks),", - "# AND add format and extend in all variables definitions that appear after this point!", - // "4) \u00D7 [$Format $Extend]", - "4) $NotBreak_ \u00D7 [$Format $Extend $ZWJ]", - "# Vanilla rules", - "# Do not break between most letters.", - "5) $AHLetter \u00D7 $AHLetter", - "# Do not break letters across certain punctuation.", - "6) $AHLetter \u00D7 ($MidLetter | $MidNumLetQ) $AHLetter", - "7) $AHLetter ($MidLetter | $MidNumLetQ) \u00D7 $AHLetter", - "7.1) $Hebrew_Letter × $Single_Quote", - "7.2) $Hebrew_Letter × $Double_Quote $Hebrew_Letter", - "7.3) $Hebrew_Letter $Double_Quote × $Hebrew_Letter", - "# Do not break within sequences of digits, or digits adjacent to letters (“3a”, or “A3”).", - "8) $Numeric \u00D7 $Numeric", - "9) $AHLetter \u00D7 $Numeric", - "10) $Numeric \u00D7 $AHLetter", - "# Do not break within sequences, such as “3.2” or “3,456.789”.", - "11) $Numeric ($MidNum | $MidNumLetQ) \u00D7 $Numeric", - "12) $Numeric \u00D7 ($MidNum | $MidNumLetQ) $Numeric", - "# Do not break between Katakana.", - "13) $Katakana \u00D7 $Katakana", - "# Do not break from extenders.", - "13.1) ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) \u00D7 $ExtendNumLet", - "13.2) $ExtendNumLet \u00D7 ($AHLetter | $Numeric | $Katakana)", - - // "# Do not break within emoji modifier sequences.", - // "14) $E_Base × $E_Modifier", - - "# Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.", - "15) ^ ($RI $RI)* $RI × $RI", - "16) [^$RI] ($RI $RI)* $RI × $RI", - "# Otherwise, break everywhere (including around ideographs).", - } - }; - - public static void main(String[] args) throws IOException { - for (int i = 0; i < cannedRules.length; ++i) { - String type = cannedRules[i][0]; - boolean hadHash = false; - try (PrintWriter out = - FileUtilities.openUTF8Writer( - Settings.Output.GEN_DIR + "segmentation/", type + "Rules.txt")) { - out.println("# Segmentation rules for " + type); - out.println("#"); - out.println("# Character Classes"); - out.println("#"); - for (int j = 1; j < cannedRules[i].length; ++j) { - String cannedRule = cannedRules[i][j].trim(); - if (cannedRule.equals("#")) { - continue; - } - boolean hasHash = cannedRule.startsWith("#"); - if (hasHash && !hadHash) { - out.println("#"); - } - out.println(cannedRule); - if (hasHash) { - out.println("#"); - } - hadHash = hasHash; - } - } - } - - try (PrintWriter out = - FileUtilities.openUTF8Writer( - Settings.Output.GEN_DIR + "cldr/segmentation/", "rootAddon.xml")) { - out.println( - "\n" - + "\n" - + "\n" - + "\n" - + "\t\n" - + "\t\t\n" - + "\t\t\n" - + "\t\n" - + "\t"); - for (final String type : - new String[] { - "GraphemeClusterBreak", "LineBreak", "SentenceBreak", "WordBreak" - }) { - final Builder segBuilder = - Segmenter.make(ToolUnicodePropertySource.make(Default.ucdVersion()), type); - out.print(segBuilder.toString(type, "\t\t")); - if (type.equals("")) { - out.print( - "\t\t\t\n" - + "\t\t\t\t\n" - + "\t\t\t\n"); - } - } - out.println("\t\n" + ""); - } - } } From 47d165a233ced67aeb3a3896724ff00d73ad9129 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 22:00:19 +0100 Subject: [PATCH 08/38] 3 compound tone diacritics 1AD6..1AD8 (#588) * UnicodeData.txt from L2/23-208 * LB=CM * Scripts.txt * Diacritics are diacritics. * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++--- .../data/ucd/dev/DerivedCoreProperties.txt | 22 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 ++-- unicodetools/data/ucd/dev/LineBreak.txt | 4 ++-- .../data/ucd/dev/NormalizationTest.txt | 8 ++++++- unicodetools/data/ucd/dev/PropList.txt | 6 ++--- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 3 +++ .../data/ucd/dev/VerticalOrientation.txt | 4 ++-- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++++----- .../dev/extracted/DerivedCombiningClass.txt | 10 ++++----- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++----- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 ++++----- .../data/ucd/dev/extracted/DerivedName.txt | 7 ++++-- 19 files changed, 76 insertions(+), 64 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 9c57f526b..a24865de5 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 17:50:18 GMT +# Date: 2024-11-13, 19:41:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2070,8 +2070,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE 0C5C ; 17.0 # TELUGU ARCHAIC SHRII 0CDC ; 17.0 # KANNADA ARCHAIC SHRII -1ACF..1AD5 ; 17.0 # [7] COMBINING DOUBLE CARON..COMBINING MACRON-VERTICAL-LINE +1ACF..1AD8 ; 17.0 # [10] COMBINING DOUBLE CARON..COMBINING MACRON-ACUTE-GRAVE -# Total code points: 13 +# Total code points: 16 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 531cc7816..8dad8df1d 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 17:50:34 GMT +# Date: 2024-11-13, 19:41:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3195,7 +3195,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD5 ; Case_Ignorable # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; Case_Ignorable # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3506,7 +3506,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2758 +# Total code points: 2761 # ================================================ @@ -7460,7 +7460,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1AD5 ; ID_Continue # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; ID_Continue # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -8373,7 +8373,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144554 +# Total code points: 144557 # ================================================ @@ -9644,7 +9644,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1AD5 ; XID_Continue # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; XID_Continue # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -10562,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144535 +# Total code points: 144538 # ================================================ @@ -10783,7 +10783,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .... 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD5 ; Extend # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; Extend # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -495,7 +495,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2207 +# Total code points: 2210 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 1a242c881..158e1014d 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 17:50:59 GMT +# Date: 2024-11-13, 19:42:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -247,7 +247,7 @@ 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD5 ; Extend # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; Extend # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -586,7 +586,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2610 +# Total code points: 2613 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 1120cd60a..e85bddb54 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 17:51:01 GMT +# Date: 2024-11-13, 19:42:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -283,7 +283,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD5 ; Extend # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; Extend # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -623,7 +623,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2614 +# Total code points: 2617 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index dc9976e2c..2bd2e39ba 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 17:50:32 GMT +# Date: 2024-11-13, 19:41:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1215,8 +1215,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815339 code points not listed here. -# Total code points: 1095504 +# The above property value applies to 815336 code points not listed here. +# Total code points: 1095501 # ================================================ @@ -2190,7 +2190,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD5 ; NSM # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; NSM # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2409,7 +2409,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2037 +# Total code points: 2040 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index f544c79e5..000a34270 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 17:50:34 GMT +# Date: 2024-11-13, 19:41:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2061,8 +2061,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821568 code points not listed here. -# Total code points: 1113171 +# The above property value applies to 821565 code points not listed here. +# Total code points: 1113168 # ================================================ @@ -2737,7 +2737,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1ABB..1ABC ; 230 # Mn [2] COMBINING PARENTHESES ABOVE..COMBINING DOUBLE PARENTHESES ABOVE 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE -1ACB..1AD5 ; 230 # Mn [11] COMBINING TRIPLE ACUTE ACCENT..COMBINING MACRON-VERTICAL-LINE +1ACB..1AD8 ; 230 # Mn [14] COMBINING TRIPLE ACUTE ACCENT..COMBINING MACRON-ACUTE-GRAVE 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2804,7 +2804,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 524 +# Total code points: 527 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 535c77663..23037fe00 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 17:50:35 GMT +# Date: 2024-11-13, 19:41:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -692,7 +692,7 @@ 1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD5 ; N # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; N # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -2104,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761086 code points not listed here. +# The above property value applies to 761083 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 6fbbecc6f..e940037f3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 17:50:36 GMT +# Date: 2024-11-13, 19:41:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -227,7 +227,7 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1AD6..1AFF ; Cn # [42] .. +1AD9..1AFF ; Cn # [39] .. 1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. @@ -746,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819520 +# Total code points: 819517 # ================================================ @@ -2856,7 +2856,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1AD5 ; Mn # [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; Mn # [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3072,7 +3072,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2029 +# Total code points: 2032 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 1963202c1..2a1034c07 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-13, 17:50:36 GMT +# Date: 2024-11-13, 19:41:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -350,7 +350,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD5 ; T # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; T # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -582,6 +582,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2194 +# Total code points: 2197 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index c51ee0406..a91c7453c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 17:50:37 GMT +# Date: 2024-11-13, 19:41:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757640 code points not listed here. -# Total code points: 895108 +# The above property value applies to 757637 code points not listed here. +# Total code points: 895105 # ================================================ @@ -2055,7 +2055,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1A7F ; CM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD5 ; CM # Mn [23] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-VERTICAL-LINE +1ABF..1AD8 ; CM # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2388,7 +2388,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2479 +# Total code points: 2482 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index f7a36b3fb..7dc96c32d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 17:50:37 GMT +# Date: 2024-11-13, 19:41:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -6125,6 +6125,9 @@ 1AD3 ; COMBINING ACUTE-VERTICAL-LINE 1AD4 ; COMBINING VERTICAL-LINE-MACRON 1AD5 ; COMBINING MACRON-VERTICAL-LINE +1AD6 ; COMBINING VERTICAL-LINE-ACUTE-GRAVE +1AD7 ; COMBINING VERTICAL-LINE-GRAVE-ACUTE +1AD8 ; COMBINING MACRON-ACUTE-GRAVE 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -45380,6 +45383,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155011 +# Total code points: 155014 # EOF From 8f518642376777a4eff7e458c136925fc896bc3b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 22:20:09 +0100 Subject: [PATCH 09/38] 5 Harrington diacritics (#587) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * UnicodeData.txt from L2/23-206 * LB=CM * Scripts.txt * Regenerate UCD * Let’s say that the diacritics are diacritics * Regenerate UCD * UnicodeData.txt from L2/23-206R * Regenerate UCD * Approve the name change for provisionally assigned character U+1ADC COMBINING FALLING DIAGONAL DIAERESIS to COMBINING DIAERESIS WITH RAISED LEFT DOT. * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++--- .../data/ucd/dev/DerivedCoreProperties.txt | 22 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 ++-- unicodetools/data/ucd/dev/LineBreak.txt | 4 ++-- .../data/ucd/dev/NormalizationTest.txt | 12 +++++++++- unicodetools/data/ucd/dev/PropList.txt | 6 ++--- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 5 +++++ .../data/ucd/dev/VerticalOrientation.txt | 4 ++-- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++++----- .../dev/extracted/DerivedCombiningClass.txt | 13 ++++++----- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++----- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 ++++----- .../data/ucd/dev/extracted/DerivedName.txt | 9 ++++++-- 19 files changed, 86 insertions(+), 65 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index a24865de5..dd8846909 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 19:41:18 GMT +# Date: 2024-11-13, 21:07:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2070,8 +2070,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE 0C5C ; 17.0 # TELUGU ARCHAIC SHRII 0CDC ; 17.0 # KANNADA ARCHAIC SHRII -1ACF..1AD8 ; 17.0 # [10] COMBINING DOUBLE CARON..COMBINING MACRON-ACUTE-GRAVE +1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW -# Total code points: 16 +# Total code points: 21 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 8dad8df1d..14aacf7bf 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 19:41:37 GMT +# Date: 2024-11-13, 21:07:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3195,7 +3195,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD8 ; Case_Ignorable # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; Case_Ignorable # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3506,7 +3506,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2761 +# Total code points: 2766 # ================================================ @@ -7460,7 +7460,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1AD8 ; ID_Continue # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; ID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -8373,7 +8373,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144557 +# Total code points: 144562 # ================================================ @@ -9644,7 +9644,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1AD8 ; XID_Continue # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; XID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -10562,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144538 +# Total code points: 144543 # ================================================ @@ -10783,7 +10783,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .... 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD8 ; Extend # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -495,7 +495,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2210 +# Total code points: 2215 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 158e1014d..015ce1c42 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 19:42:03 GMT +# Date: 2024-11-13, 21:08:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -247,7 +247,7 @@ 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD8 ; Extend # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -586,7 +586,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2613 +# Total code points: 2618 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e85bddb54..ee543e71d 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 19:42:05 GMT +# Date: 2024-11-13, 21:08:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -283,7 +283,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD8 ; Extend # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -623,7 +623,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2617 +# Total code points: 2622 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2bd2e39ba..4d67d80ab 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 19:41:35 GMT +# Date: 2024-11-13, 21:07:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1215,8 +1215,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815336 code points not listed here. -# Total code points: 1095501 +# The above property value applies to 815331 code points not listed here. +# Total code points: 1095496 # ================================================ @@ -2190,7 +2190,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD8 ; NSM # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; NSM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2409,7 +2409,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2040 +# Total code points: 2045 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 000a34270..3f16e7c62 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 19:41:37 GMT +# Date: 2024-11-13, 21:07:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2061,8 +2061,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821565 code points not listed here. -# Total code points: 1113168 +# The above property value applies to 821560 code points not listed here. +# Total code points: 1113163 # ================================================ @@ -2596,6 +2596,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1ABF..1AC0 ; 220 # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW 1AC3..1AC4 ; 220 # Mn [2] COMBINING LEFT PARENTHESIS BELOW LEFT..COMBINING RIGHT PARENTHESIS BELOW RIGHT 1ACA ; 220 # Mn COMBINING DOUBLE PLUS SIGN BELOW +1ADD ; 220 # Mn COMBINING DOT-AND-RING BELOW 1B6C ; 220 # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP 1CD5..1CD9 ; 220 # Mn [5] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER 1CDC..1CDF ; 220 # Mn [4] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE THREE DOTS BELOW @@ -2628,7 +2629,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 182 +# Total code points: 183 # ================================================ @@ -2737,7 +2738,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1ABB..1ABC ; 230 # Mn [2] COMBINING PARENTHESES ABOVE..COMBINING DOUBLE PARENTHESES ABOVE 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE -1ACB..1AD8 ; 230 # Mn [14] COMBINING TRIPLE ACUTE ACCENT..COMBINING MACRON-ACUTE-GRAVE +1ACB..1ADC ; 230 # Mn [18] COMBINING TRIPLE ACUTE ACCENT..COMBINING DIAERESIS WITH RAISED LEFT DOT 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2804,7 +2805,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 527 +# Total code points: 531 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 23037fe00..c72d382d0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 19:41:39 GMT +# Date: 2024-11-13, 21:07:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -692,7 +692,7 @@ 1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD8 ; N # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; N # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -2104,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761083 code points not listed here. +# The above property value applies to 761078 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index e940037f3..27331217a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 19:41:39 GMT +# Date: 2024-11-13, 21:07:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -227,7 +227,7 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1AD9..1AFF ; Cn # [39] .. +1ADE..1AFF ; Cn # [34] .. 1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. @@ -746,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819517 +# Total code points: 819512 # ================================================ @@ -2856,7 +2856,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1AD8 ; Mn # [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; Mn # [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3072,7 +3072,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2032 +# Total code points: 2037 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 2a1034c07..c2d9a40ef 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-13, 19:41:40 GMT +# Date: 2024-11-13, 21:07:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -350,7 +350,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD8 ; T # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; T # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -582,6 +582,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2197 +# Total code points: 2202 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index a91c7453c..f04298e3c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 19:41:40 GMT +# Date: 2024-11-13, 21:07:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757637 code points not listed here. -# Total code points: 895105 +# The above property value applies to 757632 code points not listed here. +# Total code points: 895100 # ================================================ @@ -2055,7 +2055,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1A7F ; CM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY -1ABF..1AD8 ; CM # Mn [26] COMBINING LATIN SMALL LETTER W BELOW..COMBINING MACRON-ACUTE-GRAVE +1ABF..1ADD ; CM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2388,7 +2388,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2482 +# Total code points: 2487 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 7dc96c32d..447bb6104 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 19:41:40 GMT +# Date: 2024-11-13, 21:07:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -6128,6 +6128,11 @@ 1AD6 ; COMBINING VERTICAL-LINE-ACUTE-GRAVE 1AD7 ; COMBINING VERTICAL-LINE-GRAVE-ACUTE 1AD8 ; COMBINING MACRON-ACUTE-GRAVE +1AD9 ; COMBINING SHARP SIGN +1ADA ; COMBINING FLAT SIGN +1ADB ; COMBINING DOWN TACK ABOVE +1ADC ; COMBINING DIAERESIS WITH RAISED LEFT DOT +1ADD ; COMBINING DOT-AND-RING BELOW 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -45383,6 +45388,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155014 +# Total code points: 155019 # EOF From 2fb2e8b3015a5a12969edfc52996217d0df96b4e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 22:33:21 +0100 Subject: [PATCH 10/38] 12 IPA diacritics, 11 above and 1 below (#742) * UnicodeData.txt lines from proposal * LineBreak.txt as described in the proposal * Inherited * diacritics are diacritics. * Regenerate UCD * en-GB-oxendict * Regenerate UCD * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 17 +++++++----- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 4 ++- .../data/ucd/dev/NormalizationTest.txt | 26 ++++++++++++++++++- unicodetools/data/ucd/dev/PropList.txt | 5 ++-- unicodetools/data/ucd/dev/Scripts.txt | 5 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 12 +++++++++ .../data/ucd/dev/VerticalOrientation.txt | 3 ++- .../dev/auxiliary/GraphemeBreakProperty.txt | 5 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 5 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 9 ++++--- .../dev/extracted/DerivedCombiningClass.txt | 16 +++++++----- .../dev/extracted/DerivedEastAsianWidth.txt | 5 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++--- .../ucd/dev/extracted/DerivedJoiningType.txt | 5 ++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 12 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 16 ++++++++++-- 19 files changed, 121 insertions(+), 47 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index dd8846909..7444c951e 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 21:07:25 GMT +# Date: 2024-11-13, 21:22:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2071,7 +2071,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 0C5C ; 17.0 # TELUGU ARCHAIC SHRII 0CDC ; 17.0 # KANNADA ARCHAIC SHRII 1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE -# Total code points: 21 +# Total code points: 33 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 14aacf7bf..437bcdc7a 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 21:07:43 GMT +# Date: 2024-11-13, 21:22:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3196,6 +3196,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY 1ABF..1ADD ; Case_Ignorable # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Case_Ignorable # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3506,7 +3507,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2766 +# Total code points: 2778 # ================================================ @@ -7461,6 +7462,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ADD ; ID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; ID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -8373,7 +8375,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144562 +# Total code points: 144574 # ================================================ @@ -9645,6 +9647,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ADD ; XID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; XID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -10562,7 +10565,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144543 +# Total code points: 144555 # ================================================ @@ -10784,6 +10787,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .... 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -495,7 +496,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2215 +# Total code points: 2227 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 015ce1c42..6fbcfa5e8 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 21:08:10 GMT +# Date: 2024-11-13, 21:23:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -248,6 +248,7 @@ 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -586,7 +587,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2618 +# Total code points: 2630 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index ee543e71d..935c90d01 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 21:08:13 GMT +# Date: 2024-11-13, 21:23:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -284,6 +284,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -623,7 +624,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2622 +# Total code points: 2634 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 4d67d80ab..92d70ffa7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 21:07:41 GMT +# Date: 2024-11-13, 21:22:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1215,8 +1215,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815331 code points not listed here. -# Total code points: 1095496 +# The above property value applies to 815319 code points not listed here. +# Total code points: 1095484 # ================================================ @@ -2191,6 +2191,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ADD ; NSM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; NSM # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2409,7 +2410,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2045 +# Total code points: 2057 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3f16e7c62..b29cba1f7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 21:07:43 GMT +# Date: 2024-11-13, 21:22:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2061,8 +2061,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821560 code points not listed here. -# Total code points: 1113163 +# The above property value applies to 821548 code points not listed here. +# Total code points: 1113151 # ================================================ @@ -2597,6 +2597,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1AC3..1AC4 ; 220 # Mn [2] COMBINING LEFT PARENTHESIS BELOW LEFT..COMBINING RIGHT PARENTHESIS BELOW RIGHT 1ACA ; 220 # Mn COMBINING DOUBLE PLUS SIGN BELOW 1ADD ; 220 # Mn COMBINING DOT-AND-RING BELOW +1AE6 ; 220 # Mn COMBINING DOUBLE ARCH BELOW 1B6C ; 220 # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP 1CD5..1CD9 ; 220 # Mn [5] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER 1CDC..1CDF ; 220 # Mn [4] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE THREE DOTS BELOW @@ -2629,7 +2630,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 183 +# Total code points: 184 # ================================================ @@ -2739,6 +2740,8 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE 1ACB..1ADC ; 230 # Mn [18] COMBINING TRIPLE ACUTE ACCENT..COMBINING DIAERESIS WITH RAISED LEFT DOT +1AE0..1AE5 ; 230 # Mn [6] COMBINING LEFT TACK ABOVE..COMBINING SEAGULL ABOVE +1AE7..1AEA ; 230 # Mn [4] COMBINING DOUBLE ARCH ABOVE..COMBINING UPWARDS ARROW ABOVE 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2805,7 +2808,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 531 +# Total code points: 541 # ================================================ @@ -2837,9 +2840,10 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 035D..035E ; 234 # Mn [2] COMBINING DOUBLE BREVE..COMBINING DOUBLE MACRON 0360..0361 ; 234 # Mn [2] COMBINING DOUBLE TILDE..COMBINING DOUBLE INVERTED BREVE +1AEB ; 234 # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1DCD ; 234 # Mn COMBINING DOUBLE CIRCUMFLEX ABOVE -# Total code points: 5 +# Total code points: 6 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index c72d382d0..720171224 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 21:07:45 GMT +# Date: 2024-11-13, 21:22:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -693,6 +693,7 @@ 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY 1ABF..1ADD ; N # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; N # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -2104,7 +2105,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761078 code points not listed here. +# The above property value applies to 761066 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 27331217a..5ec607ad7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 21:07:45 GMT +# Date: 2024-11-13, 21:22:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -227,7 +227,8 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1ADE..1AFF ; Cn # [34] .. +1ADE..1ADF ; Cn # [2] .. +1AEC..1AFF ; Cn # [20] .. 1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. @@ -746,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819512 +# Total code points: 819500 # ================================================ @@ -2857,6 +2858,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ADD ; Mn # [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Mn # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3072,7 +3074,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2037 +# Total code points: 2049 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index c2d9a40ef..d57fd1f6f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-13, 21:07:46 GMT +# Date: 2024-11-13, 21:22:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -351,6 +351,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY 1ABF..1ADD ; T # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; T # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -582,6 +583,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2202 +# Total code points: 2214 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index f04298e3c..e42e13f6b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 21:07:46 GMT +# Date: 2024-11-13, 21:22:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757632 code points not listed here. -# Total code points: 895100 +# The above property value applies to 757620 code points not listed here. +# Total code points: 895088 # ================================================ @@ -312,6 +312,7 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 0F12 ; GL # Po TIBETAN MARK RGYA GRAM SHAD 0FD9..0FDA ; GL # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS 180E ; GL # Cf MONGOLIAN VOWEL SEPARATOR +1AEB ; GL # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1DCD ; GL # Mn COMBINING DOUBLE CIRCUMFLEX ABOVE 1DFC ; GL # Mn COMBINING DOUBLE INVERTED BREVE BELOW 2007 ; GL # Zs FIGURE SPACE @@ -329,7 +330,7 @@ FE2D..FE2E ; GL # Mn [2] COMBINING CONJOINING MACRON BELOW..COMBINING CYRIL 13439..1343B ; GL # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER -# Total code points: 41 +# Total code points: 42 # ================================================ @@ -2056,6 +2057,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ADD ; CM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEA ; CM # Mn [11] COMBINING LEFT TACK ABOVE..COMBINING UPWARDS ARROW ABOVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2388,7 +2390,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2487 +# Total code points: 2498 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 447bb6104..31baaea9b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 21:07:46 GMT +# Date: 2024-11-13, 21:22:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -6133,6 +6133,18 @@ 1ADB ; COMBINING DOWN TACK ABOVE 1ADC ; COMBINING DIAERESIS WITH RAISED LEFT DOT 1ADD ; COMBINING DOT-AND-RING BELOW +1AE0 ; COMBINING LEFT TACK ABOVE +1AE1 ; COMBINING RIGHT TACK ABOVE +1AE2 ; COMBINING MINUS SIGN ABOVE +1AE3 ; COMBINING INVERTED BRIDGE ABOVE +1AE4 ; COMBINING SQUARE ABOVE +1AE5 ; COMBINING SEAGULL ABOVE +1AE6 ; COMBINING DOUBLE ARCH BELOW +1AE7 ; COMBINING DOUBLE ARCH ABOVE +1AE8 ; COMBINING EQUALS SIGN ABOVE +1AE9 ; COMBINING LEFT ANGLE CENTRED ABOVE +1AEA ; COMBINING UPWARDS ARROW ABOVE +1AEB ; COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -45388,6 +45400,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155019 +# Total code points: 155031 # EOF From 422bf479b05a3ff2bde561d2bbf6914ce99c0963 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 22:47:00 +0100 Subject: [PATCH 11/38] One Chess Symbol (#664) * UnicodeData.txt line from L2/24-018 * lb=AL like U+2BF9. * Common like U+2BF9. * Regenerate UCD * move it * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 7 +++---- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++--- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++--- unicodetools/data/ucd/dev/PropList.txt | 6 ++---- unicodetools/data/ucd/dev/Scripts.txt | 7 +++---- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + unicodetools/data/ucd/dev/VerticalOrientation.txt | 4 ++-- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 11 +++++------ .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 7 +++---- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 7 +++---- .../data/ucd/dev/extracted/DerivedGeneralCategory.txt | 10 ++++------ .../data/ucd/dev/extracted/DerivedLineBreak.txt | 11 +++++------ unicodetools/data/ucd/dev/extracted/DerivedName.txt | 5 +++-- 14 files changed, 41 insertions(+), 50 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 7444c951e..13742ee49 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 21:22:30 GMT +# Date: 2024-11-13, 21:34:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2072,7 +2072,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 0CDC ; 17.0 # KANNADA ARCHAIC SHRII 1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW 1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE +2B96 ; 17.0 # EQUALS SIGN WITH INFINITY ABOVE -# Total code points: 33 +# Total code points: 34 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 437bcdc7a..7b44bcca3 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 21:22:48 GMT +# Date: 2024-11-13, 21:35:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -11806,8 +11806,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 2B45..2B46 ; Grapheme_Base # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; Grapheme_Base # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; Grapheme_Base # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; Grapheme_Base # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; Grapheme_Base # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; Grapheme_Base # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; Grapheme_Base # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; Grapheme_Base # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Grapheme_Base # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -12822,7 +12821,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152734 +# Total code points: 152735 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 2bd32a68b..e7019bcf1 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 21:22:52 GMT +# Date: 2024-11-13, 21:35:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1353,8 +1353,7 @@ 2B55 ; W # So HEAVY LARGE CIRCLE 2B56..2B59 ; A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; N # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C5F ; N # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI 2C60..2C7B ; N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index bec9be145..071dcb538 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-13, 21:22:53 GMT +# Date: 2024-11-13, 21:35:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1320,8 +1320,7 @@ 2B4D..2B54 ; AL # So [8] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..WHITE RIGHT-POINTING PENTAGON 2B55..2B59 ; AI # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; AL # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; AL # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; AL # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C5F ; AL # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI 2C60..2C7B ; AL # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; AL # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index cced0526c..1cbd57d2a 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-13, 21:23:00 GMT +# Date: 2024-11-13, 21:35:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1811,9 +1811,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR 2B74..2B75 ; Pattern_Syntax # Cn [2] .. -2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B96 ; Pattern_Syntax # Cn -2B97..2BFF ; Pattern_Syntax # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; Pattern_Syntax # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER 2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET 2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 6282eb580..47a23f20b 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-13, 21:23:13 GMT +# Date: 2024-11-13, 21:35:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -306,8 +306,7 @@ 2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; Common # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; Common # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER 2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET 2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET @@ -633,7 +632,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9053 +# Total code points: 9054 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 5f7a66113..631fca167 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -10272,6 +10272,7 @@ 2B93;NEWLINE RIGHT;So;0;ON;;;;;N;;;;; 2B94;FOUR CORNER ARROWS CIRCLING ANTICLOCKWISE;So;0;ON;;;;;N;;;;; 2B95;RIGHTWARDS BLACK ARROW;So;0;ON;;;;;N;;;;; +2B96;EQUALS SIGN WITH INFINITY ABOVE;So;0;ON;;;;;N;;;;; 2B97;SYMBOL FOR TYPE A ELECTRONICS;So;0;ON;;;;;N;;;;; 2B98;THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD;So;0;ON;;;;;N;;;;; 2B99;THREE-D RIGHT-LIGHTED UPWARDS EQUILATERAL ARROWHEAD;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index d720bb978..afadbe23c 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-13, 21:23:15 GMT +# Date: 2024-11-13, 21:35:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1094,7 +1094,7 @@ 2B4D..2B4F ; R # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW 2B50..2B59 ; U # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; R # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; R # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B76..2B96 ; R # So [33] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..EQUALS SIGN WITH INFINITY ABOVE 2B97 ; U # So SYMBOL FOR TYPE A ELECTRONICS 2B98..2BB7 ; R # So [32] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..RIBBON ARROW RIGHT DOWN 2BB8..2BD1 ; U # So [26] UPWARDS WHITE ARROW FROM BAR WITH HORIZONTAL BAR..UNCERTAINTY SIGN diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 92d70ffa7..6386021e6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 21:22:46 GMT +# Date: 2024-11-13, 21:35:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1215,8 +1215,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815319 code points not listed here. -# Total code points: 1095484 +# The above property value applies to 815318 code points not listed here. +# Total code points: 1095483 # ================================================ @@ -1732,8 +1732,7 @@ FF1A ; CS # Po FULLWIDTH COLON 2B45..2B46 ; ON # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; ON # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; ON # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; ON # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; ON # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; ON # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2CE5..2CEA ; ON # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CF9..2CFC ; ON # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; ON # No COPTIC FRACTION ONE HALF @@ -1998,7 +1997,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 6751 +# Total code points: 6752 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index b29cba1f7..84fbc414d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 21:22:47 GMT +# Date: 2024-11-13, 21:35:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -912,8 +912,7 @@ 2B45..2B46 ; 0 # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; 0 # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; 0 # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; 0 # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; 0 # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; 0 # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; 0 # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; 0 # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; 0 # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -2061,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821548 code points not listed here. +# The above property value applies to 821547 code points not listed here. # Total code points: 1113151 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 720171224..408410f88 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 21:22:49 GMT +# Date: 2024-11-13, 21:35:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1071,8 +1071,7 @@ 2B4D..2B4F ; N # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW 2B51..2B54 ; N # So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON 2B5A..2B73 ; N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; N # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; N # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; N # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -2105,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761066 code points not listed here. +# The above property value applies to 761065 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 5ec607ad7..9c9216414 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 21:22:49 GMT +# Date: 2024-11-13, 21:35:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -263,7 +263,6 @@ 242A..243F ; Cn # [22] .. 244B..245F ; Cn # [21] .. 2B74..2B75 ; Cn # [2] .. -2B96 ; Cn # 2CF4..2CF8 ; Cn # [5] .. 2D26 ; Cn # 2D28..2D2C ; Cn # [5] .. @@ -747,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819500 +# Total code points: 819499 # ================================================ @@ -4176,8 +4175,7 @@ FFE3 ; Sk # FULLWIDTH MACRON 2B00..2B2F ; So # [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE 2B45..2B46 ; So # [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B4D..2B73 ; So # [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; So # [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; So # [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; So # [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2CE5..2CEA ; So # [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2E50..2E51 ; So # [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR 2E80..2E99 ; So # [26] CJK RADICAL REPEAT..CJK RADICAL RAP @@ -4285,7 +4283,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 7376 +# Total code points: 7377 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index e42e13f6b..49803bb27 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 21:22:51 GMT +# Date: 2024-11-13, 21:35:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757620 code points not listed here. -# Total code points: 895088 +# The above property value applies to 757619 code points not listed here. +# Total code points: 895087 # ================================================ @@ -1080,8 +1080,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2B47..2B4C ; AL # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B54 ; AL # So [8] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..WHITE RIGHT-POINTING PENTAGON 2B5A..2B73 ; AL # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; AL # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; AL # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; AL # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; AL # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; AL # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -1615,7 +1614,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26683 +# Total code points: 26684 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 31baaea9b..20b8bd47d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 21:22:51 GMT +# Date: 2024-11-13, 21:35:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -10247,6 +10247,7 @@ 2B93 ; NEWLINE RIGHT 2B94 ; FOUR CORNER ARROWS CIRCLING ANTICLOCKWISE 2B95 ; RIGHTWARDS BLACK ARROW +2B96 ; EQUALS SIGN WITH INFINITY ABOVE 2B97 ; SYMBOL FOR TYPE A ELECTRONICS 2B98 ; THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD 2B99 ; THREE-D RIGHT-LIGHTED UPWARDS EQUILATERAL ARROWHEAD @@ -45400,6 +45401,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155031 +# Total code points: 155032 # EOF From 7b4359d60c2f01b9733ed7e6c4138ab1e45bffe0 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 23:01:53 +0100 Subject: [PATCH 12/38] Latin small and capital pharyngeal voiced fricative (#589) * UnicodeData.txt from L2/23-219 * LB=AL * Scripts.txt * Regenerate UCD --- unicodetools/data/ucd/dev/CaseFolding.txt | 3 +- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +- .../data/ucd/dev/DerivedCoreProperties.txt | 86 +++++++++---------- .../ucd/dev/DerivedNormalizationProps.txt | 13 +-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 9 +- unicodetools/data/ucd/dev/LineBreak.txt | 9 +- unicodetools/data/ucd/dev/Scripts.txt | 11 ++- unicodetools/data/ucd/dev/UnicodeData.txt | 4 +- .../data/ucd/dev/VerticalOrientation.txt | 9 +- .../dev/auxiliary/SentenceBreakProperty.txt | 12 +-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 11 ++- .../ucd/dev/extracted/DerivedBidiClass.txt | 11 ++- .../dev/extracted/DerivedCombiningClass.txt | 11 ++- .../dev/extracted/DerivedEastAsianWidth.txt | 11 ++- .../dev/extracted/DerivedGeneralCategory.txt | 15 ++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 15 ++-- .../data/ucd/dev/extracted/DerivedName.txt | 6 +- 17 files changed, 121 insertions(+), 120 deletions(-) diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 1b7a9c156..e52ba9146 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ # CaseFolding-16.0.0.txt -# Date: 2024-04-30, 21:48:11 GMT +# Date: 2024-06-07, 20:34:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1243,6 +1243,7 @@ A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE; C; A7CF; # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 13742ee49..d13de90a8 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 21:34:52 GMT +# Date: 2024-11-13, 21:48:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2073,7 +2073,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW 1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 2B96 ; 17.0 # EQUALS SIGN WITH INFINITY ABOVE +A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE -# Total code points: 34 +# Total code points: 36 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 7b44bcca3..448553ba3 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 21:35:10 GMT +# Date: 2024-11-13, 21:48:56 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -273,8 +273,8 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 01BC..01BF ; Alphabetic # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; Alphabetic # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; Alphabetic # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; Alphabetic # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; Alphabetic # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; Alphabetic # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; Alphabetic # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; Alphabetic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; Alphabetic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; Alphabetic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -834,8 +834,7 @@ A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; Alphabetic # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Alphabetic # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; Alphabetic # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Alphabetic # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; Alphabetic # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -1442,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142763 +# Total code points: 142765 # ================================================ @@ -1596,7 +1595,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 024B ; Lowercase # L& LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Lowercase # L& LATIN SMALL LETTER R WITH STROKE 024F..0293 ; Lowercase # L& [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Lowercase # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Lowercase # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -2074,6 +2073,7 @@ A7C3 ; Lowercase # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Lowercase # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Lowercase # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Lowercase # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Lowercase # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Lowercase # L& LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Lowercase # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN @@ -2751,6 +2751,7 @@ A7C2 ; Uppercase # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Uppercase # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Uppercase # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Uppercase # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Uppercase # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Uppercase # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S @@ -2804,7 +2805,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1978 +# Total code points: 1979 # ================================================ @@ -2822,7 +2823,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 00F8..01BA ; Cased # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL 01BC..01BF ; Cased # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C4..0293 ; Cased # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Cased # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Cased # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Cased # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Cased # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Cased # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -2912,8 +2913,7 @@ A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A7CD ; Cased # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Cased # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; Cased # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; Cased # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -2989,7 +2989,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4578 +# Total code points: 4579 # ================================================ @@ -4112,6 +4112,7 @@ A7C2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ANGLICAN A7C4..A7C7 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID S @@ -4131,7 +4132,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1460 +# Total code points: 1461 # ================================================ @@ -4749,6 +4750,7 @@ A7C3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER ANGLICANA A7C8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Changes_When_Uppercased # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CLOSED INSULAR G A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S @@ -4771,7 +4773,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1552 +# Total code points: 1553 # ================================================ @@ -5388,6 +5390,7 @@ A7C3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER ANGLICANA A7C8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Changes_When_Titlecased # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CLOSED INSULAR G A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S @@ -5410,7 +5413,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1479 +# Total code points: 1480 # ================================================ @@ -6024,6 +6027,7 @@ A7C2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ANGLICAN A7C4..A7C7 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S @@ -6046,7 +6050,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1533 +# Total code points: 1534 # ================================================ @@ -6158,8 +6162,7 @@ A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H A790..A794 ; Changes_When_Casemapped # L& [5] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH PALATAL HOOK A796..A7AE ; Changes_When_Casemapped # L& [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I -A7B0..A7CD ; Changes_When_Casemapped # L& [30] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7B0..A7D1 ; Changes_When_Casemapped # L& [34] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER CLOSED INSULAR G A7D6..A7DC ; Changes_When_Casemapped # L& [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI @@ -6187,7 +6190,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2981 +# Total code points: 2983 # ================================================ @@ -6212,8 +6215,8 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 01BC..01BF ; ID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ID_Start # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ID_Start # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ID_Start # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; ID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; ID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -6564,8 +6567,7 @@ A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER I A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; ID_Start # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; ID_Start # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; ID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -6965,7 +6967,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141273 +# Total code points: 141275 # ================================================ @@ -6994,8 +6996,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 01BC..01BF ; ID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ID_Continue # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ID_Continue # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ID_Continue # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; ID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; ID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -7651,8 +7653,7 @@ A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTE A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; ID_Continue # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; ID_Continue # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; ID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -8375,7 +8376,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144574 +# Total code points: 144576 # ================================================ @@ -8398,8 +8399,8 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 01BC..01BF ; XID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; XID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; XID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; XID_Start # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; XID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; XID_Start # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; XID_Start # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; XID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; XID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; XID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -8748,8 +8749,7 @@ A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; XID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; XID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; XID_Start # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; XID_Start # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; XID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -9154,7 +9154,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141250 +# Total code points: 141252 # ================================================ @@ -9180,8 +9180,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 01BC..01BF ; XID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; XID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; XID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; XID_Continue # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; XID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; XID_Continue # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; XID_Continue # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; XID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; XID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; XID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -9835,8 +9835,7 @@ A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETT A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; XID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; XID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; XID_Continue # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; XID_Continue # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; XID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -10565,7 +10564,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144555 +# Total code points: 144557 # ================================================ @@ -11107,8 +11106,8 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 01BC..01BF ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; Grapheme_Base # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; Grapheme_Base # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; Grapheme_Base # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; Grapheme_Base # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; Grapheme_Base # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; Grapheme_Base # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; Grapheme_Base # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; Grapheme_Base # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; Grapheme_Base # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -11986,8 +11985,7 @@ A788 ; Grapheme_Base # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Grapheme_Base # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Grapheme_Base # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; Grapheme_Base # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; Grapheme_Base # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Grapheme_Base # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; Grapheme_Base # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Grapheme_Base # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -12821,7 +12819,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152735 +# Total code points: 152737 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index ce636abb5..4a4b36243 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ -# DerivedNormalizationProps-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedNormalizationProps-17.0.0.txt +# Date: 2024-11-13, 21:48:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -5448,6 +5448,7 @@ A7C7 ; NFKC_CF; A7C8 # L& LATIN CAPITAL LETTER D WITH S A7C9 ; NFKC_CF; A7CA # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB ; NFKC_CF; 0264 # L& LATIN CAPITAL LETTER RAMS HORN A7CC ; NFKC_CF; A7CD # L& LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; NFKC_CF; A7CF # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; NFKC_CF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; NFKC_CF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_CF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S @@ -9148,7 +9149,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] .... -# Total code points: 10554 +# Total code points: 10555 # ================================================ @@ -11582,6 +11583,7 @@ A7C7 ; NFKC_SCF; A7C8 # L& LATIN CAPITAL LETTER D WITH A7C9 ; NFKC_SCF; A7CA # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB ; NFKC_SCF; 0264 # L& LATIN CAPITAL LETTER RAMS HORN A7CC ; NFKC_SCF; A7CD # L& LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; NFKC_SCF; A7CF # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; NFKC_SCF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; NFKC_SCF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_SCF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S @@ -15282,7 +15284,7 @@ E0080..E00FF ; NFKC_SCF; # Cn [128] .... -# Total code points: 10516 +# Total code points: 10517 # ================================================ @@ -16052,6 +16054,7 @@ A7C2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER ANG A7C4..A7C7 ; Changes_When_NFKC_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_NFKC_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S @@ -16300,6 +16303,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] .... -# Total code points: 10554 +# Total code points: 10555 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index e7019bcf1..525f3ed34 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 21:35:14 GMT +# Date: 2024-11-13, 21:49:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -180,8 +180,8 @@ 0252..0260 ; N # Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK 0261 ; A # Ll LATIN SMALL LETTER SCRIPT G 0262..0293 ; N # Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL -0294 ; N # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; N # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; N # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; N # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C3 ; N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD 02C4 ; A # Sk MODIFIER LETTER UP ARROWHEAD @@ -1549,8 +1549,7 @@ A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; N # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; N # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; N # Ll LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 071dcb538..237a89163 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-13, 21:35:15 GMT +# Date: 2024-11-13, 21:49:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -132,8 +132,8 @@ 01C0..01C3 ; AL # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..024F ; AL # L& [140] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER Y WITH STROKE 0250..0293 ; AL # Ll [68] LATIN SMALL LETTER TURNED A..LATIN SMALL LETTER EZH WITH CURL -0294 ; AL # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; AL # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; AL # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; AL # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; AL # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; AL # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6 ; AL # Lm MODIFIER LETTER CIRCUMFLEX ACCENT @@ -1576,8 +1576,7 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; AL # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; AL # Ll LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 47a23f20b..e98eca2e3 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-13, 21:35:38 GMT +# Date: 2024-11-13, 21:49:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -647,8 +647,8 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG 01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; Latin # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; Latin # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02E0..02E4 ; Latin # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN @@ -675,8 +675,7 @@ A770 ; Latin # Lm MODIFIER LETTER US A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; Latin # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Latin # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; Latin # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Latin # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; Latin # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -701,7 +700,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1487 +# Total code points: 1489 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 631fca167..aae6db78a 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -659,7 +659,7 @@ 0292;LATIN SMALL LETTER EZH;Ll;0;L;;;;;N;LATIN SMALL LETTER YOGH;;01B7;;01B7 0293;LATIN SMALL LETTER EZH WITH CURL;Ll;0;L;;;;;N;LATIN SMALL LETTER YOGH CURL;;;; 0294;LATIN LETTER GLOTTAL STOP;Lo;0;L;;;;;N;;;;; -0295;LATIN LETTER PHARYNGEAL VOICED FRICATIVE;Ll;0;L;;;;;N;LATIN LETTER REVERSED GLOTTAL STOP;;;; +0295;LATIN LETTER PHARYNGEAL VOICED FRICATIVE;Lo;0;L;;;;;N;LATIN LETTER REVERSED GLOTTAL STOP;;;; 0296;LATIN LETTER INVERTED GLOTTAL STOP;Ll;0;L;;;;;N;;;;; 0297;LATIN LETTER STRETCHED C;Ll;0;L;;;;;N;;;;; 0298;LATIN LETTER BILABIAL CLICK;Ll;0;L;;;;;N;LATIN LETTER BULLSEYE;;;; @@ -14308,6 +14308,8 @@ A7CA;LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY;Ll;0;L;;;;;N;;;A7C9;;A7C9 A7CB;LATIN CAPITAL LETTER RAMS HORN;Lu;0;L;;;;;N;;;;0264; A7CC;LATIN CAPITAL LETTER S WITH DIAGONAL STROKE;Lu;0;L;;;;;N;;;;A7CD; A7CD;LATIN SMALL LETTER S WITH DIAGONAL STROKE;Ll;0;L;;;;;N;;;A7CC;;A7CC +A7CE;LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE;Lu;0;L;;;;;N;;;;A7CF; +A7CF;LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE;Ll;0;L;;;;;N;;;A7CE;;A7CE A7D0;LATIN CAPITAL LETTER CLOSED INSULAR G;Lu;0;L;;;;;N;;;;A7D1; A7D1;LATIN SMALL LETTER CLOSED INSULAR G;Ll;0;L;;;;;N;;;A7D0;;A7D0 A7D3;LATIN SMALL LETTER DOUBLE THORN;Ll;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index afadbe23c..3ee1e2b95 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-13, 21:35:41 GMT +# Date: 2024-11-13, 21:49:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -140,8 +140,8 @@ 01C0..01C3 ; R # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..024F ; R # L& [140] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER Y WITH STROKE 0250..0293 ; R # Ll [68] LATIN SMALL LETTER TURNED A..LATIN SMALL LETTER EZH WITH CURL -0294 ; R # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; R # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; R # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; R # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; R # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; R # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; R # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -1358,8 +1358,7 @@ A788 ; R # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; R # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; R # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; R # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; R # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; R # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; R # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; R # Ll LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; R # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; R # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 6fbcfa5e8..34646f2ae 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 21:23:14 GMT +# Date: 2024-11-13, 21:49:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -780,7 +780,7 @@ E0001 ; Format # Cf LANGUAGE TAG 024B ; Lower # L& LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Lower # L& LATIN SMALL LETTER R WITH STROKE 024F..0293 ; Lower # L& [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Lower # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Lower # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Lower # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Lower # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Lower # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -1255,6 +1255,7 @@ A7C3 ; Lower # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Lower # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Lower # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Lower # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Lower # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Lower # L& LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Lower # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lower # L& LATIN SMALL LETTER DOUBLE WYNN @@ -1930,6 +1931,7 @@ A7C2 ; Upper # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Upper # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Upper # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Upper # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Upper # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Upper # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Upper # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Upper # L& LATIN CAPITAL LETTER SIGMOID S @@ -1983,13 +1985,13 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1963 +# Total code points: 1964 # ================================================ 01BB ; OLetter # Lo LATIN LETTER TWO WITH STROKE 01C0..01C3 ; OLetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK -0294 ; OLetter # Lo LATIN LETTER GLOTTAL STOP +0294..0295 ; OLetter # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE 02B9..02BF ; OLetter # Lm [7] MODIFIER LETTER PRIME..MODIFIER LETTER LEFT HALF RING 02C6..02D1 ; OLetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02EC ; OLetter # Lm MODIFIER LETTER VOICING @@ -2587,7 +2589,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136912 +# Total code points: 136913 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 935c90d01..bb5a423f0 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 21:23:15 GMT +# Date: 2024-11-13, 21:49:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -688,8 +688,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ALetter # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ALetter # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; ALetter # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -1004,8 +1004,7 @@ A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ALetter # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ALetter # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; ALetter # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; ALetter # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; ALetter # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ALetter # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -1357,7 +1356,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33795 +# Total code points: 33797 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 6386021e6..4244cc093 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 21:35:08 GMT +# Date: 2024-11-13, 21:48:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -138,8 +138,8 @@ 01BC..01BF ; L # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; L # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; L # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; L # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; L # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; L # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; L # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; L # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02BB..02C1 ; L # Lm [7] MODIFIER LETTER TURNED COMMA..MODIFIER LETTER REVERSED GLOTTAL STOP 02D0..02D1 ; L # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON @@ -663,8 +663,7 @@ A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; L # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; L # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; L # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; L # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -1215,7 +1214,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815318 code points not listed here. +# The above property value applies to 815316 code points not listed here. # Total code points: 1095483 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 84fbc414d..f18e1c9e2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 21:35:09 GMT +# Date: 2024-11-13, 21:48:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -83,8 +83,8 @@ 01BC..01BF ; 0 # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; 0 # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; 0 # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; 0 # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; 0 # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; 0 # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; 0 # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; 0 # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; 0 # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; 0 # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -1093,8 +1093,7 @@ A788 ; 0 # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; 0 # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; 0 # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; 0 # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; 0 # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; 0 # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; 0 # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; 0 # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; 0 # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; 0 # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -2060,7 +2059,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821547 code points not listed here. +# The above property value applies to 821545 code points not listed here. # Total code points: 1113151 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 408410f88..12a6af6ef 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 21:35:11 GMT +# Date: 2024-11-13, 21:48:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -91,8 +91,8 @@ 01DD..0250 ; N # L& [116] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER TURNED A 0252..0260 ; N # L& [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK 0262..0293 ; N # L& [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL -0294 ; N # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; N # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; N # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; N # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C3 ; N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD 02C5 ; N # Sk MODIFIER LETTER DOWN ARROWHEAD @@ -1183,8 +1183,7 @@ A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; N # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; N # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; N # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -2104,7 +2103,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761065 code points not listed here. +# The above property value applies to 761063 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 9c9216414..78edfffc3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 21:35:11 GMT +# Date: 2024-11-13, 21:48:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -293,7 +293,6 @@ A48D..A48F ; Cn # [3] .. A4C7..A4CF ; Cn # [9] .. A62C..A63F ; Cn # [20] .. A6F8..A6FF ; Cn # [8] .. -A7CE..A7CF ; Cn # [2] .. A7D2 ; Cn # A7D4 ; Cn # A7DD..A7F1 ; Cn # [21] .. @@ -746,7 +745,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819499 +# Total code points: 819497 # ================================================ @@ -1354,6 +1353,7 @@ A7C2 ; Lu # LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Lu # [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Lu # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Lu # [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Lu # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Lu # LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Lu # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Lu # LATIN CAPITAL LETTER SIGMOID S @@ -1404,7 +1404,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1858 +# Total code points: 1859 # ================================================ @@ -1555,7 +1555,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 024B ; Ll # LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Ll # LATIN SMALL LETTER R WITH STROKE 024F..0293 ; Ll # [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Ll # [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Ll # [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 0371 ; Ll # GREEK SMALL LETTER HETA 0373 ; Ll # GREEK SMALL LETTER ARCHAIC SAMPI 0377 ; Ll # GREEK SMALL LETTER PAMPHYLIAN DIGAMMA @@ -2016,6 +2016,7 @@ A7C3 ; Ll # LATIN SMALL LETTER ANGLICANA W A7C8 ; Ll # LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Ll # LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Ll # LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Ll # LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Ll # LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Ll # LATIN SMALL LETTER DOUBLE THORN A7D5 ; Ll # LATIN SMALL LETTER DOUBLE WYNN @@ -2182,7 +2183,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 00BA ; Lo # MASCULINE ORDINAL INDICATOR 01BB ; Lo # LATIN LETTER TWO WITH STROKE 01C0..01C3 ; Lo # [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK -0294 ; Lo # LATIN LETTER GLOTTAL STOP +0294..0295 ; Lo # [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE 05D0..05EA ; Lo # [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; Lo # [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 0620..063F ; Lo # [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE @@ -2708,7 +2709,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136481 +# Total code points: 136482 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 49803bb27..683cf66b0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 21:35:12 GMT +# Date: 2024-11-13, 21:48:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757619 code points not listed here. -# Total code points: 895087 +# The above property value applies to 757617 code points not listed here. +# Total code points: 895085 # ================================================ @@ -587,8 +587,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 01BC..01BF ; AL # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; AL # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; AL # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; AL # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; AL # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; AL # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; AL # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; AL # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; AL # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6 ; AL # Lm MODIFIER LETTER CIRCUMFLEX ACCENT @@ -1141,8 +1141,7 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A790..A7D1 ; AL # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; AL # L& LATIN SMALL LETTER DOUBLE THORN A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q @@ -1614,7 +1613,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26684 +# Total code points: 26686 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 20b8bd47d..8e4a91993 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 21:35:12 GMT +# Date: 2024-11-13, 21:48:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -14281,6 +14281,8 @@ A7CA ; LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CB ; LATIN CAPITAL LETTER RAMS HORN A7CC ; LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CD ; LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CE ; LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE +A7CF ; LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; LATIN CAPITAL LETTER CLOSED INSULAR G A7D1 ; LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; LATIN SMALL LETTER DOUBLE THORN @@ -45401,6 +45403,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155032 +# Total code points: 155034 # EOF From 2fa8b481733863ddac54724a4e64188d7a6e6443 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Wed, 13 Nov 2024 14:16:22 -0800 Subject: [PATCH 13/38] UTC-176-C32 Adding capital double thorn/wynn (#635) * UTC-176-C32 first cut * Fix linebreak * Now Scripts --------- Co-authored-by: Robin Leroy --- unicodetools/data/ucd/dev/CaseFolding.txt | 6 +- unicodetools/data/ucd/dev/DerivedAge.txt | 6 +- .../data/ucd/dev/DerivedCoreProperties.txt | 69 +++++++++---------- .../ucd/dev/DerivedNormalizationProps.txt | 14 ++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 +- unicodetools/data/ucd/dev/LineBreak.txt | 6 +- unicodetools/data/ucd/dev/Scripts.txt | 8 +-- unicodetools/data/ucd/dev/UnicodeData.txt | 6 +- .../data/ucd/dev/VerticalOrientation.txt | 6 +- .../dev/auxiliary/SentenceBreakProperty.txt | 6 +- .../ucd/dev/auxiliary/WordBreakProperty.txt | 8 +-- .../ucd/dev/extracted/DerivedBidiClass.txt | 8 +-- .../dev/extracted/DerivedCombiningClass.txt | 8 +-- .../dev/extracted/DerivedEastAsianWidth.txt | 8 +-- .../dev/extracted/DerivedGeneralCategory.txt | 10 +-- .../ucd/dev/extracted/DerivedLineBreak.txt | 12 ++-- .../data/ucd/dev/extracted/DerivedName.txt | 6 +- 17 files changed, 93 insertions(+), 100 deletions(-) diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index e52ba9146..533abab63 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ -# CaseFolding-16.0.0.txt -# Date: 2024-06-07, 20:34:09 GMT +# CaseFolding-17.0.0.txt +# Date: 2024-11-13, 22:03:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1245,6 +1245,8 @@ A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE; C; A7CF; # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2; C; A7D3; # LATIN CAPITAL LETTER DOUBLE THORN +A7D4; C; A7D5; # LATIN CAPITAL LETTER DOUBLE WYNN A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index d13de90a8..9d9cd0412 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 21:48:39 GMT +# Date: 2024-11-13, 22:03:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2074,7 +2074,9 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 2B96 ; 17.0 # EQUALS SIGN WITH INFINITY ABOVE A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE +A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; 17.0 # LATIN CAPITAL LETTER DOUBLE WYNN -# Total code points: 36 +# Total code points: 38 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 448553ba3..1f5ee8261 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 21:48:56 GMT +# Date: 2024-11-13, 22:03:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -834,9 +834,7 @@ A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; Alphabetic # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Alphabetic # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Alphabetic # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; Alphabetic # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Alphabetic # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Alphabetic # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1441,7 +1439,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142765 +# Total code points: 142767 # ================================================ @@ -2753,6 +2751,8 @@ A7C9 ; Uppercase # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OV A7CB..A7CC ; Uppercase # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE ; Uppercase # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Uppercase # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Uppercase # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Uppercase # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA @@ -2805,7 +2805,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1979 +# Total code points: 1981 # ================================================ @@ -2913,9 +2913,7 @@ A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A7D1 ; Cased # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Cased # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; Cased # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2989,7 +2987,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4579 +# Total code points: 4581 # ================================================ @@ -4114,6 +4112,8 @@ A7C9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH S A7CB..A7CC ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA @@ -4132,7 +4132,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1461 +# Total code points: 1463 # ================================================ @@ -4752,6 +4752,8 @@ A7CA ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH SHO A7CD ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE A7CF ; Changes_When_Uppercased # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S A7DB ; Changes_When_Uppercased # L& LATIN SMALL LETTER LAMBDA @@ -4773,7 +4775,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1553 +# Total code points: 1555 # ================================================ @@ -5392,6 +5394,8 @@ A7CA ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH SHO A7CD ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE A7CF ; Changes_When_Titlecased # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S A7DB ; Changes_When_Titlecased # L& LATIN SMALL LETTER LAMBDA @@ -5413,7 +5417,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1480 +# Total code points: 1482 # ================================================ @@ -6029,6 +6033,8 @@ A7C9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH S A7CB..A7CC ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA @@ -6050,7 +6056,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1534 +# Total code points: 1536 # ================================================ @@ -6162,8 +6168,7 @@ A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H A790..A794 ; Changes_When_Casemapped # L& [5] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH PALATAL HOOK A796..A7AE ; Changes_When_Casemapped # L& [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I -A7B0..A7D1 ; Changes_When_Casemapped # L& [34] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER CLOSED INSULAR G -A7D6..A7DC ; Changes_When_Casemapped # L& [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7B0..A7DC ; Changes_When_Casemapped # L& [45] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Casemapped # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -6190,7 +6195,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2983 +# Total code points: 2987 # ================================================ @@ -6567,9 +6572,7 @@ A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER I A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; ID_Start # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; ID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; ID_Start # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -6967,7 +6970,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141275 +# Total code points: 141277 # ================================================ @@ -7653,9 +7656,7 @@ A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTE A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; ID_Continue # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; ID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; ID_Continue # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -8376,7 +8377,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144576 +# Total code points: 144578 # ================================================ @@ -8749,9 +8750,7 @@ A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; XID_Start # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; XID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; XID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; XID_Start # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -9154,7 +9153,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141252 +# Total code points: 141254 # ================================================ @@ -9835,9 +9834,7 @@ A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETT A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; XID_Continue # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; XID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; XID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; XID_Continue # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -10564,7 +10561,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144557 +# Total code points: 144559 # ================================================ @@ -11985,9 +11982,7 @@ A788 ; Grapheme_Base # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Grapheme_Base # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Grapheme_Base # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; Grapheme_Base # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Grapheme_Base # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Grapheme_Base # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; Grapheme_Base # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Grapheme_Base # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Grapheme_Base # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -12819,7 +12814,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152737 +# Total code points: 152739 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index 4a4b36243..7ee1f72a2 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-17.0.0.txt -# Date: 2024-11-13, 21:48:59 GMT +# Date: 2024-11-13, 22:03:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -5450,6 +5450,8 @@ A7CB ; NFKC_CF; 0264 # L& LATIN CAPITAL LETTER RAMS HOR A7CC ; NFKC_CF; A7CD # L& LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE ; NFKC_CF; A7CF # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; NFKC_CF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; NFKC_CF; A7D3 # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; NFKC_CF; A7D5 # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; NFKC_CF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_CF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; NFKC_CF; A7DB # L& LATIN CAPITAL LETTER LAMBDA @@ -9149,7 +9151,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] .... -# Total code points: 10555 +# Total code points: 10557 # ================================================ @@ -11585,6 +11587,8 @@ A7CB ; NFKC_SCF; 0264 # L& LATIN CAPITAL LETTER RAMS HO A7CC ; NFKC_SCF; A7CD # L& LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE ; NFKC_SCF; A7CF # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; NFKC_SCF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; NFKC_SCF; A7D3 # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; NFKC_SCF; A7D5 # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; NFKC_SCF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_SCF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; NFKC_SCF; A7DB # L& LATIN CAPITAL LETTER LAMBDA @@ -15284,7 +15288,7 @@ E0080..E00FF ; NFKC_SCF; # Cn [128] .... -# Total code points: 10517 +# Total code points: 10519 # ================================================ @@ -16056,6 +16060,8 @@ A7C9 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER S W A7CB..A7CC ; Changes_When_NFKC_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA @@ -16303,6 +16309,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] .... -# Total code points: 10555 +# Total code points: 10557 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 525f3ed34..4a39b0c77 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 21:49:00 GMT +# Date: 2024-11-13, 22:03:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1549,9 +1549,7 @@ A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; N # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; N # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; N # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 237a89163..dd2c46881 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-13, 21:49:01 GMT +# Date: 2024-11-13, 22:03:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1576,9 +1576,7 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; AL # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; AL # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; AL # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index e98eca2e3..e199ef2b4 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-13, 21:49:20 GMT +# Date: 2024-11-13, 22:04:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -675,9 +675,7 @@ A770 ; Latin # Lm MODIFIER LETTER US A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; Latin # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Latin # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Latin # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; Latin # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -700,7 +698,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1489 +# Total code points: 1491 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index aae6db78a..66dc3c27d 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -14312,8 +14312,10 @@ A7CE;LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE;Lu;0;L;;;;;N;;;;A7CF; A7CF;LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE;Ll;0;L;;;;;N;;;A7CE;;A7CE A7D0;LATIN CAPITAL LETTER CLOSED INSULAR G;Lu;0;L;;;;;N;;;;A7D1; A7D1;LATIN SMALL LETTER CLOSED INSULAR G;Ll;0;L;;;;;N;;;A7D0;;A7D0 -A7D3;LATIN SMALL LETTER DOUBLE THORN;Ll;0;L;;;;;N;;;;; -A7D5;LATIN SMALL LETTER DOUBLE WYNN;Ll;0;L;;;;;N;;;;; +A7D2;LATIN CAPITAL LETTER DOUBLE THORN;Lu;0;L;;;;;N;;;;A7D3; +A7D3;LATIN SMALL LETTER DOUBLE THORN;Ll;0;L;;;;;N;;;A7D2;;A7D2 +A7D4;LATIN CAPITAL LETTER DOUBLE WYNN;Lu;0;L;;;;;N;;;;A7D5; +A7D5;LATIN SMALL LETTER DOUBLE WYNN;Ll;0;L;;;;;N;;;A7D4;;A7D4 A7D6;LATIN CAPITAL LETTER MIDDLE SCOTS S;Lu;0;L;;;;;N;;;;A7D7; A7D7;LATIN SMALL LETTER MIDDLE SCOTS S;Ll;0;L;;;;;N;;;A7D6;;A7D6 A7D8;LATIN CAPITAL LETTER SIGMOID S;Lu;0;L;;;;;N;;;;A7D9; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 3ee1e2b95..d6167b1f8 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-13, 21:49:22 GMT +# Date: 2024-11-13, 22:04:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1358,9 +1358,7 @@ A788 ; R # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; R # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; R # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; R # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; R # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; R # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; R # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; R # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; R # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; R # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; R # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 34646f2ae..76b500a96 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 21:49:20 GMT +# Date: 2024-11-13, 22:04:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1933,6 +1933,8 @@ A7C9 ; Upper # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLA A7CB..A7CC ; Upper # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE ; Upper # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Upper # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Upper # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Upper # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Upper # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Upper # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Upper # L& LATIN CAPITAL LETTER LAMBDA @@ -1985,7 +1987,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1964 +# Total code points: 1966 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index bb5a423f0..211ba68df 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 21:49:22 GMT +# Date: 2024-11-13, 22:04:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1004,9 +1004,7 @@ A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; ALetter # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; ALetter # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ALetter # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; ALetter # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ALetter # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ALetter # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1356,7 +1354,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33797 +# Total code points: 33799 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 4244cc093..97d173eb6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 21:48:54 GMT +# Date: 2024-11-13, 22:03:33 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -663,9 +663,7 @@ A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; L # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; L # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; L # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1214,7 +1212,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815316 code points not listed here. +# The above property value applies to 815314 code points not listed here. # Total code points: 1095483 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index f18e1c9e2..dc20c62a1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 21:48:55 GMT +# Date: 2024-11-13, 22:03:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1093,9 +1093,7 @@ A788 ; 0 # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; 0 # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; 0 # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; 0 # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; 0 # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; 0 # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; 0 # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; 0 # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; 0 # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; 0 # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; 0 # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -2059,7 +2057,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821545 code points not listed here. +# The above property value applies to 821543 code points not listed here. # Total code points: 1113151 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 12a6af6ef..3e5013b07 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 21:48:57 GMT +# Date: 2024-11-13, 22:03:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1183,9 +1183,7 @@ A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; N # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; N # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; N # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -2103,7 +2101,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761063 code points not listed here. +# The above property value applies to 761061 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 78edfffc3..5c4d67c71 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 21:48:58 GMT +# Date: 2024-11-13, 22:03:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -293,8 +293,6 @@ A48D..A48F ; Cn # [3] .. A4C7..A4CF ; Cn # [9] .. A62C..A63F ; Cn # [20] .. A6F8..A6FF ; Cn # [8] .. -A7D2 ; Cn # -A7D4 ; Cn # A7DD..A7F1 ; Cn # [21] .. A82D..A82F ; Cn # [3] .. A83A..A83F ; Cn # [6] .. @@ -745,7 +743,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819497 +# Total code points: 819495 # ================================================ @@ -1355,6 +1353,8 @@ A7C9 ; Lu # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Lu # [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CE ; Lu # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Lu # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Lu # LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Lu # LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Lu # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Lu # LATIN CAPITAL LETTER SIGMOID S A7DA ; Lu # LATIN CAPITAL LETTER LAMBDA @@ -1404,7 +1404,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1859 +# Total code points: 1861 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 683cf66b0..fa995911f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 21:48:59 GMT +# Date: 2024-11-13, 22:03:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757617 code points not listed here. -# Total code points: 895085 +# The above property value applies to 757615 code points not listed here. +# Total code points: 895083 # ================================================ @@ -1141,9 +1141,7 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7D1 ; AL # L& [66] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; AL # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A790..A7DC ; AL # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1613,7 +1611,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26686 +# Total code points: 26688 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 8e4a91993..191c963dd 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 21:48:59 GMT +# Date: 2024-11-13, 22:03:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -14285,7 +14285,9 @@ A7CE ; LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7CF ; LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; LATIN CAPITAL LETTER CLOSED INSULAR G A7D1 ; LATIN SMALL LETTER CLOSED INSULAR G +A7D2 ; LATIN CAPITAL LETTER DOUBLE THORN A7D3 ; LATIN SMALL LETTER DOUBLE THORN +A7D4 ; LATIN CAPITAL LETTER DOUBLE WYNN A7D5 ; LATIN SMALL LETTER DOUBLE WYNN A7D6 ; LATIN CAPITAL LETTER MIDDLE SCOTS S A7D7 ; LATIN SMALL LETTER MIDDLE SCOTS S @@ -45403,6 +45405,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155034 +# Total code points: 155036 # EOF From ce13c8b5c1ddd91036c8a95259dfdf8dbdf0892b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 23:43:53 +0100 Subject: [PATCH 14/38] Modifier capital S (#743) * UnicodeData.txt from the proposal * lb=AL most likely * Looks Latin to me * Diacritic and Lowercase I think * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 38 +++++++++---------- .../ucd/dev/DerivedNormalizationProps.txt | 23 ++++++----- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +- unicodetools/data/ucd/dev/LineBreak.txt | 4 +- .../data/ucd/dev/NormalizationTest.txt | 3 +- unicodetools/data/ucd/dev/PropList.txt | 9 +++-- unicodetools/data/ucd/dev/Scripts.txt | 6 +-- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + .../data/ucd/dev/VerticalOrientation.txt | 4 +- .../dev/auxiliary/SentenceBreakProperty.txt | 6 +-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 +-- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 +-- .../dev/extracted/DerivedCombiningClass.txt | 6 +-- .../extracted/DerivedDecompositionType.txt | 8 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 6 +-- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 ++--- .../data/ucd/dev/extracted/DerivedName.txt | 5 ++- 19 files changed, 84 insertions(+), 76 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 9d9cd0412..c4be3a7a0 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 22:03:17 GMT +# Date: 2024-11-13, 22:18:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2076,7 +2076,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN A7D4 ; 17.0 # LATIN CAPITAL LETTER DOUBLE WYNN +A7F1 ; 17.0 # MODIFIER LETTER CAPITAL S -# Total code points: 38 +# Total code points: 39 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1f5ee8261..0893db2d0 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 22:03:35 GMT +# Date: 2024-11-13, 22:18:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -835,7 +835,7 @@ A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; Alphabetic # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Alphabetic # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Alphabetic # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Alphabetic # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; Alphabetic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1439,7 +1439,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142767 +# Total code points: 142768 # ================================================ @@ -2078,7 +2078,7 @@ A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lowercase # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lowercase # L& LATIN SMALL LETTER SIGMOID S A7DB ; Lowercase # L& LATIN SMALL LETTER LAMBDA -A7F2..A7F4 ; Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lowercase # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F6 ; Lowercase # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -2138,7 +2138,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2569 +# Total code points: 2570 # ================================================ @@ -2914,7 +2914,7 @@ A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A790..A7DC ; Cased # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Cased # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -2987,7 +2987,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4581 +# Total code points: 4582 # ================================================ @@ -3274,7 +3274,7 @@ A720..A721 ; Case_Ignorable # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE.. A770 ; Case_Ignorable # Lm MODIFIER LETTER US A788 ; Case_Ignorable # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Case_Ignorable # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN -A7F2..A7F4 ; Case_Ignorable # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Case_Ignorable # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Case_Ignorable # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A802 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN HASANTA @@ -3505,7 +3505,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2778 +# Total code points: 2779 # ================================================ @@ -6573,7 +6573,7 @@ A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; ID_Start # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; ID_Start # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -6970,7 +6970,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141277 +# Total code points: 141278 # ================================================ @@ -7657,7 +7657,7 @@ A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; ID_Continue # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; ID_Continue # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -8377,7 +8377,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144578 +# Total code points: 144579 # ================================================ @@ -8751,7 +8751,7 @@ A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; XID_Start # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; XID_Start # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -9153,7 +9153,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141254 +# Total code points: 141255 # ================================================ @@ -9835,7 +9835,7 @@ A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; XID_Continue # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; XID_Continue # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -10561,7 +10561,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144559 +# Total code points: 144560 # ================================================ @@ -11983,7 +11983,7 @@ A789..A78A ; Grapheme_Base # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER A78B..A78E ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Grapheme_Base # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; Grapheme_Base # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Grapheme_Base # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Grapheme_Base # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Grapheme_Base # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; Grapheme_Base # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -12814,7 +12814,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152739 +# Total code points: 152740 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index 7ee1f72a2..743f1cdcb 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-17.0.0.txt -# Date: 2024-11-13, 22:03:39 GMT +# Date: 2024-11-13, 22:18:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -132,6 +132,7 @@ 33DD ; FC_NFKC; 0077 0062 # So SQUARE WB 33DE ; FC_NFKC; 0076 2215 006D # So SQUARE V OVER M 33DF ; FC_NFKC; 0061 2215 006D # So SQUARE A OVER M +A7F1 ; FC_NFKC; 0073 # Lm MODIFIER LETTER CAPITAL S A7F2 ; FC_NFKC; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; FC_NFKC; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; FC_NFKC; 0071 # Lm MODIFIER LETTER CAPITAL Q @@ -679,7 +680,7 @@ A7F8 ; FC_NFKC; 0127 # Lm MODIFIER LETTER CAPITAL H WITH STROKE 1F16C ; FC_NFKC; 006D 0072 # So RAISED MR SIGN 1F190 ; FC_NFKC; 0064 006A # So SQUARE DJ -# Total code points: 663 +# Total code points: 664 # ================================================ @@ -1515,7 +1516,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 32C0..33FF ; NFKD_QC; N # So [320] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE GAL A69C..A69D ; NFKD_QC; N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; NFKD_QC; N # Lm MODIFIER LETTER US -A7F2..A7F4 ; NFKD_QC; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; NFKD_QC; N # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; NFKD_QC; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; NFKD_QC; N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; NFKD_QC; N # Lm MODIFIER LETTER SMALL TURNED W @@ -1753,7 +1754,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 17085 +# Total code points: 17086 # ================================================ @@ -1946,7 +1947,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 32C0..33FF ; NFKC_QC; N # So [320] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE GAL A69C..A69D ; NFKC_QC; N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; NFKC_QC; N # Lm MODIFIER LETTER US -A7F2..A7F4 ; NFKC_QC; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; NFKC_QC; N # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; NFKC_QC; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; NFKC_QC; N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; NFKC_QC; N # Lm MODIFIER LETTER SMALL TURNED W @@ -2164,7 +2165,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKC_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4964 +# Total code points: 4965 # ================================================ @@ -5456,6 +5457,7 @@ A7D6 ; NFKC_CF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE S A7D8 ; NFKC_CF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; NFKC_CF; A7DB # L& LATIN CAPITAL LETTER LAMBDA A7DC ; NFKC_CF; 019B # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1 ; NFKC_CF; 0073 # Lm MODIFIER LETTER CAPITAL S A7F2 ; NFKC_CF; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; NFKC_CF; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; NFKC_CF; 0071 # Lm MODIFIER LETTER CAPITAL Q @@ -9151,7 +9153,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] .... -# Total code points: 10557 +# Total code points: 10558 # ================================================ @@ -11593,6 +11595,7 @@ A7D6 ; NFKC_SCF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE A7D8 ; NFKC_SCF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; NFKC_SCF; A7DB # L& LATIN CAPITAL LETTER LAMBDA A7DC ; NFKC_SCF; 019B # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1 ; NFKC_SCF; 0073 # Lm MODIFIER LETTER CAPITAL S A7F2 ; NFKC_SCF; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; NFKC_SCF; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; NFKC_SCF; 0071 # Lm MODIFIER LETTER CAPITAL Q @@ -15288,7 +15291,7 @@ E0080..E00FF ; NFKC_SCF; # Cn [128] .... -# Total code points: 10519 +# Total code points: 10520 # ================================================ @@ -16066,7 +16069,7 @@ A7D6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER MID A7D8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA A7DC ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Changes_When_NFKC_Casefolded # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Changes_When_NFKC_Casefolded # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Changes_When_NFKC_Casefolded # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK @@ -16309,6 +16312,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] .... -# Total code points: 10557 +# Total code points: 10558 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 4a39b0c77..8fe7aeb60 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 22:03:40 GMT +# Date: 2024-11-13, 22:18:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1550,7 +1550,7 @@ A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQ A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; N # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; N # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index dd2c46881..54860f25a 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-13, 22:03:41 GMT +# Date: 2024-11-13, 22:18:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1577,7 +1577,7 @@ A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQ A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; AL # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; AL # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 768ef03d4..dd98635df 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-17.0.0.txt -# Date: 2024-11-13, 21:22:57 GMT +# Date: 2024-11-13, 22:18:53 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2429,6 +2429,7 @@ FEFA 0334;FEFA 0334;FEFA 0334;0644 0625 0334;0644 0627 0334 0655; # (ﻺ◌̴; A69C;A69C;A69C;044A;044A; # (ꚜ; ꚜ; ꚜ; ъ; ъ; ) MODIFIER LETTER CYRILLIC HARD SIGN A69D;A69D;A69D;044C;044C; # (ꚝ; ꚝ; ꚝ; ь; ь; ) MODIFIER LETTER CYRILLIC SOFT SIGN A770;A770;A770;A76F;A76F; # (ꝰ; ꝰ; ꝰ; ꝯ; ꝯ; ) MODIFIER LETTER US +A7F1;A7F1;A7F1;0053;0053; # (꟱; ꟱; ꟱; S; S; ) MODIFIER LETTER CAPITAL S A7F2;A7F2;A7F2;0043;0043; # (ꟲ; ꟲ; ꟲ; C; C; ) MODIFIER LETTER CAPITAL C A7F3;A7F3;A7F3;0046;0046; # (ꟳ; ꟳ; ꟳ; F; F; ) MODIFIER LETTER CAPITAL F A7F4;A7F4;A7F4;0051;0051; # (ꟴ; ꟴ; ꟴ; Q; Q; ) MODIFIER LETTER CAPITAL Q diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 1cbd57d2a..7744fcc3e 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-13, 21:35:23 GMT +# Date: 2024-11-13, 22:18:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1036,6 +1036,7 @@ A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F1 ; Diacritic # Lm MODIFIER LETTER CAPITAL S A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A806 ; Diacritic # Mn SYLOTI NAGRI SIGN HASANTA A82C ; Diacritic # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA @@ -1152,7 +1153,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1207 +# Total code points: 1208 # ================================================ @@ -1222,7 +1223,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; Other_Lowercase # Lm MODIFIER LETTER US -A7F2..A7F4 ; Other_Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Other_Lowercase # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W @@ -1232,7 +1233,7 @@ AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -# Total code points: 311 +# Total code points: 312 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index e199ef2b4..13a5ce001 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-13, 22:04:03 GMT +# Date: 2024-11-13, 22:19:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -676,7 +676,7 @@ A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSU A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; Latin # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Latin # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -698,7 +698,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1491 +# Total code points: 1492 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 66dc3c27d..f366c3da2 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -14323,6 +14323,7 @@ A7D9;LATIN SMALL LETTER SIGMOID S;Ll;0;L;;;;;N;;;A7D8;;A7D8 A7DA;LATIN CAPITAL LETTER LAMBDA;Lu;0;L;;;;;N;;;;A7DB; A7DB;LATIN SMALL LETTER LAMBDA;Ll;0;L;;;;;N;;;A7DA;;A7DA A7DC;LATIN CAPITAL LETTER LAMBDA WITH STROKE;Lu;0;L;;;;;N;;;;019B; +A7F1;MODIFIER LETTER CAPITAL S;Lm;0;L; 0053;;;;N;;;;; A7F2;MODIFIER LETTER CAPITAL C;Lm;0;L; 0043;;;;N;;;;; A7F3;MODIFIER LETTER CAPITAL F;Lm;0;L; 0046;;;;N;;;;; A7F4;MODIFIER LETTER CAPITAL Q;Lm;0;L; 0051;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index d6167b1f8..9e6eec88a 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-13, 22:04:05 GMT +# Date: 2024-11-13, 22:19:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1359,7 +1359,7 @@ A789..A78A ; R # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQ A78B..A78E ; R # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; R # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; R # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; R # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; R # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; R # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; R # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; R # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 76b500a96..44f556893 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 22:04:04 GMT +# Date: 2024-11-13, 22:19:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1262,7 +1262,7 @@ A7D5 ; Lower # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lower # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lower # L& LATIN SMALL LETTER SIGMOID S A7DB ; Lower # L& LATIN SMALL LETTER LAMBDA -A7F2..A7F4 ; Lower # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lower # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F6 ; Lower # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lower # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lower # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -1322,7 +1322,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2522 +# Total code points: 2523 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 211ba68df..6ddc428da 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 22:04:06 GMT +# Date: 2024-11-13, 22:19:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1005,7 +1005,7 @@ A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; ALetter # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ALetter # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; ALetter # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ALetter # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1354,7 +1354,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33799 +# Total code points: 33800 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 97d173eb6..07e2f9666 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 22:03:33 GMT +# Date: 2024-11-13, 22:18:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -664,7 +664,7 @@ A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; L # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; L # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1212,7 +1212,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815314 code points not listed here. +# The above property value applies to 815313 code points not listed here. # Total code points: 1095483 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index dc20c62a1..08ede4e2b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 22:03:35 GMT +# Date: 2024-11-13, 22:18:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1094,7 +1094,7 @@ A789..A78A ; 0 # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS A78B..A78E ; 0 # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; 0 # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; 0 # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; 0 # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; 0 # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; 0 # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; 0 # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; 0 # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2057,7 +2057,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821543 code points not listed here. +# The above property value applies to 821542 code points not listed here. # Total code points: 1113151 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index a825479ac..1b8d1ae99 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ -# DerivedDecompositionType-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedDecompositionType-17.0.0.txt +# Date: 2024-11-13, 22:18:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -873,7 +873,7 @@ FEFB ; Isolated # Lo ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM 3196..319F ; Super # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK A69C..A69D ; Super # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; Super # Lm MODIFIER LETTER US -A7F2..A7F4 ; Super # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Super # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Super # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Super # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W @@ -884,7 +884,7 @@ AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W 1E06B..1E06D ; Super # Lm [3] MODIFIER LETTER CYRILLIC SMALL ES WITH DESCENDER..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1F16A..1F16C ; Super # So [3] RAISED MC SIGN..RAISED MR SIGN -# Total code points: 249 +# Total code points: 250 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 3e5013b07..0ff7edb44 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 22:03:37 GMT +# Date: 2024-11-13, 22:18:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1184,7 +1184,7 @@ A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; N # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; N # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2101,7 +2101,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761061 code points not listed here. +# The above property value applies to 761060 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 5c4d67c71..3b9741ec5 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 22:03:37 GMT +# Date: 2024-11-13, 22:18:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -293,7 +293,7 @@ A48D..A48F ; Cn # [3] .. A4C7..A4CF ; Cn # [9] .. A62C..A63F ; Cn # [20] .. A6F8..A6FF ; Cn # [8] .. -A7DD..A7F1 ; Cn # [21] .. +A7DD..A7F0 ; Cn # [20] .. A82D..A82F ; Cn # [3] .. A83A..A83F ; Cn # [6] .. A878..A87F ; Cn # [8] .. @@ -743,7 +743,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819495 +# Total code points: 819494 # ================================================ @@ -2143,7 +2143,7 @@ A69C..A69D ; Lm # [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER C A717..A71F ; Lm # [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A770 ; Lm # MODIFIER LETTER US A788 ; Lm # MODIFIER LETTER LOW CIRCUMFLEX ACCENT -A7F2..A7F4 ; Lm # [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lm # [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Lm # [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A9CF ; Lm # JAVANESE PANGRANGKEP A9E6 ; Lm # MYANMAR MODIFIER LETTER SHAN REDUPLICATION @@ -2173,7 +2173,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 404 +# Total code points: 405 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index fa995911f..59a9dd70f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 22:03:38 GMT +# Date: 2024-11-13, 22:18:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757615 code points not listed here. -# Total code points: 895083 +# The above property value applies to 757614 code points not listed here. +# Total code points: 895082 # ================================================ @@ -1142,7 +1142,7 @@ A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUAL A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT A790..A7DC ; AL # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; AL # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1611,7 +1611,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26688 +# Total code points: 26689 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 191c963dd..c79c68ffc 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 22:03:38 GMT +# Date: 2024-11-13, 22:18:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -14296,6 +14296,7 @@ A7D9 ; LATIN SMALL LETTER SIGMOID S A7DA ; LATIN CAPITAL LETTER LAMBDA A7DB ; LATIN SMALL LETTER LAMBDA A7DC ; LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1 ; MODIFIER LETTER CAPITAL S A7F2 ; MODIFIER LETTER CAPITAL C A7F3 ; MODIFIER LETTER CAPITAL F A7F4 ; MODIFIER LETTER CAPITAL Q @@ -45405,6 +45406,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155036 +# Total code points: 155037 # EOF From 399ba7f8e9b0900898167e26c9ee075beb666c18 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 00:04:04 +0100 Subject: [PATCH 15/38] Twenty-five more Arabic honorifics (#705) * UnicodeData.txt lines from Roozbeh * lb=AL like the other honorifics * Scripts.txt * Regenerate UCD * Fix spelling for unicode-org/sah#484 * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 7 +++-- .../data/ucd/dev/DerivedCoreProperties.txt | 7 +++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 +++- unicodetools/data/ucd/dev/LineBreak.txt | 5 +++- unicodetools/data/ucd/dev/Scripts.txt | 7 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 25 ++++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 5 +++- .../ucd/dev/extracted/DerivedBidiClass.txt | 11 ++++--- .../dev/extracted/DerivedCombiningClass.txt | 7 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 7 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 15 ++++++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 11 ++++--- .../data/ucd/dev/extracted/DerivedName.txt | 29 +++++++++++++++++-- 13 files changed, 112 insertions(+), 29 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index c4be3a7a0..a072b4d64 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 22:18:26 GMT +# Date: 2024-11-13, 22:47:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2077,7 +2077,10 @@ A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..L A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN A7D4 ; 17.0 # LATIN CAPITAL LETTER DOUBLE WYNN A7F1 ; 17.0 # MODIFIER LETTER CAPITAL S +FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH +FD91 ; 17.0 # ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +10ED1..10ED8 ; 17.0 # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -# Total code points: 39 +# Total code points: 64 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 0893db2d0..425b65947 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 22:18:44 GMT +# Date: 2024-11-13, 22:47:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -12099,11 +12099,13 @@ FB40..FB41 ; Grapheme_Base # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW L FB43..FB44 ; Grapheme_Base # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; Grapheme_Base # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; Grapheme_Base # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; Grapheme_Base # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; Grapheme_Base # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; Grapheme_Base # Pe ORNATE LEFT PARENTHESIS FD3F ; Grapheme_Base # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; Grapheme_Base # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; Grapheme_Base # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; Grapheme_Base # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; Grapheme_Base # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDCF ; Grapheme_Base # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; Grapheme_Base # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU @@ -12332,6 +12334,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10EAD ; Grapheme_Base # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1..10ED8 ; Grapheme_Base # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; Grapheme_Base # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -12814,7 +12817,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152740 +# Total code points: 152765 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 8fe7aeb60..345bcf401 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 22:18:48 GMT +# Date: 2024-11-13, 22:47:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1707,11 +1707,13 @@ FB43..FB44 ; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; N # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; N # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; N # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; N # Pe ORNATE LEFT PARENTHESIS FD3F ; N # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; N # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU @@ -1962,6 +1964,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 54860f25a..4d5561bb5 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-13, 22:18:49 GMT +# Date: 2024-11-13, 22:47:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2536,11 +2536,13 @@ FB43..FB44 ; HL # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; HL # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; AL # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; CL # Pe ORNATE LEFT PARENTHESIS FD3F ; OP # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; AL # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; AL # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDCF ; AL # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU @@ -2819,6 +2821,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 13a5ce001..e27f7901f 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-13, 22:19:09 GMT +# Date: 2024-11-13, 22:47:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -874,9 +874,11 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; Arabic # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; Arabic # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD40..FD4F ; Arabic # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; Arabic # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDCF ; Arabic # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU @@ -886,6 +888,7 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1..10ED8 ; Arabic # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -922,7 +925,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1374 +# Total code points: 1399 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f366c3da2..29743503b 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -15964,6 +15964,22 @@ FBBF;ARABIC SYMBOL RING;Sk;0;AL;;;;;N;;;;; FBC0;ARABIC SYMBOL SMALL TAH ABOVE;Sk;0;AL;;;;;N;;;;; FBC1;ARABIC SYMBOL SMALL TAH BELOW;Sk;0;AL;;;;;N;;;;; FBC2;ARABIC SYMBOL WASLA ABOVE;Sk;0;AL;;;;;N;;;;; +FBC3;ARABIC LIGATURE JALLA WA-ALAA;So;0;ON;;;;;N;;;;; +FBC4;ARABIC LIGATURE DAAMAT BARAKAATUHUM;So;0;ON;;;;;N;;;;; +FBC5;ARABIC LIGATURE RAHMATU ALLAAHI TAAALAA ALAYH;So;0;ON;;;;;N;;;;; +FBC6;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIM;So;0;ON;;;;;N;;;;; +FBC7;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIMAA;So;0;ON;;;;;N;;;;; +FBC8;ARABIC LIGATURE RAHIMAHUM ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBC9;ARABIC LIGATURE RAHIMAHUMAA ALLAAH;So;0;ON;;;;;N;;;;; +FBCA;ARABIC LIGATURE RAHIMAHUMAA ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBCB;ARABIC LIGATURE RADI ALLAHU TAAALAA ANHUM;So;0;ON;;;;;N;;;;; +FBCC;ARABIC LIGATURE HAFIZAHU ALLAAH;So;0;ON;;;;;N;;;;; +FBCD;ARABIC LIGATURE HAFIZAHU ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBCE;ARABIC LIGATURE HAFIZAHUM ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBCF;ARABIC LIGATURE HAFIZAHUMAA ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBD0;ARABIC LIGATURE SALLALLAAHU TAAALAA ALAYHI WA-SALLAM;So;0;ON;;;;;N;;;;; +FBD1;ARABIC LIGATURE AJJAL ALLAAHU FARAJAHU ASH-SHAREEF;So;0;ON;;;;;N;;;;; +FBD2;ARABIC LIGATURE ALAYHI AR-RAHMAH;So;0;ON;;;;;N;;;;; FBD3;ARABIC LETTER NG ISOLATED FORM;Lo;0;AL; 06AD;;;;N;;;;; FBD4;ARABIC LETTER NG FINAL FORM;Lo;0;AL; 06AD;;;;N;;;;; FBD5;ARABIC LETTER NG INITIAL FORM;Lo;0;AL; 06AD;;;;N;;;;; @@ -16409,6 +16425,7 @@ FD8C;ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM;Lo;0;AL; 0645 FD8D;ARABIC LIGATURE MEEM WITH JEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0645 062C 0645;;;;N;;;;; FD8E;ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM;Lo;0;AL; 0645 062E 062C;;;;N;;;;; FD8F;ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM;Lo;0;AL; 0645 062E 0645;;;;N;;;;; +FD91;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA;So;0;ON;;;;;N;;;;; FD92;ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM;Lo;0;AL; 0645 062C 062E;;;;N;;;;; FD93;ARABIC LIGATURE HEH WITH MEEM WITH JEEM INITIAL FORM;Lo;0;AL; 0647 0645 062C;;;;N;;;;; FD94;ARABIC LIGATURE HEH WITH MEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0647 0645 0645;;;;N;;;;; @@ -19580,6 +19597,14 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC2;ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10ED1;ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED2;ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED3;ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED4;ARABIC LIGATURE QADDASA ALLAAHU SIRRAH;So;0;ON;;;;;N;;;;; +10ED5;ARABIC LIGATURE QUDDISA SIRRAHUM;So;0;ON;;;;;N;;;;; +10ED6;ARABIC LIGATURE QUDDISA SIRRAHUMAA;So;0;ON;;;;;N;;;;; +10ED7;ARABIC LIGATURE QUDDISAT ASRAARUHUM;So;0;ON;;;;;N;;;;; +10ED8;ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH;So;0;ON;;;;;N;;;;; 10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;; 10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;; 10EFE;ARABIC SMALL LOW WORD QASR;Mn;220;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 9e6eec88a..adae42e8b 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-13, 22:19:12 GMT +# Date: 2024-11-13, 22:47:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1520,11 +1520,13 @@ FB43..FB44 ; R # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; R # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; R # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; R # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; R # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; R # Pe ORNATE LEFT PARENTHESIS FD3F ; R # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; R # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; R # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; R # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; R # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDCF ; R # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; R # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU @@ -1783,6 +1785,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1..10ED8 ; R # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 07e2f9666..dfd8e2821 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 22:18:42 GMT +# Date: 2024-11-13, 22:47:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1843,9 +1843,11 @@ A788 ; ON # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A828..A82B ; ON # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 A874..A877 ; ON # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD AB6A..AB6B ; ON # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +FBC3..FBD2 ; ON # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FD3E ; ON # Pe ORNATE LEFT PARENTHESIS FD3F ; ON # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; ON # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FD91 ; ON # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FDCF ; ON # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDFD..FDFF ; ON # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FE10..FE16 ; ON # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK @@ -1933,6 +1935,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1091F ; ON # Po PHOENICIAN WORD SEPARATOR 10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10D6E ; ON # Pd GARAY HYPHEN +10ED1..10ED8 ; ON # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND 11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -1994,7 +1997,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 6752 +# Total code points: 6777 # ================================================ @@ -2499,8 +2502,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 292 code points not listed here. -# Total code points: 1767 +# The above property value applies to 267 code points not listed here. +# Total code points: 1742 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 08ede4e2b..dd9824c6d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 22:18:43 GMT +# Date: 2024-11-13, 22:47:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1231,11 +1231,13 @@ FB40..FB41 ; 0 # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH FB43..FB44 ; 0 # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; 0 # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; 0 # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; 0 # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; 0 # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; 0 # Pe ORNATE LEFT PARENTHESIS FD3F ; 0 # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; 0 # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; 0 # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; 0 # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; 0 # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDCF ; 0 # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; 0 # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU @@ -1472,6 +1474,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EAD ; 0 # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1..10ED8 ; 0 # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; 0 # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -2057,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821542 code points not listed here. +# The above property value applies to 821517 code points not listed here. # Total code points: 1113151 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 0ff7edb44..a53658c73 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 22:18:45 GMT +# Date: 2024-11-13, 22:47:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1328,11 +1328,13 @@ FB40..FB41 ; N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH FB43..FB44 ; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; N # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; N # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; N # Pe ORNATE LEFT PARENTHESIS FD3F ; N # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; N # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU @@ -1484,6 +1486,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -2101,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761060 code points not listed here. +# The above property value applies to 761035 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 3b9741ec5..e65d152ca 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 22:18:45 GMT +# Date: 2024-11-13, 22:47:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -329,8 +329,7 @@ FB3D ; Cn # FB3F ; Cn # FB42 ; Cn # FB45 ; Cn # -FBC3..FBD2 ; Cn # [16] .. -FD90..FD91 ; Cn # [2] .. +FD90 ; Cn # FDC8..FDCE ; Cn # [7] .. FDD0..FDEF ; Cn # [32] .. FE1A..FE1F ; Cn # [6] .. @@ -435,7 +434,8 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. -10EC5..10EFB ; Cn # [55] .. +10EC5..10ED0 ; Cn # [12] .. +10ED9..10EFB ; Cn # [35] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -743,7 +743,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819494 +# Total code points: 819469 # ================================================ @@ -4204,7 +4204,9 @@ A828..A82B ; So # [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK- A836..A837 ; So # [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK A839 ; So # NORTH INDIC QUANTITY MARK AA77..AA79 ; So # [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +FBC3..FBD2 ; So # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FD40..FD4F ; So # [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FD91 ; So # ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FDCF ; So # ARABIC LIGATURE SALAAMUHU ALAYNAA FDFD..FDFF ; So # [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FFE4 ; So # FULLWIDTH BROKEN BAR @@ -4219,6 +4221,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 101D0..101FC ; So # [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND 10877..10878 ; So # [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON 10AC8 ; So # MANICHAEAN SIGN UD +10ED1..10ED8 ; So # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 1173F ; So # AHOM SYMBOL VI 11FD5..11FDC ; So # [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI 11FE1..11FF1 ; So # [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA @@ -4284,7 +4287,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 7377 +# Total code points: 7402 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 59a9dd70f..c2824ccc7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 22:18:46 GMT +# Date: 2024-11-13, 22:47:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757614 code points not listed here. -# Total code points: 895082 +# The above property value applies to 757589 code points not listed here. +# Total code points: 895057 # ================================================ @@ -1185,9 +1185,11 @@ FB13..FB17 ; AL # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LI FB29 ; AL # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; AL # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD40..FD4F ; AL # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; AL # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDCF ; AL # So ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU @@ -1313,6 +1315,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1611,7 +1614,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26689 +# Total code points: 26714 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index c79c68ffc..d31ed38a4 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 22:18:47 GMT +# Date: 2024-11-13, 22:47:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -26629,6 +26629,22 @@ FBBF ; ARABIC SYMBOL RING FBC0 ; ARABIC SYMBOL SMALL TAH ABOVE FBC1 ; ARABIC SYMBOL SMALL TAH BELOW FBC2 ; ARABIC SYMBOL WASLA ABOVE +FBC3 ; ARABIC LIGATURE JALLA WA-ALAA +FBC4 ; ARABIC LIGATURE DAAMAT BARAKAATUHUM +FBC5 ; ARABIC LIGATURE RAHMATU ALLAAHI TAAALAA ALAYH +FBC6 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIM +FBC7 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIMAA +FBC8 ; ARABIC LIGATURE RAHIMAHUM ALLAAHU TAAALAA +FBC9 ; ARABIC LIGATURE RAHIMAHUMAA ALLAAH +FBCA ; ARABIC LIGATURE RAHIMAHUMAA ALLAAHU TAAALAA +FBCB ; ARABIC LIGATURE RADI ALLAHU TAAALAA ANHUM +FBCC ; ARABIC LIGATURE HAFIZAHU ALLAAH +FBCD ; ARABIC LIGATURE HAFIZAHU ALLAAHU TAAALAA +FBCE ; ARABIC LIGATURE HAFIZAHUM ALLAAHU TAAALAA +FBCF ; ARABIC LIGATURE HAFIZAHUMAA ALLAAHU TAAALAA +FBD0 ; ARABIC LIGATURE SALLALLAAHU TAAALAA ALAYHI WA-SALLAM +FBD1 ; ARABIC LIGATURE AJJAL ALLAAHU FARAJAHU ASH-SHAREEF +FBD2 ; ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3 ; ARABIC LETTER NG ISOLATED FORM FBD4 ; ARABIC LETTER NG FINAL FORM FBD5 ; ARABIC LETTER NG INITIAL FORM @@ -27074,6 +27090,7 @@ FD8C ; ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM FD8D ; ARABIC LIGATURE MEEM WITH JEEM WITH MEEM INITIAL FORM FD8E ; ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM FD8F ; ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD91 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92 ; ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM FD93 ; ARABIC LIGATURE HEH WITH MEEM WITH JEEM INITIAL FORM FD94 ; ARABIC LIGATURE HEH WITH MEEM WITH MEEM INITIAL FORM @@ -30245,6 +30262,14 @@ FFFD ; REPLACEMENT CHARACTER 10EC2 ; ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW 10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW 10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED1 ; ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM +10ED2 ; ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM +10ED3 ; ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM +10ED4 ; ARABIC LIGATURE QADDASA ALLAAHU SIRRAH +10ED5 ; ARABIC LIGATURE QUDDISA SIRRAHUM +10ED6 ; ARABIC LIGATURE QUDDISA SIRRAHUMAA +10ED7 ; ARABIC LIGATURE QUDDISAT ASRAARUHUM +10ED8 ; ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC ; ARABIC COMBINING ALEF OVERLAY 10EFD ; ARABIC SMALL LOW WORD SAKTA 10EFE ; ARABIC SMALL LOW WORD QASR @@ -45406,6 +45431,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155037 +# Total code points: 155062 # EOF From 5e7b13af05493541795c1158cd07e7fec4a91787 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 15:36:03 +0100 Subject: [PATCH 16/38] Eight more Arabic honorifics (#662) * UnicodeData.txt lines from L2/24-002 * lb=AL like U+FDFD per L2/24-002 * Arabic * Regenerate UCD * Fix spelling for unicode-org/sah#484 * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 7 ++++--- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 8 ++++---- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 +++--- unicodetools/data/ucd/dev/LineBreak.txt | 6 +++--- unicodetools/data/ucd/dev/Scripts.txt | 8 ++++---- unicodetools/data/ucd/dev/UnicodeData.txt | 8 ++++++++ unicodetools/data/ucd/dev/VerticalOrientation.txt | 6 +++--- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 12 ++++++------ .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 8 ++++---- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 8 ++++---- .../ucd/dev/extracted/DerivedGeneralCategory.txt | 12 +++++------- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 12 ++++++------ unicodetools/data/ucd/dev/extracted/DerivedName.txt | 12 ++++++++++-- 13 files changed, 64 insertions(+), 49 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index a072b4d64..b69576b8f 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 22:47:02 GMT +# Date: 2024-11-14, 12:16:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2078,9 +2078,10 @@ A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN A7D4 ; 17.0 # LATIN CAPITAL LETTER DOUBLE WYNN A7F1 ; 17.0 # MODIFIER LETTER CAPITAL S FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH -FD91 ; 17.0 # ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH 10ED1..10ED8 ; 17.0 # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -# Total code points: 64 +# Total code points: 72 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 425b65947..ddd0bed1b 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 22:47:20 GMT +# Date: 2024-11-14, 12:16:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -12105,9 +12105,9 @@ FD3E ; Grapheme_Base # Pe ORNATE LEFT PARENTHESIS FD3F ; Grapheme_Base # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; Grapheme_Base # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; Grapheme_Base # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD91 ; Grapheme_Base # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; Grapheme_Base # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; Grapheme_Base # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; Grapheme_Base # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; Grapheme_Base # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; Grapheme_Base # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; Grapheme_Base # Sc RIAL SIGN FDFD..FDFF ; Grapheme_Base # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -12817,7 +12817,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152765 +# Total code points: 152773 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 345bcf401..962986fa9 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 22:47:24 GMT +# Date: 2024-11-14, 12:16:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1713,9 +1713,9 @@ FD3E ; N # Pe ORNATE LEFT PARENTHESIS FD3F ; N # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD91 ; N # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; N # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; N # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; N # Sc RIAL SIGN FDFD..FDFF ; N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 4d5561bb5..4f0edadf4 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-13, 22:47:25 GMT +# Date: 2024-11-14, 12:16:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2542,9 +2542,9 @@ FD3E ; CL # Pe ORNATE LEFT PARENTHESIS FD3F ; OP # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; AL # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD91 ; AL # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; AL # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; AL # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; AL # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; PO # Sc RIAL SIGN FDFD..FDFF ; AL # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index e27f7901f..d7cf7e07c 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-13, 22:47:45 GMT +# Date: 2024-11-14, 12:16:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -878,9 +878,9 @@ FBC3..FBD2 ; Arabic # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD40..FD4F ; Arabic # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD91 ; Arabic # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; Arabic # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; Arabic # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; Arabic # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; Arabic # Sc RIAL SIGN FDFD..FDFF ; Arabic # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -925,7 +925,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1399 +# Total code points: 1407 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 29743503b..ec9c0e0f3 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -16425,6 +16425,7 @@ FD8C;ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM;Lo;0;AL; 0645 FD8D;ARABIC LIGATURE MEEM WITH JEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0645 062C 0645;;;;N;;;;; FD8E;ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM;Lo;0;AL; 0645 062E 062C;;;;N;;;;; FD8F;ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM;Lo;0;AL; 0645 062E 0645;;;;N;;;;; +FD90;ARABIC LIGATURE RAHMATU ALLAAHI ALAYH;So;0;ON;;;;;N;;;;; FD91;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA;So;0;ON;;;;;N;;;;; FD92;ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM;Lo;0;AL; 0645 062C 062E;;;;N;;;;; FD93;ARABIC LIGATURE HEH WITH MEEM WITH JEEM INITIAL FORM;Lo;0;AL; 0647 0645 062C;;;;N;;;;; @@ -16480,6 +16481,13 @@ FDC4;ARABIC LIGATURE AIN WITH JEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0639 FDC5;ARABIC LIGATURE SAD WITH MEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0635 0645 0645;;;;N;;;;; FDC6;ARABIC LIGATURE SEEN WITH KHAH WITH YEH FINAL FORM;Lo;0;AL; 0633 062E 064A;;;;N;;;;; FDC7;ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM;Lo;0;AL; 0646 062C 064A;;;;N;;;;; +FDC8;ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA;So;0;ON;;;;;N;;;;; +FDC9;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANH;So;0;ON;;;;;N;;;;; +FDCA;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHAA;So;0;ON;;;;;N;;;;; +FDCB;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHUMAA;So;0;ON;;;;;N;;;;; +FDCC;ARABIC LIGATURE SALLALLAHU ALAYHI WA-ALAA AALIHEE WA-SALLAM;So;0;ON;;;;;N;;;;; +FDCD;ARABIC LIGATURE AJJAL ALLAAHU TAAALAA FARAJAHU ASH-SHAREEF;So;0;ON;;;;;N;;;;; +FDCE;ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH;So;0;ON;;;;;N;;;;; FDCF;ARABIC LIGATURE SALAAMUHU ALAYNAA;So;0;ON;;;;;N;;;;; FDF0;ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM;Lo;0;AL; 0635 0644 06D2;;;;N;;;;; FDF1;ARABIC LIGATURE QALA USED AS KORANIC STOP SIGN ISOLATED FORM;Lo;0;AL; 0642 0644 06D2;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index adae42e8b..b4dc08030 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-13, 22:47:47 GMT +# Date: 2024-11-14, 12:16:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1526,9 +1526,9 @@ FD3E ; R # Pe ORNATE LEFT PARENTHESIS FD3F ; R # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; R # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; R # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD91 ; R # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; R # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; R # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; R # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; R # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; R # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; R # Sc RIAL SIGN FDFD..FDFF ; R # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index dfd8e2821..1a3186df2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 22:47:18 GMT +# Date: 2024-11-14, 12:16:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1847,8 +1847,8 @@ FBC3..FBD2 ; ON # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALA FD3E ; ON # Pe ORNATE LEFT PARENTHESIS FD3F ; ON # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; ON # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH -FD91 ; ON # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA -FDCF ; ON # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FD90..FD91 ; ON # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCF ; ON # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDFD..FDFF ; ON # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FE10..FE16 ; ON # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE17 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET @@ -1997,7 +1997,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 6777 +# Total code points: 6785 # ================================================ @@ -2502,8 +2502,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 267 code points not listed here. -# Total code points: 1742 +# The above property value applies to 259 code points not listed here. +# Total code points: 1734 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index dd9824c6d..b03e18c1c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 22:47:19 GMT +# Date: 2024-11-14, 12:16:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1237,9 +1237,9 @@ FD3E ; 0 # Pe ORNATE LEFT PARENTHESIS FD3F ; 0 # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; 0 # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; 0 # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD91 ; 0 # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; 0 # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; 0 # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; 0 # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; 0 # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; 0 # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; 0 # Sc RIAL SIGN FDFD..FDFF ; 0 # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -2060,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821517 code points not listed here. +# The above property value applies to 821509 code points not listed here. # Total code points: 1113151 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index a53658c73..e8d16e4a9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 22:47:21 GMT +# Date: 2024-11-14, 12:16:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1334,9 +1334,9 @@ FD3E ; N # Pe ORNATE LEFT PARENTHESIS FD3F ; N # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD91 ; N # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; N # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; N # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; N # Sc RIAL SIGN FDFD..FDFF ; N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -2104,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761035 code points not listed here. +# The above property value applies to 761027 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index e65d152ca..44b621cbf 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 22:47:21 GMT +# Date: 2024-11-14, 12:16:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -329,8 +329,6 @@ FB3D ; Cn # FB3F ; Cn # FB42 ; Cn # FB45 ; Cn # -FD90 ; Cn # -FDC8..FDCE ; Cn # [7] .. FDD0..FDEF ; Cn # [32] .. FE1A..FE1F ; Cn # [6] .. FE53 ; Cn # @@ -743,7 +741,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819469 +# Total code points: 819461 # ================================================ @@ -4206,8 +4204,8 @@ A839 ; So # NORTH INDIC QUANTITY MARK AA77..AA79 ; So # [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO FBC3..FBD2 ; So # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FD40..FD4F ; So # [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH -FD91 ; So # ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA -FDCF ; So # ARABIC LIGATURE SALAAMUHU ALAYNAA +FD90..FD91 ; So # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCF ; So # [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDFD..FDFF ; So # [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FFE4 ; So # FULLWIDTH BROKEN BAR FFE8 ; So # HALFWIDTH FORMS LIGHT VERTICAL @@ -4287,7 +4285,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 7402 +# Total code points: 7410 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index c2824ccc7..26e8f2b03 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 22:47:22 GMT +# Date: 2024-11-14, 12:16:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757589 code points not listed here. -# Total code points: 895057 +# The above property value applies to 757581 code points not listed here. +# Total code points: 895049 # ================================================ @@ -1189,9 +1189,9 @@ FBC3..FBD2 ; AL # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALA FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD40..FD4F ; AL # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM -FD91 ; AL # So ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FD90..FD91 ; AL # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; AL # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; AL # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFD..FDFF ; AL # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM @@ -1614,7 +1614,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26714 +# Total code points: 26722 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index d31ed38a4..8cafde19a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-13, 22:47:22 GMT +# Date: 2024-11-14, 12:16:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -27090,6 +27090,7 @@ FD8C ; ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM FD8D ; ARABIC LIGATURE MEEM WITH JEEM WITH MEEM INITIAL FORM FD8E ; ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM FD8F ; ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYH FD91 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92 ; ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM FD93 ; ARABIC LIGATURE HEH WITH MEEM WITH JEEM INITIAL FORM @@ -27145,6 +27146,13 @@ FDC4 ; ARABIC LIGATURE AIN WITH JEEM WITH MEEM INITIAL FORM FDC5 ; ARABIC LIGATURE SAD WITH MEEM WITH MEEM INITIAL FORM FDC6 ; ARABIC LIGATURE SEEN WITH KHAH WITH YEH FINAL FORM FDC7 ; ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDC8 ; ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA +FDC9 ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANH +FDCA ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHAA +FDCB ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHUMAA +FDCC ; ARABIC LIGATURE SALLALLAHU ALAYHI WA-ALAA AALIHEE WA-SALLAM +FDCD ; ARABIC LIGATURE AJJAL ALLAAHU TAAALAA FARAJAHU ASH-SHAREEF +FDCE ; ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH FDCF ; ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0 ; ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM FDF1 ; ARABIC LIGATURE QALA USED AS KORANIC STOP SIGN ISOLATED FORM @@ -45431,6 +45439,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155062 +# Total code points: 155070 # EOF From 5a3961037384a5bf4945073afe8e6a2eeb72aa96 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 15:51:23 +0100 Subject: [PATCH 17/38] Sidetic (#417) * Java changes for new script * Generated changes from the addition of Sidetic * input file changes for the Sidetic block * Update generated files * Revert .project * comments * GenerateEnums * GenerateEnums * 0 padding * Regenerate UCD --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 5 +-- .../data/ucd/dev/DerivedCoreProperties.txt | 20 +++++++---- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 +- unicodetools/data/ucd/dev/LineBreak.txt | 3 +- .../data/ucd/dev/PropertyValueAliases.txt | 4 ++- .../data/ucd/dev/ScriptExtensions.txt | 4 +-- unicodetools/data/ucd/dev/Scripts.txt | 8 ++++- unicodetools/data/ucd/dev/UnicodeData.txt | 29 ++++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 3 +- .../dev/auxiliary/SentenceBreakProperty.txt | 5 +-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 +-- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++-- .../dev/extracted/DerivedCombiningClass.txt | 5 +-- .../dev/extracted/DerivedEastAsianWidth.txt | 5 +-- .../dev/extracted/DerivedGeneralCategory.txt | 9 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 9 ++--- .../data/ucd/dev/extracted/DerivedName.txt | 33 +++++++++++++++++-- .../org/unicode/props/UcdPropertyValues.java | 2 ++ .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + 20 files changed, 124 insertions(+), 36 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 1517dde0c..be3e10ce3 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -228,6 +228,7 @@ FFF0..FFFF; Specials 108E0..108FF; Hatran 10900..1091F; Phoenician 10920..1093F; Lydian +10940..1095C; Sidetic 10980..1099F; Meroitic Hieroglyphs 109A0..109FF; Meroitic Cursive 10A00..10A5F; Kharoshthi diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b69576b8f..bd835436c 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 12:16:05 GMT +# Date: 2024-11-14, 14:37:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2080,8 +2080,9 @@ A7F1 ; 17.0 # MODIFIER LETTER CAPITAL S FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH +10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10ED1..10ED8 ; 17.0 # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -# Total code points: 72 +# Total code points: 101 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index ddd0bed1b..a301fb3bd 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 12:16:23 GMT +# Date: 2024-11-14, 14:37:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1018,6 +1018,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 108F4..108F5 ; Alphabetic # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; Alphabetic # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Alphabetic # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; Alphabetic # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; Alphabetic # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; Alphabetic # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Alphabetic # Lo KHAROSHTHI LETTER A @@ -1439,7 +1440,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142768 +# Total code points: 142797 # ================================================ @@ -6710,6 +6711,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; ID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ID_Start # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Start # Lo KHAROSHTHI LETTER A @@ -6970,7 +6972,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141278 +# Total code points: 141307 # ================================================ @@ -7864,6 +7866,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 108F4..108F5 ; ID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ID_Continue # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Continue # Lo KHAROSHTHI LETTER A @@ -8377,7 +8380,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144579 +# Total code points: 144608 # ================================================ @@ -8893,6 +8896,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 108F4..108F5 ; XID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; XID_Start # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; XID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Start # Lo KHAROSHTHI LETTER A @@ -9153,7 +9157,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141255 +# Total code points: 141284 # ================================================ @@ -10048,6 +10052,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 108F4..108F5 ; XID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; XID_Continue # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A @@ -10561,7 +10566,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144560 +# Total code points: 144589 # ================================================ @@ -12284,6 +12289,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1091F ; Grapheme_Base # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; Grapheme_Base # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; Grapheme_Base # Po LYDIAN TRIANGULAR MARK +10940..1095C ; Grapheme_Base # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; Grapheme_Base # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; Grapheme_Base # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; Grapheme_Base # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -12817,7 +12823,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152773 +# Total code points: 152802 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 962986fa9..1775ad934 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 12:16:27 GMT +# Date: 2024-11-14, 14:37:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1904,6 +1904,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1091F ; N # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; N # Po LYDIAN TRIANGULAR MARK +10940..1095C ; N # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; N # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; N # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 4f0edadf4..eef5add38 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 12:16:28 GMT +# Date: 2024-11-14, 14:37:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2759,6 +2759,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1091F ; BA # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; AL # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; AL # Po LYDIAN TRIANGULAR MARK +10940..1095C ; AL # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; AL # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; AL # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; AL # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 4358a787e..7b9734001 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-09-11, 23:38:17 GMT +# Date: 2024-10-16, 17:27:56 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -423,6 +423,7 @@ blk; Sharada ; Sharada blk; Shavian ; Shavian blk; Shorthand_Format_Controls ; Shorthand_Format_Controls blk; Siddham ; Siddham +blk; Sidetic ; Sidetic blk; Sinhala ; Sinhala blk; Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers blk; Small_Forms ; Small_Form_Variants @@ -1439,6 +1440,7 @@ sc ; Sgnw ; SignWriting sc ; Shaw ; Shavian sc ; Shrd ; Sharada sc ; Sidd ; Siddham +sc ; Sidt ; Sidetic sc ; Sind ; Khudawadi sc ; Sinh ; Sinhala sc ; Sogd ; Sogdian diff --git a/unicodetools/data/ucd/dev/ScriptExtensions.txt b/unicodetools/data/ucd/dev/ScriptExtensions.txt index 140901a87..27e16cbe4 100644 --- a/unicodetools/data/ucd/dev/ScriptExtensions.txt +++ b/unicodetools/data/ucd/dev/ScriptExtensions.txt @@ -1,5 +1,5 @@ -# ScriptExtensions-16.0.0.txt -# Date: 2024-07-30, 19:38:00 GMT +# ScriptExtensions-17.0.0.txt +# Date: 2024-10-16, 17:28:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index d7cf7e07c..da4dc7bdf 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 12:16:50 GMT +# Date: 2024-11-14, 14:37:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3126,4 +3126,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # Total code points: 80 +# ================================================ + +10940..1095C ; Sidetic # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 + +# Total code points: 29 + # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index ec9c0e0f3..d61a3c99d 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -18772,6 +18772,35 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10938;LYDIAN LETTER NN;Lo;0;R;;;;;N;;;;; 10939;LYDIAN LETTER C;Lo;0;R;;;;;N;;;;; 1093F;LYDIAN TRIANGULAR MARK;Po;0;R;;;;;N;;;;; +10940;SIDETIC LETTER N01;Lo;0;R;;;;;N;;;;; +10941;SIDETIC LETTER N02;Lo;0;R;;;;;N;;;;; +10942;SIDETIC LETTER N03;Lo;0;R;;;;;N;;;;; +10943;SIDETIC LETTER N04;Lo;0;R;;;;;N;;;;; +10944;SIDETIC LETTER N05;Lo;0;R;;;;;N;;;;; +10945;SIDETIC LETTER N06;Lo;0;R;;;;;N;;;;; +10946;SIDETIC LETTER N07;Lo;0;R;;;;;N;;;;; +10947;SIDETIC LETTER N08;Lo;0;R;;;;;N;;;;; +10948;SIDETIC LETTER N09;Lo;0;R;;;;;N;;;;; +10949;SIDETIC LETTER N10;Lo;0;R;;;;;N;;;;; +1094A;SIDETIC LETTER N11;Lo;0;R;;;;;N;;;;; +1094B;SIDETIC LETTER N12;Lo;0;R;;;;;N;;;;; +1094C;SIDETIC LETTER N13;Lo;0;R;;;;;N;;;;; +1094D;SIDETIC LETTER N14;Lo;0;R;;;;;N;;;;; +1094E;SIDETIC LETTER N15;Lo;0;R;;;;;N;;;;; +1094F;SIDETIC LETTER N16;Lo;0;R;;;;;N;;;;; +10950;SIDETIC LETTER N17;Lo;0;R;;;;;N;;;;; +10951;SIDETIC LETTER N18;Lo;0;R;;;;;N;;;;; +10952;SIDETIC LETTER N19;Lo;0;R;;;;;N;;;;; +10953;SIDETIC LETTER N20;Lo;0;R;;;;;N;;;;; +10954;SIDETIC LETTER N21;Lo;0;R;;;;;N;;;;; +10955;SIDETIC LETTER N22;Lo;0;R;;;;;N;;;;; +10956;SIDETIC LETTER N23;Lo;0;R;;;;;N;;;;; +10957;SIDETIC LETTER N24;Lo;0;R;;;;;N;;;;; +10958;SIDETIC LETTER N25;Lo;0;R;;;;;N;;;;; +10959;SIDETIC LETTER N26;Lo;0;R;;;;;N;;;;; +1095A;SIDETIC LETTER N27;Lo;0;R;;;;;N;;;;; +1095B;SIDETIC LETTER N28;Lo;0;R;;;;;N;;;;; +1095C;SIDETIC LETTER N29;Lo;0;R;;;;;N;;;;; 10980;MEROITIC HIEROGLYPHIC LETTER A;Lo;0;R;;;;;N;;;;; 10981;MEROITIC HIEROGLYPHIC LETTER E;Lo;0;R;;;;;N;;;;; 10982;MEROITIC HIEROGLYPHIC LETTER I;Lo;0;R;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index b4dc08030..05cd2171f 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 12:16:52 GMT +# Date: 2024-11-14, 14:37:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1725,6 +1725,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1091F ; R # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; R # Po LYDIAN TRIANGULAR MARK +10940..1095C ; R # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; U # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; R # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 44f556893..5e1badd5b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 22:19:10 GMT +# Date: 2024-11-14, 14:37:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2372,6 +2372,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; OLetter # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; OLetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; OLetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; OLetter # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; OLetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; OLetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; OLetter # Lo KHAROSHTHI LETTER A @@ -2591,7 +2592,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136913 +# Total code points: 136942 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 6ddc428da..25d826481 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-13, 22:19:12 GMT +# Date: 2024-11-14, 14:37:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1114,6 +1114,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; ALetter # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ALetter # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ALetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ALetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ALetter # Lo KHAROSHTHI LETTER A @@ -1354,7 +1355,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33800 +# Total code points: 33829 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 1a3186df2..c87254b23 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 12:16:21 GMT +# Date: 2024-11-14, 14:37:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -67,6 +67,7 @@ # 108E0..108FF Hatran # 10900..1091F Phoenician # 10920..1093F Lydian +# 10940..1095C Sidetic # 10980..1099F Meroitic_Hieroglyphs # 109A0..109FF Meroitic_Cursive # 10A00..10A5F Kharoshthi @@ -1267,6 +1268,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; R # Po LYDIAN TRIANGULAR MARK +10940..1095C ; R # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1325,7 +1327,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# The above property value applies to 2087 code points not listed here. +# The above property value applies to 2058 code points not listed here. # Total code points: 3631 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index b03e18c1c..158023481 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 12:16:23 GMT +# Date: 2024-11-14, 14:37:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1420,6 +1420,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1091F ; 0 # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; 0 # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; 0 # Po LYDIAN TRIANGULAR MARK +10940..1095C ; 0 # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; 0 # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; 0 # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; 0 # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -2060,7 +2061,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821509 code points not listed here. +# The above property value applies to 821480 code points not listed here. # Total code points: 1113151 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index e8d16e4a9..60379f00a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 12:16:24 GMT +# Date: 2024-11-14, 14:37:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1427,6 +1427,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1091F ; N # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; N # Po LYDIAN TRIANGULAR MARK +10940..1095C ; N # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; N # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -2104,7 +2105,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761027 code points not listed here. +# The above property value applies to 760998 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 44b621cbf..7bc6babc2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 12:16:25 GMT +# Date: 2024-11-14, 14:37:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -400,7 +400,7 @@ FFFE..FFFF ; Cn # [2] .. 108F6..108FA ; Cn # [5] .. 1091C..1091E ; Cn # [3] .. 1093A..1093E ; Cn # [5] .. -10940..1097F ; Cn # [64] .. +1095D..1097F ; Cn # [35] .. 109B8..109BB ; Cn # [4] .. 109D0..109D1 ; Cn # [2] .. 10A04 ; Cn # @@ -741,7 +741,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819461 +# Total code points: 819432 # ================================================ @@ -2503,6 +2503,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 108F4..108F5 ; Lo # [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; Lo # [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Lo # [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; Lo # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; Lo # [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; Lo # [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Lo # KHAROSHTHI LETTER A @@ -2707,7 +2708,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136482 +# Total code points: 136511 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 26e8f2b03..f6600add6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 12:16:26 GMT +# Date: 2024-11-14, 14:37:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757581 code points not listed here. -# Total code points: 895049 +# The above property value applies to 757552 code points not listed here. +# Total code points: 895020 # ================================================ @@ -1271,6 +1271,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10916..1091B ; AL # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE 10920..10939 ; AL # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; AL # Po LYDIAN TRIANGULAR MARK +10940..1095C ; AL # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; AL # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; AL # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; AL # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1614,7 +1615,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26722 +# Total code points: 26751 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 8cafde19a..85af14a48 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 12:16:26 GMT +# Date: 2024-11-14, 14:37:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -29437,6 +29437,35 @@ FFFD ; REPLACEMENT CHARACTER 10938 ; LYDIAN LETTER NN 10939 ; LYDIAN LETTER C 1093F ; LYDIAN TRIANGULAR MARK +10940 ; SIDETIC LETTER N01 +10941 ; SIDETIC LETTER N02 +10942 ; SIDETIC LETTER N03 +10943 ; SIDETIC LETTER N04 +10944 ; SIDETIC LETTER N05 +10945 ; SIDETIC LETTER N06 +10946 ; SIDETIC LETTER N07 +10947 ; SIDETIC LETTER N08 +10948 ; SIDETIC LETTER N09 +10949 ; SIDETIC LETTER N10 +1094A ; SIDETIC LETTER N11 +1094B ; SIDETIC LETTER N12 +1094C ; SIDETIC LETTER N13 +1094D ; SIDETIC LETTER N14 +1094E ; SIDETIC LETTER N15 +1094F ; SIDETIC LETTER N16 +10950 ; SIDETIC LETTER N17 +10951 ; SIDETIC LETTER N18 +10952 ; SIDETIC LETTER N19 +10953 ; SIDETIC LETTER N20 +10954 ; SIDETIC LETTER N21 +10955 ; SIDETIC LETTER N22 +10956 ; SIDETIC LETTER N23 +10957 ; SIDETIC LETTER N24 +10958 ; SIDETIC LETTER N25 +10959 ; SIDETIC LETTER N26 +1095A ; SIDETIC LETTER N27 +1095B ; SIDETIC LETTER N28 +1095C ; SIDETIC LETTER N29 10980 ; MEROITIC HIEROGLYPHIC LETTER A 10981 ; MEROITIC HIEROGLYPHIC LETTER E 10982 ; MEROITIC HIEROGLYPHIC LETTER I @@ -45439,6 +45468,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155070 +# Total code points: 155099 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 55107dc2a..ff767d030 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -448,6 +448,7 @@ public enum Block_Values implements Named { Shavian("Shavian"), Shorthand_Format_Controls("Shorthand_Format_Controls"), Siddham("Siddham"), + Sidetic("Sidetic"), Sinhala("Sinhala"), Sinhala_Archaic_Numbers("Sinhala_Archaic_Numbers"), Small_Form_Variants("Small_Forms"), @@ -1969,6 +1970,7 @@ public enum Script_Values implements Named { Shavian("Shaw"), Sharada("Shrd"), Siddham("Sidd"), + Sidetic("Sidt"), Khudawadi("Sind"), Sinhala("Sinh"), Sogdian("Sogd"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index eaa03a0f7..c1800d243 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -267,6 +267,7 @@ Sharada ; Sharada Shavian ; Shavian Shorthand_Format_Controls ; Shorthand_Format_Controls Siddham ; Siddham +Sidetic ; Sidetic Sinhala ; Sinhala Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers Small_Forms ; Small_Form_Variants From c64da22e3c48ad27d85f636ffe376ecad5b50226 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 16:10:55 +0100 Subject: [PATCH 18/38] Two Quranic Characters (#754) * UnicodeData.txt from L2/22-281R * lb=AL for the Lm, lb=CM for the Mn * Arabic * small low noon other_alphabetic cf. smol low meme * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 39 +++++++++++-------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++- .../data/ucd/dev/NormalizationTest.txt | 4 +- unicodetools/data/ucd/dev/PropList.txt | 6 +-- unicodetools/data/ucd/dev/Scripts.txt | 7 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 2 + .../data/ucd/dev/VerticalOrientation.txt | 5 ++- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 +-- .../dev/auxiliary/SentenceBreakProperty.txt | 9 +++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 9 +++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 11 +++--- .../dev/extracted/DerivedCombiningClass.txt | 10 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 7 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 15 +++---- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 +-- .../ucd/dev/extracted/DerivedLineBreak.txt | 13 ++++--- .../data/ucd/dev/extracted/DerivedName.txt | 6 ++- 19 files changed, 99 insertions(+), 72 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index bd835436c..6dc72b355 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 14:37:04 GMT +# Date: 2024-11-14, 14:54:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2081,8 +2081,10 @@ FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAY FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH 10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 +10EC5 ; 17.0 # ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; 17.0 # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFB ; 17.0 # ARABIC SMALL LOW NOON -# Total code points: 101 +# Total code points: 103 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index a301fb3bd..985ab73b3 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 14:37:21 GMT +# Date: 2024-11-14, 14:55:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1052,7 +1052,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY +10EC5 ; Alphabetic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EFB..10EFC ; Alphabetic # Mn [2] ARABIC SMALL LOW NOON..ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1440,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142797 +# Total code points: 142799 # ================================================ @@ -3351,7 +3352,8 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; Case_Ignorable # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EFB..10EFF ; Case_Ignorable # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA @@ -3506,7 +3508,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2779 +# Total code points: 2781 # ================================================ @@ -6739,6 +6741,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6972,7 +6975,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141307 +# Total code points: 141308 # ================================================ @@ -7905,7 +7908,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; ID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EFB..10EFF ; ID_Continue # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -8380,7 +8384,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144608 +# Total code points: 144610 # ================================================ @@ -8924,6 +8928,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; XID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9157,7 +9162,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141284 +# Total code points: 141285 # ================================================ @@ -10091,7 +10096,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; XID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EFB..10EFF ; XID_Continue # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -10566,7 +10572,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144589 +# Total code points: 144591 # ================================================ @@ -10884,7 +10890,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 10D24..10D27 ; Grapheme_Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Grapheme_Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Grapheme_Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Grapheme_Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; Grapheme_Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Grapheme_Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Grapheme_Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Grapheme_Extend # Mn BRAHMI SIGN ANUSVARA @@ -11039,7 +11045,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2222 +# Total code points: 2223 # ================================================ @@ -12340,6 +12346,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10EAD ; Grapheme_Base # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; Grapheme_Base # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; Grapheme_Base # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -12823,7 +12830,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152802 +# Total code points: 152803 # ================================================ @@ -13214,7 +13221,7 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA 10D24..10D27 ; InCB; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; InCB; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; InCB; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; InCB; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; InCB; Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; InCB; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; InCB; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; InCB; Extend # Mn BRAHMI SIGN ANUSVARA @@ -13370,6 +13377,6 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2221 +# Total code points: 2222 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 1775ad934..bba100cc0 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 14:37:25 GMT +# Date: 2024-11-14, 14:55:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1965,8 +1965,9 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index eef5add38..5ea7db04a 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 14:37:26 GMT +# Date: 2024-11-14, 14:55:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2822,8 +2822,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; CM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index dd98635df..d47e846f2 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-17.0.0.txt -# Date: 2024-11-13, 22:18:53 GMT +# Date: 2024-11-14, 14:55:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -18701,6 +18701,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 10EAB 0315 0300 05AE 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062; # (a◌𐺫◌̕◌̀◌֮b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING HAMZA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 10EAC 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062; # (a◌̕◌̀◌֮◌𐺬b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, YEZIDI COMBINING MADDA MARK, LATIN SMALL LETTER B 0061 10EAC 0315 0300 05AE 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062; # (a◌𐺬◌̕◌̀◌֮b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING MADDA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EFB 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062; # (a◌֚◌̖◌᷺◌𐻻b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW NOON, LATIN SMALL LETTER B +0061 10EFB 059A 0316 1DFA 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062; # (a◌𐻻◌֚◌̖◌᷺b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW NOON, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFD 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062; # (a◌֚◌̖◌᷺◌𐻽b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD SAKTA, LATIN SMALL LETTER B 0061 10EFD 059A 0316 1DFA 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062; # (a◌𐻽◌֚◌̖◌᷺b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW WORD SAKTA, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFE 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062; # (a◌֚◌̖◌᷺◌𐻾b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD QASR, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 7744fcc3e..48dea2722 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-13, 22:18:57 GMT +# Date: 2024-11-14, 14:55:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -702,7 +702,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E 10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY +10EFB..10EFC ; Other_Alphabetic # Mn [2] ARABIC SMALL LOW NOON..ARABIC COMBINING ALEF OVERLAY 11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA @@ -858,7 +858,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1495 +# Total code points: 1496 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index da4dc7bdf..1e5894b7f 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 14:37:44 GMT +# Date: 2024-11-14, 14:56:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -888,8 +888,9 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; Arabic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; Arabic # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; Arabic # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM @@ -925,7 +926,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1407 +# Total code points: 1409 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index d61a3c99d..e4e8eacd5 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -19634,6 +19634,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC2;ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10EC5;ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW;Lm;0;AL;;;;;N;;;;; 10ED1;ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; 10ED2;ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; 10ED3;ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; @@ -19642,6 +19643,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10ED6;ARABIC LIGATURE QUDDISA SIRRAHUMAA;So;0;ON;;;;;N;;;;; 10ED7;ARABIC LIGATURE QUDDISAT ASRAARUHUM;So;0;ON;;;;;N;;;;; 10ED8;ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH;So;0;ON;;;;;N;;;;; +10EFB;ARABIC SMALL LOW NOON;Mn;220;NSM;;;;;N;;;;; 10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;; 10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;; 10EFE;ARABIC SMALL LOW WORD QASR;Mn;220;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 05cd2171f..a088ec14b 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 14:37:46 GMT +# Date: 2024-11-14, 14:56:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1786,8 +1786,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; R # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; R # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; R # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index ce9cb6cde..97b805ce1 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-17.0.0.txt -# Date: 2024-11-13, 21:22:53 GMT +# Date: 2024-11-14, 14:55:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -340,7 +340,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA @@ -496,7 +496,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2227 +# Total code points: 2228 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 5e1badd5b..d8ff2484a 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 14:37:45 GMT +# Date: 2024-11-14, 14:56:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -374,7 +374,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -587,7 +587,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2630 +# Total code points: 2631 # ================================================ @@ -2396,6 +2396,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; OLetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2592,7 +2593,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136942 +# Total code points: 136943 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 25d826481..29f44bc7c 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 14:37:47 GMT +# Date: 2024-11-14, 14:56:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -410,7 +410,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -624,7 +624,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2634 +# Total code points: 2635 # ================================================ @@ -1142,6 +1142,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ALetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1355,7 +1356,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33829 +# Total code points: 33830 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index c87254b23..0c98bbcfc 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 14:37:19 GMT +# Date: 2024-11-14, 14:55:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2277,7 +2277,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; NSM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; NSM # Mn BRAHMI SIGN ANUSVARA @@ -2411,7 +2411,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2057 +# Total code points: 2058 # ================================================ @@ -2459,6 +2459,7 @@ FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISO FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2504,8 +2505,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 259 code points not listed here. -# Total code points: 1734 +# The above property value applies to 257 code points not listed here. +# Total code points: 1733 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 158023481..58a37ea70 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 14:37:21 GMT +# Date: 2024-11-14, 14:55:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1475,6 +1475,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EAD ; 0 # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; 0 # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; 0 # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL @@ -2061,8 +2062,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821480 code points not listed here. -# Total code points: 1113151 +# The above property value applies to 821478 code points not listed here. +# Total code points: 1113150 # ================================================ @@ -2618,6 +2619,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 10A0D ; 220 # Mn KHAROSHTHI SIGN DOUBLE RING BELOW 10A3A ; 220 # Mn KHAROSHTHI SIGN DOT BELOW 10AE6 ; 220 # Mn MANICHAEAN ABBREVIATION MARK BELOW +10EFB ; 220 # Mn ARABIC SMALL LOW NOON 10EFD..10EFF ; 220 # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F47 ; 220 # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW 10F4B ; 220 # Mn SOGDIAN COMBINING CURVE BELOW @@ -2630,7 +2632,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 184 +# Total code points: 185 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 60379f00a..c3224da3a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 14:37:22 GMT +# Date: 2024-11-14, 14:55:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1487,8 +1487,9 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -2105,7 +2106,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760998 code points not listed here. +# The above property value applies to 760996 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 7bc6babc2..4a761c488 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 14:37:23 GMT +# Date: 2024-11-14, 14:55:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -432,8 +432,8 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. -10EC5..10ED0 ; Cn # [12] .. -10ED9..10EFB ; Cn # [35] .. +10EC6..10ED0 ; Cn # [11] .. +10ED9..10EFA ; Cn # [34] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -741,7 +741,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819432 +# Total code points: 819430 # ================================================ @@ -2157,6 +2157,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 107B2..107BA ; Lm # [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10D4E ; Lm # GARAY VOWEL LENGTH MARK 10D6F ; Lm # GARAY REDUPLICATION MARK +10EC5 ; Lm # ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16D40..16D42 ; Lm # [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D6B..16D6C ; Lm # [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT @@ -2171,7 +2172,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 405 +# Total code points: 406 # ================================================ @@ -2939,7 +2940,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; Mn # [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Mn # BRAHMI SIGN ANUSVARA @@ -3073,7 +3074,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2049 +# Total code points: 2050 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index d57fd1f6f..cd7857336 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-13, 21:22:50 GMT +# Date: 2024-11-14, 14:55:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -443,7 +443,7 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; T # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; T # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; T # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; T # Mn BRAHMI SIGN ANUSVARA @@ -583,6 +583,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2214 +# Total code points: 2215 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index f6600add6..29695f052 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 14:37:24 GMT +# Date: 2024-11-14, 14:55:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757552 code points not listed here. -# Total code points: 895020 +# The above property value applies to 757550 code points not listed here. +# Total code points: 895018 # ================================================ @@ -1316,6 +1316,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -1615,7 +1616,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26751 +# Total code points: 26752 # ================================================ @@ -2185,7 +2186,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFB..10EFF ; CM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; CM # Mc BRAHMI SIGN CANDRABINDU @@ -2390,7 +2391,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2498 +# Total code points: 2499 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 85af14a48..2cb4df8a0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 14:37:24 GMT +# Date: 2024-11-14, 14:55:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -30299,6 +30299,7 @@ FFFD ; REPLACEMENT CHARACTER 10EC2 ; ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW 10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW 10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10ED1 ; ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM 10ED2 ; ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM 10ED3 ; ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM @@ -30307,6 +30308,7 @@ FFFD ; REPLACEMENT CHARACTER 10ED6 ; ARABIC LIGATURE QUDDISA SIRRAHUMAA 10ED7 ; ARABIC LIGATURE QUDDISAT ASRAARUHUM 10ED8 ; ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFB ; ARABIC SMALL LOW NOON 10EFC ; ARABIC COMBINING ALEF OVERLAY 10EFD ; ARABIC SMALL LOW WORD SAKTA 10EFE ; ARABIC SMALL LOW WORD QASR @@ -45468,6 +45470,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155099 +# Total code points: 155101 # EOF From 5c1cbadda2353e721176abc862af031391bd26f3 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 16:24:57 +0100 Subject: [PATCH 19/38] =?UTF-8?q?Thin=20=D9=86=20(#756)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * UnicodeData.txt line from L2/23-121 * lb=AL * Arabic * ArabicShaping.txt from the proposal * A new Joining_Group * Regenerate UCD * GenerateEnums * This is silly --- unicodetools/data/ucd/dev/ArabicShaping.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 6 +++--- .../data/ucd/dev/DerivedCoreProperties.txt | 20 ++++++++++++------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- .../data/ucd/dev/PropertyValueAliases.txt | 3 ++- unicodetools/data/ucd/dev/Scripts.txt | 5 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + .../data/ucd/dev/VerticalOrientation.txt | 3 ++- .../dev/auxiliary/SentenceBreakProperty.txt | 5 +++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 +++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 5 +++-- .../dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 9 +++++---- .../ucd/dev/extracted/DerivedJoiningGroup.txt | 8 +++++++- .../ucd/dev/extracted/DerivedJoiningType.txt | 5 +++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 5 +++-- .../org/unicode/props/UcdPropertyValues.java | 1 + .../java/org/unicode/text/UCD/UCD_Names.java | 4 +++- .../java/org/unicode/text/UCD/UCD_Types.java | 4 +++- 22 files changed, 74 insertions(+), 41 deletions(-) diff --git a/unicodetools/data/ucd/dev/ArabicShaping.txt b/unicodetools/data/ucd/dev/ArabicShaping.txt index 20d103cd5..38bc3a417 100644 --- a/unicodetools/data/ucd/dev/ArabicShaping.txt +++ b/unicodetools/data/ucd/dev/ArabicShaping.txt @@ -851,6 +851,7 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group 10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL 10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH 10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF +10EC6; THIN NOON; D; THIN NOON # Sogdian Characters diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 6dc72b355..559012328 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 14:54:38 GMT +# Date: 2024-11-14, 15:12:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2081,10 +2081,10 @@ FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAY FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH 10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 -10EC5 ; 17.0 # ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC5..10EC6 ; 17.0 # [2] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER THIN NOON 10ED1..10ED8 ; 17.0 # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB ; 17.0 # ARABIC SMALL LOW NOON -# Total code points: 103 +# Total code points: 104 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 985ab73b3..7d1dbc676 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 14:55:09 GMT +# Date: 2024-11-14, 15:13:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1053,6 +1053,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Alphabetic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; Alphabetic # Lo ARABIC LETTER THIN NOON 10EFB..10EFC ; Alphabetic # Mn [2] ARABIC SMALL LOW NOON..ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1441,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142799 +# Total code points: 142800 # ================================================ @@ -6742,6 +6743,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; ID_Start # Lo ARABIC LETTER THIN NOON 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6975,7 +6977,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141308 +# Total code points: 141309 # ================================================ @@ -7909,6 +7911,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; ID_Continue # Lo ARABIC LETTER THIN NOON 10EFB..10EFF ; ID_Continue # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -8384,7 +8387,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144610 +# Total code points: 144611 # ================================================ @@ -8929,6 +8932,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; XID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; XID_Start # Lo ARABIC LETTER THIN NOON 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9162,7 +9166,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141285 +# Total code points: 141286 # ================================================ @@ -10097,6 +10101,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; XID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; XID_Continue # Lo ARABIC LETTER THIN NOON 10EFB..10EFF ; XID_Continue # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -10572,7 +10577,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144591 +# Total code points: 144592 # ================================================ @@ -12347,6 +12352,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Grapheme_Base # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; Grapheme_Base # Lo ARABIC LETTER THIN NOON 10ED1..10ED8 ; Grapheme_Base # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -12830,7 +12836,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152803 +# Total code points: 152804 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index bba100cc0..90ffc2d1e 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 14:55:18 GMT +# Date: 2024-11-14, 15:13:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1966,6 +1966,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; N # Lo ARABIC LETTER THIN NOON 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 5ea7db04a..f5d7e0c08 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 14:55:19 GMT +# Date: 2024-11-14, 15:13:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2823,6 +2823,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; AL # Lo ARABIC LETTER THIN NOON 10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; CM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 7b9734001..5c447325b 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-10-16, 17:27:56 GMT +# Date: 2024-11-14, 15:13:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1090,6 +1090,7 @@ jg ; Taw ; Taw jg ; Teh_Marbuta ; Teh_Marbuta jg ; Teh_Marbuta_Goal ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal jg ; Teth ; Teth +jg ; Thin_Noon ; Thin_Noon jg ; Thin_Yeh ; Thin_Yeh jg ; Vertical_Tail ; Vertical_Tail jg ; Waw ; Waw diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 1e5894b7f..f09505048 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 14:56:03 GMT +# Date: 2024-11-14, 15:14:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -889,6 +889,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Arabic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; Arabic # Lo ARABIC LETTER THIN NOON 10ED1..10ED8 ; Arabic # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; Arabic # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL @@ -926,7 +927,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1409 +# Total code points: 1410 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index e4e8eacd5..f4d3f3f9d 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -19635,6 +19635,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC5;ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW;Lm;0;AL;;;;;N;;;;; +10EC6;ARABIC LETTER THIN NOON;Lo;0;AL;;;;;N;;;;; 10ED1;ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; 10ED2;ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; 10ED3;ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index a088ec14b..2691f8c00 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 14:56:06 GMT +# Date: 2024-11-14, 15:14:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1787,6 +1787,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; R # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; R # Lo ARABIC LETTER THIN NOON 10ED1..10ED8 ; R # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; R # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index d8ff2484a..8258057b2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 14:56:04 GMT +# Date: 2024-11-14, 15:14:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2397,6 +2397,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; OLetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; OLetter # Lo ARABIC LETTER THIN NOON 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2593,7 +2594,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136943 +# Total code points: 136944 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 29f44bc7c..d1800a501 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 14:56:07 GMT +# Date: 2024-11-14, 15:14:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1143,6 +1143,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ALetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; ALetter # Lo ARABIC LETTER THIN NOON 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1356,7 +1357,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33830 +# Total code points: 33831 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 0c98bbcfc..221aa212a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 14:55:06 GMT +# Date: 2024-11-14, 15:13:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2460,6 +2460,7 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; AL # Lo ARABIC LETTER THIN NOON 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2505,7 +2506,7 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 257 code points not listed here. +# The above property value applies to 256 code points not listed here. # Total code points: 1733 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 58a37ea70..0eb3f0f88 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 14:55:08 GMT +# Date: 2024-11-14, 15:13:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1476,6 +1476,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; 0 # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; 0 # Lo ARABIC LETTER THIN NOON 10ED1..10ED8 ; 0 # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL @@ -2062,7 +2063,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821478 code points not listed here. +# The above property value applies to 821477 code points not listed here. # Total code points: 1113150 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index c3224da3a..8e9aadf56 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 14:55:12 GMT +# Date: 2024-11-14, 15:13:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1488,6 +1488,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; N # Lo ARABIC LETTER THIN NOON 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL @@ -2106,7 +2107,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760996 code points not listed here. +# The above property value applies to 760995 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 4a761c488..cb3cc649f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 14:55:12 GMT +# Date: 2024-11-14, 15:13:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -432,7 +432,7 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. -10EC6..10ED0 ; Cn # [11] .. +10EC7..10ED0 ; Cn # [10] .. 10ED9..10EFA ; Cn # [34] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. @@ -741,7 +741,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819430 +# Total code points: 819429 # ================================================ @@ -2526,6 +2526,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC6 ; Lo # ARABIC LETTER THIN NOON 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2709,7 +2710,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136511 +# Total code points: 136512 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt index d2c5547ed..f0e99096f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt @@ -1,5 +1,5 @@ # DerivedJoiningGroup-17.0.0.txt -# Date: 2024-10-15, 12:06:51 GMT +# Date: 2024-11-14, 15:13:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -751,4 +751,10 @@ # Total code points: 1 +# ================================================ + +10EC6 ; Thin_Noon # Lo ARABIC LETTER THIN NOON + +# Total code points: 1 + # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index cd7857336..ccc5a289a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-14, 14:55:14 GMT +# Date: 2024-11-14, 15:13:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -97,6 +97,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10D01..10D21 ; D # Lo [33] HANIFI ROHINGYA LETTER BA..HANIFI ROHINGYA VOWEL O 10D23 ; D # Lo HANIFI ROHINGYA MARK NA KHONNA 10EC3..10EC4 ; D # Lo [2] ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC6 ; D # Lo ARABIC LETTER THIN NOON 10F30..10F32 ; D # Lo [3] SOGDIAN LETTER ALEPH..SOGDIAN LETTER GIMEL 10F34..10F44 ; D # Lo [17] SOGDIAN LETTER WAW..SOGDIAN LETTER LESH 10F51..10F53 ; D # No [3] SOGDIAN NUMBER ONE..SOGDIAN NUMBER TWENTY @@ -112,7 +113,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10FCA ; D # No CHORASMIAN NUMBER TWENTY 1E900..1E943 ; D # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 613 +# Total code points: 614 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 29695f052..1ce2a534f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 14:55:14 GMT +# Date: 2024-11-14, 15:13:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757550 code points not listed here. -# Total code points: 895018 +# The above property value applies to 757549 code points not listed here. +# Total code points: 895017 # ================================================ @@ -1317,6 +1317,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; AL # Lo ARABIC LETTER THIN NOON 10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -1616,7 +1617,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26752 +# Total code points: 26753 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 2cb4df8a0..6aad2b226 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 14:55:15 GMT +# Date: 2024-11-14, 15:13:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -30300,6 +30300,7 @@ FFFD ; REPLACEMENT CHARACTER 10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW 10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; ARABIC LETTER THIN NOON 10ED1 ; ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM 10ED2 ; ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM 10ED3 ; ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM @@ -45470,6 +45471,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155101 +# Total code points: 155102 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index ff767d030..b41d852c9 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -1351,6 +1351,7 @@ public enum Joining_Group_Values implements Named { Teh_Marbuta("Teh_Marbuta"), Teh_Marbuta_Goal("Teh_Marbuta_Goal", "Hamza_On_Heh_Goal"), Teth("Teth"), + Thin_Noon("Thin_Noon"), Thin_Yeh("Thin_Yeh"), Vertical_Tail("Vertical_Tail"), Waw("Waw"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java index 9075daea6..100273ad9 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java @@ -1227,7 +1227,9 @@ public final class UCD_Names implements UCD_Types { "THIN_YEH", "VERTICAL_TAIL", // Unicode 16 - "KASHMIRI_YEH" + "KASHMIRI_YEH", + // Unicode n > 16 + "THIN_NOON", }; static { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index b1ffb8261..d00021b07 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -781,8 +781,10 @@ public interface UCD_Types { VERTICAL_TAIL = 104, // Unicode 16 KASHMIRI_YEH = 105, + // Unicode n > 16 + THIN_NOON = 106, // limit - LIMIT_JOINING_GROUP = KASHMIRI_YEH + 1; + LIMIT_JOINING_GROUP = THIN_NOON + 1; static final byte NFD = 0, NFC = 1, NFKD = 2, NFKC = 3; public static final int NF_COMPATIBILITY_MASK = 2, NF_COMPOSITION_MASK = 1; From 69070a9e65d7a3dd137bad1e001e9fa6d9fd88d0 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 16:52:50 +0100 Subject: [PATCH 20/38] Yeh with four dots below (#706) * Yeh with 4 dots below * lb=AL * Scripts.txt * ArabicShaping.txt from the proposal * Regenerate UCD * Deconfliction --- unicodetools/data/ucd/dev/ArabicShaping.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++--- .../data/ucd/dev/DerivedCoreProperties.txt | 26 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 4 +-- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + .../data/ucd/dev/VerticalOrientation.txt | 4 +-- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++--- .../dev/extracted/DerivedCombiningClass.txt | 6 ++--- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++---- .../ucd/dev/extracted/DerivedJoiningGroup.txt | 5 ++-- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +++---- .../data/ucd/dev/extracted/DerivedName.txt | 5 ++-- 18 files changed, 61 insertions(+), 57 deletions(-) diff --git a/unicodetools/data/ucd/dev/ArabicShaping.txt b/unicodetools/data/ucd/dev/ArabicShaping.txt index 38bc3a417..6f71f9214 100644 --- a/unicodetools/data/ucd/dev/ArabicShaping.txt +++ b/unicodetools/data/ucd/dev/ArabicShaping.txt @@ -852,6 +852,7 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group 10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH 10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF 10EC6; THIN NOON; D; THIN NOON +10EC7; DOTLESS YEH WITH 4 DOTS BELOW; D; YEH # Sogdian Characters diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 559012328..e6e952e4a 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 15:12:52 GMT +# Date: 2024-11-14, 15:26:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2081,10 +2081,10 @@ FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAY FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH 10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 -10EC5..10EC6 ; 17.0 # [2] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER THIN NOON +10EC5..10EC7 ; 17.0 # [3] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; 17.0 # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB ; 17.0 # ARABIC SMALL LOW NOON -# Total code points: 104 +# Total code points: 105 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 7d1dbc676..d0930bc27 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 15:13:20 GMT +# Date: 2024-11-14, 15:27:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1053,7 +1053,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Alphabetic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; Alphabetic # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; Alphabetic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10EFB..10EFC ; Alphabetic # Mn [2] ARABIC SMALL LOW NOON..ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1442,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142800 +# Total code points: 142801 # ================================================ @@ -6743,7 +6743,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; ID_Start # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; ID_Start # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6977,7 +6977,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141309 +# Total code points: 141310 # ================================================ @@ -7911,7 +7911,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; ID_Continue # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; ID_Continue # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10EFB..10EFF ; ID_Continue # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -8387,7 +8387,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144611 +# Total code points: 144612 # ================================================ @@ -8932,7 +8932,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; XID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; XID_Start # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; XID_Start # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9166,7 +9166,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141286 +# Total code points: 141287 # ================================================ @@ -10101,7 +10101,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; XID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; XID_Continue # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; XID_Continue # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10EFB..10EFF ; XID_Continue # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -10577,7 +10577,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144592 +# Total code points: 144593 # ================================================ @@ -12352,7 +12352,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Grapheme_Base # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; Grapheme_Base # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; Grapheme_Base # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; Grapheme_Base # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -12836,7 +12836,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152804 +# Total code points: 152805 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 90ffc2d1e..3af5ca7fd 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 15:13:27 GMT +# Date: 2024-11-14, 15:27:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1966,7 +1966,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; N # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index f5d7e0c08..7a9618652 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 15:13:28 GMT +# Date: 2024-11-14, 15:27:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2823,7 +2823,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; AL # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; CM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index f09505048..2c4efaff7 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 15:14:07 GMT +# Date: 2024-11-14, 15:28:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -889,7 +889,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Arabic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; Arabic # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; Arabic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; Arabic # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; Arabic # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL @@ -927,7 +927,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1410 +# Total code points: 1411 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f4d3f3f9d..3217ea7a7 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -19636,6 +19636,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC5;ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW;Lm;0;AL;;;;;N;;;;; 10EC6;ARABIC LETTER THIN NOON;Lo;0;AL;;;;;N;;;;; +10EC7;ARABIC LETTER YEH WITH FOUR DOTS BELOW;Lo;0;AL;;;;;N;;;;; 10ED1;ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; 10ED2;ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; 10ED3;ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 2691f8c00..16b0ece34 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 15:14:10 GMT +# Date: 2024-11-14, 15:28:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1787,7 +1787,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; R # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; R # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; R # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; R # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; R # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 8258057b2..625ffe50f 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 15:14:08 GMT +# Date: 2024-11-14, 15:28:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2397,7 +2397,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; OLetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; OLetter # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; OLetter # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2594,7 +2594,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136944 +# Total code points: 136945 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index d1800a501..c9ed2146c 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 15:14:11 GMT +# Date: 2024-11-14, 15:28:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1143,7 +1143,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ALetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; ALetter # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; ALetter # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1357,7 +1357,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33831 +# Total code points: 33832 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 221aa212a..05f7cc4f4 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 15:13:17 GMT +# Date: 2024-11-14, 15:27:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2460,7 +2460,7 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; AL # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2506,7 +2506,7 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 256 code points not listed here. +# The above property value applies to 255 code points not listed here. # Total code points: 1733 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 0eb3f0f88..6e0611432 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 15:13:19 GMT +# Date: 2024-11-14, 15:27:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1476,7 +1476,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; 0 # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; 0 # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; 0 # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; 0 # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL @@ -2063,7 +2063,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821477 code points not listed here. +# The above property value applies to 821476 code points not listed here. # Total code points: 1113150 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 8e9aadf56..af21f6081 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 15:13:22 GMT +# Date: 2024-11-14, 15:27:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1488,7 +1488,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; N # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL @@ -2107,7 +2107,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760995 code points not listed here. +# The above property value applies to 760994 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index cb3cc649f..36ce61d28 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 15:13:22 GMT +# Date: 2024-11-14, 15:27:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -432,7 +432,7 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. -10EC7..10ED0 ; Cn # [10] .. +10EC8..10ED0 ; Cn # [9] .. 10ED9..10EFA ; Cn # [34] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. @@ -741,7 +741,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819429 +# Total code points: 819428 # ================================================ @@ -2526,7 +2526,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC6 ; Lo # ARABIC LETTER THIN NOON +10EC6..10EC7 ; Lo # [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2710,7 +2710,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136512 +# Total code points: 136513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt index f0e99096f..f24475268 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt @@ -1,5 +1,5 @@ # DerivedJoiningGroup-17.0.0.txt -# Date: 2024-11-14, 15:13:23 GMT +# Date: 2024-11-14, 15:27:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -385,8 +385,9 @@ 0777 ; Yeh # Lo ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW 08A8..08A9 ; Yeh # Lo [2] ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE 08BA ; Yeh # Lo ARABIC LETTER YEH WITH TWO DOTS BELOW AND SMALL NOON ABOVE +10EC7 ; Yeh # Lo ARABIC LETTER YEH WITH FOUR DOTS BELOW -# Total code points: 10 +# Total code points: 11 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index ccc5a289a..fc0d6d2cb 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-14, 15:13:24 GMT +# Date: 2024-11-14, 15:27:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -97,7 +97,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10D01..10D21 ; D # Lo [33] HANIFI ROHINGYA LETTER BA..HANIFI ROHINGYA VOWEL O 10D23 ; D # Lo HANIFI ROHINGYA MARK NA KHONNA 10EC3..10EC4 ; D # Lo [2] ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC6 ; D # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; D # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F30..10F32 ; D # Lo [3] SOGDIAN LETTER ALEPH..SOGDIAN LETTER GIMEL 10F34..10F44 ; D # Lo [17] SOGDIAN LETTER WAW..SOGDIAN LETTER LESH 10F51..10F53 ; D # No [3] SOGDIAN NUMBER ONE..SOGDIAN NUMBER TWENTY @@ -113,7 +113,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10FCA ; D # No CHORASMIAN NUMBER TWENTY 1E900..1E943 ; D # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 614 +# Total code points: 615 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 1ce2a534f..ddc1d086e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 15:13:24 GMT +# Date: 2024-11-14, 15:27:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757549 code points not listed here. -# Total code points: 895017 +# The above property value applies to 757548 code points not listed here. +# Total code points: 895016 # ================================================ @@ -1317,7 +1317,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EC6 ; AL # Lo ARABIC LETTER THIN NOON +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -1617,7 +1617,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26753 +# Total code points: 26754 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 6aad2b226..c2198a2fc 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 15:13:24 GMT +# Date: 2024-11-14, 15:27:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -30301,6 +30301,7 @@ FFFD ; REPLACEMENT CHARACTER 10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6 ; ARABIC LETTER THIN NOON +10EC7 ; ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED1 ; ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM 10ED2 ; ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM 10ED3 ; ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM @@ -45471,6 +45472,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155102 +# Total code points: 155103 # EOF From 0f41413497d3ef2b649e4d195da3e4cb9c782b86 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 17:06:08 +0100 Subject: [PATCH 21/38] Arabic biblical end of verse (#760) * UnicodeData.txt line from L2/23-103R * lb=BA based on figures in the proposal * Arabic per the proposal * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 +++--- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 5 +++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- unicodetools/data/ucd/dev/Scripts.txt | 5 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + unicodetools/data/ucd/dev/VerticalOrientation.txt | 3 ++- unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedGeneralCategory.txt | 9 +++++---- unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 5 +++-- 13 files changed, 40 insertions(+), 28 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index e6e952e4a..426462abd 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 15:26:44 GMT +# Date: 2024-11-14, 15:54:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2082,9 +2082,9 @@ FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGAT FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH 10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10EC5..10EC7 ; 17.0 # [3] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW -10ED1..10ED8 ; 17.0 # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10ED0..10ED8 ; 17.0 # [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB ; 17.0 # ARABIC SMALL LOW NOON -# Total code points: 105 +# Total code points: 106 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index d0930bc27..629eaad62 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 15:27:14 GMT +# Date: 2024-11-14, 15:54:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -12353,6 +12353,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Grapheme_Base # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; Grapheme_Base # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; Grapheme_Base # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; Grapheme_Base # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -12836,7 +12837,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152805 +# Total code points: 152806 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 3af5ca7fd..3a0583121 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 15:27:22 GMT +# Date: 2024-11-14, 15:54:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1967,6 +1967,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; N # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 7a9618652..430014be1 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 15:27:23 GMT +# Date: 2024-11-14, 15:54:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2824,6 +2824,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; BA # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; CM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 2c4efaff7..c611447bd 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 15:28:06 GMT +# Date: 2024-11-14, 15:55:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -890,6 +890,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Arabic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; Arabic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; Arabic # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; Arabic # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; Arabic # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL @@ -927,7 +928,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1411 +# Total code points: 1412 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 3217ea7a7..ce600d4b7 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -19637,6 +19637,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC5;ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW;Lm;0;AL;;;;;N;;;;; 10EC6;ARABIC LETTER THIN NOON;Lo;0;AL;;;;;N;;;;; 10EC7;ARABIC LETTER YEH WITH FOUR DOTS BELOW;Lo;0;AL;;;;;N;;;;; +10ED0;ARABIC BIBLICAL END OF VERSE;Po;0;ON;;;;;N;;;;; 10ED1;ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; 10ED2;ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; 10ED3;ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 16b0ece34..d2a4df22d 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 15:28:09 GMT +# Date: 2024-11-14, 15:55:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1788,6 +1788,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; R # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; R # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; R # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; R # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; R # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 05f7cc4f4..0bf1157fa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 15:27:10 GMT +# Date: 2024-11-14, 15:54:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1937,6 +1937,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1091F ; ON # Po PHOENICIAN WORD SEPARATOR 10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10D6E ; ON # Pd GARAY HYPHEN +10ED0 ; ON # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; ON # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND 11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT @@ -1999,7 +2000,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 6785 +# Total code points: 6786 # ================================================ @@ -2506,8 +2507,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 255 code points not listed here. -# Total code points: 1733 +# The above property value applies to 254 code points not listed here. +# Total code points: 1732 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 6e0611432..ad658a165 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 15:27:13 GMT +# Date: 2024-11-14, 15:54:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1477,6 +1477,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; 0 # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; 0 # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; 0 # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; 0 # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL @@ -2063,7 +2064,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821476 code points not listed here. +# The above property value applies to 821475 code points not listed here. # Total code points: 1113150 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index af21f6081..43bd0d94d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 15:27:16 GMT +# Date: 2024-11-14, 15:54:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1489,6 +1489,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; N # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL @@ -2107,7 +2108,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760994 code points not listed here. +# The above property value applies to 760993 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 36ce61d28..5ffd7454b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 15:27:17 GMT +# Date: 2024-11-14, 15:54:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -432,7 +432,7 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. -10EC8..10ED0 ; Cn # [9] .. +10EC8..10ECF ; Cn # [8] .. 10ED9..10EFA ; Cn # [34] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. @@ -741,7 +741,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819428 +# Total code points: 819427 # ================================================ @@ -3903,6 +3903,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 10AF0..10AF6 ; Po # [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER 10B39..10B3F ; Po # [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10B99..10B9C ; Po # [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10ED0 ; Po # ARABIC BIBLICAL END OF VERSE 10F55..10F59 ; Po # [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT 10F86..10F89 ; Po # [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS 11047..1104D ; Po # [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS @@ -3954,7 +3955,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 1E5FF ; Po # OL ONAL ABBREVIATION SIGN 1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# Total code points: 640 +# Total code points: 641 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index ddc1d086e..60ff34b61 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 15:27:19 GMT +# Date: 2024-11-14, 15:54:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757548 code points not listed here. -# Total code points: 895016 +# The above property value applies to 757547 code points not listed here. +# Total code points: 895015 # ================================================ @@ -2509,6 +2509,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 10B39..10B3F ; BA # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10D6E ; BA # Pd GARAY HYPHEN 10EAD ; BA # Pd YEZIDI HYPHENATION MARK +10ED0 ; BA # Po ARABIC BIBLICAL END OF VERSE 11047..11048 ; BA # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; BA # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA 11140..11143 ; BA # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK @@ -2545,7 +2546,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 1BC9F ; BA # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; BA # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 272 +# Total code points: 273 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index c2198a2fc..2c3d04d62 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 15:27:19 GMT +# Date: 2024-11-14, 15:54:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -30302,6 +30302,7 @@ FFFD ; REPLACEMENT CHARACTER 10EC5 ; ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6 ; ARABIC LETTER THIN NOON 10EC7 ; ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; ARABIC BIBLICAL END OF VERSE 10ED1 ; ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM 10ED2 ; ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM 10ED3 ; ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM @@ -45472,6 +45473,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155103 +# Total code points: 155104 # EOF From 6da68627c20dafcdc3e16c2f67196fa45d4cf83d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 19:31:45 +0100 Subject: [PATCH 22/38] Arabic double vertical bar below (#592) * UnicodeData.txt from L2/23-248 * LB=CM * Scripts.txt * Alphabetic diacritic like kasratan? * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++--- .../data/ucd/dev/DerivedCoreProperties.txt | 26 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 4 +-- .../data/ucd/dev/NormalizationTest.txt | 4 ++- unicodetools/data/ucd/dev/PropList.txt | 9 ++++--- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + .../data/ucd/dev/VerticalOrientation.txt | 4 +-- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 +++---- .../dev/extracted/DerivedCombiningClass.txt | 10 +++---- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++---- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +++---- .../data/ucd/dev/extracted/DerivedName.txt | 5 ++-- 19 files changed, 72 insertions(+), 67 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 426462abd..7978c07c3 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 15:54:07 GMT +# Date: 2024-11-14, 16:07:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2083,8 +2083,8 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10EC5..10EC7 ; 17.0 # [3] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED0..10ED8 ; 17.0 # [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFB ; 17.0 # ARABIC SMALL LOW NOON +10EFA..10EFB ; 17.0 # [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON -# Total code points: 106 +# Total code points: 107 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 629eaad62..0fee5fc29 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 15:54:32 GMT +# Date: 2024-11-14, 16:08:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1054,7 +1054,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; Alphabetic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; Alphabetic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW -10EFB..10EFC ; Alphabetic # Mn [2] ARABIC SMALL LOW NOON..ARABIC COMBINING ALEF OVERLAY +10EFA..10EFC ; Alphabetic # Mn [3] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1442,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142801 +# Total code points: 142802 # ================================================ @@ -3354,7 +3354,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EC5 ; Case_Ignorable # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW -10EFB..10EFF ; Case_Ignorable # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Case_Ignorable # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA @@ -3509,7 +3509,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2781 +# Total code points: 2782 # ================================================ @@ -7912,7 +7912,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; ID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; ID_Continue # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW -10EFB..10EFF ; ID_Continue # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; ID_Continue # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -8387,7 +8387,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144612 +# Total code points: 144613 # ================================================ @@ -10102,7 +10102,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC5 ; XID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 10EC6..10EC7 ; XID_Continue # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW -10EFB..10EFF ; XID_Continue # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; XID_Continue # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -10577,7 +10577,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144593 +# Total code points: 144594 # ================================================ @@ -10895,7 +10895,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 10D24..10D27 ; Grapheme_Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Grapheme_Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Grapheme_Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; Grapheme_Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Grapheme_Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Grapheme_Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Grapheme_Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Grapheme_Extend # Mn BRAHMI SIGN ANUSVARA @@ -11050,7 +11050,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2223 +# Total code points: 2224 # ================================================ @@ -13228,7 +13228,7 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA 10D24..10D27 ; InCB; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; InCB; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; InCB; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; InCB; Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; InCB; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; InCB; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; InCB; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; InCB; Extend # Mn BRAHMI SIGN ANUSVARA @@ -13384,6 +13384,6 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2222 +# Total code points: 2223 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 3a0583121..abf5bdbf8 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 15:54:39 GMT +# Date: 2024-11-14, 16:08:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1969,7 +1969,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED0 ; N # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; N # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 430014be1..e1f790790 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 15:54:40 GMT +# Date: 2024-11-14, 16:08:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2826,7 +2826,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED0 ; BA # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFB..10EFF ; CM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; CM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index d47e846f2..c5e7583c7 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-17.0.0.txt -# Date: 2024-11-14, 14:55:28 GMT +# Date: 2024-11-14, 16:08:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -18701,6 +18701,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 10EAB 0315 0300 05AE 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062; # (a◌𐺫◌̕◌̀◌֮b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING HAMZA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 10EAC 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062; # (a◌̕◌̀◌֮◌𐺬b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, YEZIDI COMBINING MADDA MARK, LATIN SMALL LETTER B 0061 10EAC 0315 0300 05AE 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062; # (a◌𐺬◌̕◌̀◌֮b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING MADDA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EFA 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062; # (a◌֚◌̖◌᷺◌𐻺b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC DOUBLE VERTICAL BAR BELOW, LATIN SMALL LETTER B +0061 10EFA 059A 0316 1DFA 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062; # (a◌𐻺◌֚◌̖◌᷺b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC DOUBLE VERTICAL BAR BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFB 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062; # (a◌֚◌̖◌᷺◌𐻻b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW NOON, LATIN SMALL LETTER B 0061 10EFB 059A 0316 1DFA 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062; # (a◌𐻻◌֚◌̖◌᷺b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW NOON, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFD 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062; # (a◌֚◌̖◌᷺◌𐻽b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD SAKTA, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 48dea2722..545d02c88 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-14, 14:55:37 GMT +# Date: 2024-11-14, 16:08:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -702,7 +702,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E 10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFC ; Other_Alphabetic # Mn [2] ARABIC SMALL LOW NOON..ARABIC COMBINING ALEF OVERLAY +10EFA..10EFC ; Other_Alphabetic # Mn [3] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC COMBINING ALEF OVERLAY 11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA @@ -858,7 +858,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1496 +# Total code points: 1497 # ================================================ @@ -1080,6 +1080,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D4E ; Diacritic # Lm GARAY VOWEL LENGTH MARK 10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EFA ; Diacritic # Mn ARABIC DOUBLE VERTICAL BAR BELOW 10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -1153,7 +1154,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1208 +# Total code points: 1209 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index c611447bd..e71bbcbdd 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 15:55:12 GMT +# Date: 2024-11-14, 16:08:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -892,7 +892,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 10EC6..10EC7 ; Arabic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED0 ; Arabic # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; Arabic # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFB..10EFF ; Arabic # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Arabic # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM @@ -928,7 +928,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1412 +# Total code points: 1413 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index ce600d4b7..ab7fed94b 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -19646,6 +19646,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10ED6;ARABIC LIGATURE QUDDISA SIRRAHUMAA;So;0;ON;;;;;N;;;;; 10ED7;ARABIC LIGATURE QUDDISAT ASRAARUHUM;So;0;ON;;;;;N;;;;; 10ED8;ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH;So;0;ON;;;;;N;;;;; +10EFA;ARABIC DOUBLE VERTICAL BAR BELOW;Mn;220;NSM;;;;;N;;;;; 10EFB;ARABIC SMALL LOW NOON;Mn;220;NSM;;;;;N;;;;; 10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;; 10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index d2a4df22d..923ed7072 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 15:55:15 GMT +# Date: 2024-11-14, 16:08:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1790,7 +1790,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EC6..10EC7 ; R # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED0 ; R # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; R # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFB..10EFF ; R # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; R # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 97b805ce1..790beba09 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-17.0.0.txt -# Date: 2024-11-14, 14:55:18 GMT +# Date: 2024-11-14, 16:08:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -340,7 +340,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA @@ -496,7 +496,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2228 +# Total code points: 2229 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 625ffe50f..4f2e7dbd9 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 15:28:07 GMT +# Date: 2024-11-14, 16:08:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -374,7 +374,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -587,7 +587,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2631 +# Total code points: 2632 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index c9ed2146c..b97cc34ed 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 15:28:10 GMT +# Date: 2024-11-14, 16:08:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -410,7 +410,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; Extend # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -624,7 +624,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2635 +# Total code points: 2636 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 0bf1157fa..a3f88f639 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 15:54:29 GMT +# Date: 2024-11-14, 16:08:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2278,7 +2278,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; NSM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; NSM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; NSM # Mn BRAHMI SIGN ANUSVARA @@ -2412,7 +2412,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2058 +# Total code points: 2059 # ================================================ @@ -2507,8 +2507,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 254 code points not listed here. -# Total code points: 1732 +# The above property value applies to 253 code points not listed here. +# Total code points: 1731 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index ad658a165..4ded0a6d3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 15:54:31 GMT +# Date: 2024-11-14, 16:08:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2064,8 +2064,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821475 code points not listed here. -# Total code points: 1113150 +# The above property value applies to 821474 code points not listed here. +# Total code points: 1113149 # ================================================ @@ -2621,7 +2621,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 10A0D ; 220 # Mn KHAROSHTHI SIGN DOUBLE RING BELOW 10A3A ; 220 # Mn KHAROSHTHI SIGN DOT BELOW 10AE6 ; 220 # Mn MANICHAEAN ABBREVIATION MARK BELOW -10EFB ; 220 # Mn ARABIC SMALL LOW NOON +10EFA..10EFB ; 220 # Mn [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON 10EFD..10EFF ; 220 # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F47 ; 220 # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW 10F4B ; 220 # Mn SOGDIAN COMBINING CURVE BELOW @@ -2634,7 +2634,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 185 +# Total code points: 186 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 43bd0d94d..0296ddd35 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 15:54:34 GMT +# Date: 2024-11-14, 16:08:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1491,7 +1491,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED0 ; N # Po ARABIC BIBLICAL END OF VERSE 10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH -10EFB..10EFF ; N # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; N # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -2108,7 +2108,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760993 code points not listed here. +# The above property value applies to 760992 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 5ffd7454b..2afe33131 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 15:54:35 GMT +# Date: 2024-11-14, 16:08:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -433,7 +433,7 @@ FFFE..FFFF ; Cn # [2] .. 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. 10EC8..10ECF ; Cn # [8] .. -10ED9..10EFA ; Cn # [34] .. +10ED9..10EF9 ; Cn # [33] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -741,7 +741,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819427 +# Total code points: 819426 # ================================================ @@ -2941,7 +2941,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; Mn # [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Mn # [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Mn # BRAHMI SIGN ANUSVARA @@ -3075,7 +3075,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2050 +# Total code points: 2051 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index fc0d6d2cb..34f4d6bbb 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-14, 15:27:18 GMT +# Date: 2024-11-14, 16:08:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -444,7 +444,7 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; T # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; T # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; T # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; T # Mn BRAHMI SIGN ANUSVARA @@ -584,6 +584,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2215 +# Total code points: 2216 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 60ff34b61..c330cbf10 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 15:54:36 GMT +# Date: 2024-11-14, 16:08:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757547 code points not listed here. -# Total code points: 895015 +# The above property value applies to 757546 code points not listed here. +# Total code points: 895014 # ================================================ @@ -2187,7 +2187,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFB..10EFF ; CM # Mn [5] ARABIC SMALL LOW NOON..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; CM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; CM # Mc BRAHMI SIGN CANDRABINDU @@ -2392,7 +2392,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2499 +# Total code points: 2500 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 2c3d04d62..a2098a1a7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 15:54:36 GMT +# Date: 2024-11-14, 16:08:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -30311,6 +30311,7 @@ FFFD ; REPLACEMENT CHARACTER 10ED6 ; ARABIC LIGATURE QUDDISA SIRRAHUMAA 10ED7 ; ARABIC LIGATURE QUDDISAT ASRAARUHUM 10ED8 ; ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA ; ARABIC DOUBLE VERTICAL BAR BELOW 10EFB ; ARABIC SMALL LOW NOON 10EFC ; ARABIC COMBINING ALEF OVERLAY 10EFD ; ARABIC SMALL LOW WORD SAKTA @@ -45473,6 +45474,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155104 +# Total code points: 155105 # EOF From 15626600d4de6999518b7b7283e1b158ed145aa4 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 20:21:46 +0100 Subject: [PATCH 23/38] Sharada vowel signs for Kashmiri (#757) * UnicodeData.txt lines from L2/23-122 * lb=CM I guess? * Looks Sharada to me * sup * new block * InMeowC from proposal * alphabetic * Regenerate UCD * GenerateEnums --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 5 +- .../data/ucd/dev/DerivedCoreProperties.txt | 46 +++++++++++++++---- unicodetools/data/ucd/dev/EastAsianWidth.txt | 8 +++- .../data/ucd/dev/IndicPositionalCategory.txt | 11 ++++- .../data/ucd/dev/IndicSyllabicCategory.txt | 8 +++- unicodetools/data/ucd/dev/LineBreak.txt | 8 +++- unicodetools/data/ucd/dev/PropList.txt | 10 +++- .../data/ucd/dev/PropertyValueAliases.txt | 3 +- unicodetools/data/ucd/dev/Scripts.txt | 12 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 8 ++++ .../data/ucd/dev/VerticalOrientation.txt | 8 +++- .../dev/auxiliary/GraphemeBreakProperty.txt | 12 +++-- .../dev/auxiliary/SentenceBreakProperty.txt | 10 +++- .../ucd/dev/auxiliary/WordBreakProperty.txt | 10 +++- .../ucd/dev/extracted/DerivedBidiClass.txt | 14 ++++-- .../dev/extracted/DerivedCombiningClass.txt | 10 +++- .../dev/extracted/DerivedEastAsianWidth.txt | 10 +++- .../dev/extracted/DerivedGeneralCategory.txt | 17 +++++-- .../ucd/dev/extracted/DerivedJoiningType.txt | 7 ++- .../ucd/dev/extracted/DerivedLineBreak.txt | 14 ++++-- .../data/ucd/dev/extracted/DerivedName.txt | 12 ++++- .../org/unicode/props/UcdPropertyValues.java | 1 + .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + 24 files changed, 196 insertions(+), 50 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index be3e10ce3..13a98f8d0 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -280,6 +280,7 @@ FFF0..FFFF; Specials 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 11AC0..11AFF; Pau Cin Hau 11B00..11B5F; Devanagari Extended-A +11B60..11B7F; Sharada Supplement 11BC0..11BFF; Sunuwar 11C00..11C6F; Bhaiksuki 11C70..11CBF; Marchen diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 7978c07c3..6eba838ff 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 16:07:50 GMT +# Date: 2024-11-14, 18:37:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2084,7 +2084,8 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 10EC5..10EC7 ; 17.0 # [3] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10ED0..10ED8 ; 17.0 # [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFA..10EFB ; 17.0 # [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON +11B60..11B67 ; 17.0 # [8] SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O -# Total code points: 107 +# Total code points: 115 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 0fee5fc29..c46405c3a 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 16:08:15 GMT +# Date: 2024-11-14, 18:38:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1240,6 +1240,12 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11A97 ; Alphabetic # Mc SOYOMBO SIGN VISARGA 11A9D ; Alphabetic # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; Alphabetic # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B60 ; Alphabetic # Mn SHARADA VOWEL SIGN OE +11B61 ; Alphabetic # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Alphabetic # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Alphabetic # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Alphabetic # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Alphabetic # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; Alphabetic # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; Alphabetic # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; Alphabetic # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA @@ -1442,7 +1448,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142802 +# Total code points: 142810 # ================================================ @@ -3431,6 +3437,9 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11A59..11A5B ; Case_Ignorable # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; Case_Ignorable # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; Case_Ignorable # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Case_Ignorable # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; Case_Ignorable # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; Case_Ignorable # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; Case_Ignorable # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Case_Ignorable # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; Case_Ignorable # Mn BHAIKSUKI SIGN VIRAMA @@ -3509,7 +3518,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2782 +# Total code points: 2787 # ================================================ @@ -8139,6 +8148,12 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11A98..11A99 ; ID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11A9D ; ID_Continue # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; ID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B60 ; ID_Continue # Mn SHARADA VOWEL SIGN OE +11B61 ; ID_Continue # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; ID_Continue # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; ID_Continue # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; ID_Continue # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; ID_Continue # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; ID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BF0..11BF9 ; ID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; ID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L @@ -8387,7 +8402,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144613 +# Total code points: 144621 # ================================================ @@ -10329,6 +10344,12 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11A98..11A99 ; XID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11A9D ; XID_Continue # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; XID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B60 ; XID_Continue # Mn SHARADA VOWEL SIGN OE +11B61 ; XID_Continue # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; XID_Continue # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; XID_Continue # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; XID_Continue # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; XID_Continue # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; XID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BF0..11BF9 ; XID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; XID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L @@ -10577,7 +10598,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144594 +# Total code points: 144602 # ================================================ @@ -10986,6 +11007,9 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 11A59..11A5B ; Grapheme_Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; Grapheme_Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; Grapheme_Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Grapheme_Extend # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; Grapheme_Extend # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; Grapheme_Extend # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; Grapheme_Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Grapheme_Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; Grapheme_Extend # Mn BHAIKSUKI SIGN VIRAMA @@ -11050,7 +11074,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2224 +# Total code points: 2229 # ================================================ @@ -12540,6 +12564,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 11A9E..11AA2 ; Grapheme_Base # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; Grapheme_Base # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; Grapheme_Base # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B61 ; Grapheme_Base # Mc SHARADA VOWEL SIGN OOE +11B65 ; Grapheme_Base # Mc SHARADA VOWEL SIGN SHORT O +11B67 ; Grapheme_Base # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; Grapheme_Base # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; Grapheme_Base # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; Grapheme_Base # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -12837,7 +12864,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152806 +# Total code points: 152809 # ================================================ @@ -13319,6 +13346,9 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA 11A59..11A5B ; InCB; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; InCB; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; InCB; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; InCB; Extend # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; InCB; Extend # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; InCB; Extend # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; InCB; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; InCB; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; InCB; Extend # Mn BHAIKSUKI SIGN VIRAMA @@ -13384,6 +13414,6 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2223 +# Total code points: 2228 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index abf5bdbf8..d3582c914 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 16:08:21 GMT +# Date: 2024-11-14, 18:38:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2240,6 +2240,12 @@ FFFD ; A # So REPLACEMENT CHARACTER 11AB0..11ABF ; N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 ; N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; N # Mn SHARADA VOWEL SIGN OE +11B61 ; N # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; N # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; N # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; N # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; N # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; N # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; N # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; N # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index de2390ff4..b71569272 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ -# IndicPositionalCategory-16.0.0.txt -# Date: 2024-06-06, 09:37:46 GMT +# IndicPositionalCategory-17.0.0.txt +# Date: 2024-11-14, 18:38:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -297,6 +297,9 @@ ABEC ; Right # Mc MEETEI MAYEK LUM IYEK 11A39 ; Right # Mc ZANABAZAR SQUARE SIGN VISARGA 11A57..11A58 ; Right # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU 11A97 ; Right # Mc SOYOMBO SIGN VISARGA +11B61 ; Right # Mc SHARADA VOWEL SIGN OOE +11B65 ; Right # Mc SHARADA VOWEL SIGN SHORT O +11B67 ; Right # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Right # Mc BHAIKSUKI VOWEL SIGN AA 11C3E ; Right # Mc BHAIKSUKI SIGN VISARGA 11CA9 ; Right # Mc MARCHEN SUBJOINED LETTER YA @@ -594,6 +597,9 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11A84..11A89 ; Top # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A96 ; Top # Mn SOYOMBO SIGN ANUSVARA 11A98 ; Top # Mn SOYOMBO GEMINATION MARK +11B60 ; Top # Mn SHARADA VOWEL SIGN OE +11B64 ; Top # Mn SHARADA VOWEL SIGN SHORT E +11B66 ; Top # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C31 ; Top # Mn [2] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN II 11C38..11C3D ; Top # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11CB3 ; Top # Mn MARCHEN VOWEL SIGN E @@ -755,6 +761,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 11A52..11A53 ; Bottom # Mn [2] SOYOMBO VOWEL SIGN UE..SOYOMBO VOWEL SIGN U 11A59..11A5B ; Bottom # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A95 ; Bottom # Mn [12] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO FINAL CONSONANT SIGN -A +11B62..11B63 ; Bottom # Mn [2] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN UUE 11C32..11C36 ; Bottom # Mn [5] BHAIKSUKI VOWEL SIGN U..BHAIKSUKI VOWEL SIGN VOCALIC L 11C3F ; Bottom # Mn BHAIKSUKI SIGN VIRAMA 11C92..11CA7 ; Bottom # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index 90b00e470..6c068793d 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-17.0.0.txt -# Date: 2024-11-13, 16:22:02 GMT +# Date: 2024-11-14, 18:38:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -729,6 +729,12 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 11A51..11A56 ; Vowel_Dependent # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE 11A57..11A58 ; Vowel_Dependent # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU 11A59..11A5B ; Vowel_Dependent # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11B60 ; Vowel_Dependent # Mn SHARADA VOWEL SIGN OE +11B61 ; Vowel_Dependent # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Vowel_Dependent # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Vowel_Dependent # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Vowel_Dependent # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Vowel_Dependent # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Vowel_Dependent # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Vowel_Dependent # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3B ; Vowel_Dependent # Mn [4] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI VOWEL SIGN AU diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index e1f790790..6cedc4b29 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 16:08:22 GMT +# Date: 2024-11-14, 18:38:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3121,6 +3121,12 @@ FFFD ; AI # So REPLACEMENT CHARACTER 11AB0..11ABF ; AL # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 ; AL # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; BB # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; CM # Mn SHARADA VOWEL SIGN OE +11B61 ; CM # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; CM # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; CM # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; CM # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; CM # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; AL # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; AL # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; NU # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 545d02c88..d41c02c99 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-14, 16:08:35 GMT +# Date: 2024-11-14, 18:38:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -809,6 +809,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA +11B60 ; Other_Alphabetic # Mn SHARADA VOWEL SIGN OE +11B61 ; Other_Alphabetic # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Other_Alphabetic # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Other_Alphabetic # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Other_Alphabetic # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Other_Alphabetic # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -858,7 +864,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1497 +# Total code points: 1505 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 5c447325b..aee447d3e 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-14, 15:13:47 GMT +# Date: 2024-11-14, 18:38:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -420,6 +420,7 @@ blk; Runic ; Runic blk; Samaritan ; Samaritan blk; Saurashtra ; Saurashtra blk; Sharada ; Sharada +blk; Sharada_Sup ; Sharada_Supplement blk; Shavian ; Shavian blk; Shorthand_Format_Controls ; Shorthand_Format_Controls blk; Siddham ; Siddham diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index e71bbcbdd..90997ecbf 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 16:08:54 GMT +# Date: 2024-11-14, 18:38:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2351,8 +2351,14 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 111DB ; Sharada # Po SHARADA SIGN SIDDHAM 111DC ; Sharada # Lo SHARADA HEADSTROKE 111DD..111DF ; Sharada # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 - -# Total code points: 96 +11B60 ; Sharada # Mn SHARADA VOWEL SIGN OE +11B61 ; Sharada # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Sharada # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Sharada # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Sharada # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Sharada # Mc SHARADA VOWEL SIGN CANDRA O + +# Total code points: 104 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index ab7fed94b..fcf03d716 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -21628,6 +21628,14 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11B07;DEVANAGARI SIGN WESTERN NINE-LIKE BHALE;Po;0;L;;;;;N;;;;; 11B08;DEVANAGARI SIGN REVERSED NINE-LIKE BHALE;Po;0;L;;;;;N;;;;; 11B09;DEVANAGARI SIGN MINDU;Po;0;L;;;;;N;;;;; +11B60;SHARADA VOWEL SIGN OE;Mn;0;NSM;;;;;N;;;;; +11B61;SHARADA VOWEL SIGN OOE;Mc;0;L;;;;;N;;;;; +11B62;SHARADA VOWEL SIGN UE;Mn;0;NSM;;;;;N;;;;; +11B63;SHARADA VOWEL SIGN UUE;Mn;0;NSM;;;;;N;;;;; +11B64;SHARADA VOWEL SIGN SHORT E;Mn;0;NSM;;;;;N;;;;; +11B65;SHARADA VOWEL SIGN SHORT O;Mc;0;L;;;;;N;;;;; +11B66;SHARADA VOWEL SIGN CANDRA E;Mn;0;NSM;;;;;N;;;;; +11B67;SHARADA VOWEL SIGN CANDRA O;Mc;0;L;;;;;N;;;;; 11BC0;SUNUWAR LETTER DEVI;Lo;0;L;;;;;N;;;;; 11BC1;SUNUWAR LETTER TASLA;Lo;0;L;;;;;N;;;;; 11BC2;SUNUWAR LETTER EKO;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 923ed7072..cc5f22a4e 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 16:08:57 GMT +# Date: 2024-11-14, 18:38:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2065,6 +2065,12 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 11AB0..11ABF ; U # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 ; R # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; R # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; R # Mn SHARADA VOWEL SIGN OE +11B61 ; R # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; R # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; R # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; R # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; R # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; R # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; R # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; R # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 790beba09..38d8320b2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-17.0.0.txt -# Date: 2024-11-14, 16:08:22 GMT +# Date: 2024-11-14, 18:38:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -431,6 +431,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Extend # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; Extend # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; Extend # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA @@ -496,7 +499,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2229 +# Total code points: 2234 # ================================================ @@ -647,6 +650,9 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA 11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU 11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA +11B61 ; SpacingMark # Mc SHARADA VOWEL SIGN OOE +11B65 ; SpacingMark # Mc SHARADA VOWEL SIGN SHORT O +11B67 ; SpacingMark # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA 11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA 11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA @@ -662,7 +668,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 1612A..1612C ; SpacingMark # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI -# Total code points: 378 +# Total code points: 381 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 4f2e7dbd9..ea5954271 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 16:08:55 GMT +# Date: 2024-11-14, 18:38:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -509,6 +509,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A97 ; Extend # Mc SOYOMBO SIGN VISARGA 11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Extend # Mn SHARADA VOWEL SIGN OE +11B61 ; Extend # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Extend # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Extend # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Extend # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Extend # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -587,7 +593,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2632 +# Total code points: 2640 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index b97cc34ed..df639a405 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 16:08:58 GMT +# Date: 2024-11-14, 18:38:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -545,6 +545,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A97 ; Extend # Mc SOYOMBO SIGN VISARGA 11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Extend # Mn SHARADA VOWEL SIGN OE +11B61 ; Extend # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Extend # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Extend # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Extend # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Extend # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -624,7 +630,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2636 +# Total code points: 2644 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index a3f88f639..e4ed9cadf 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 16:08:12 GMT +# Date: 2024-11-14, 18:37:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1010,6 +1010,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; L # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B61 ; L # Mc SHARADA VOWEL SIGN OOE +11B65 ; L # Mc SHARADA VOWEL SIGN SHORT O +11B67 ; L # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; L # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; L # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; L # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -1213,8 +1216,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815313 code points not listed here. -# Total code points: 1095483 +# The above property value applies to 815305 code points not listed here. +# Total code points: 1095478 # ================================================ @@ -2354,6 +2357,9 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11A59..11A5B ; NSM # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; NSM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; NSM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; NSM # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; NSM # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; NSM # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; NSM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; NSM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C92..11CA7 ; NSM # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA @@ -2412,7 +2418,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2059 +# Total code points: 2064 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 4ded0a6d3..d9a17a78c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 16:08:14 GMT +# Date: 2024-11-14, 18:37:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1724,6 +1724,12 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 11A9E..11AA2 ; 0 # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; 0 # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; 0 # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; 0 # Mn SHARADA VOWEL SIGN OE +11B61 ; 0 # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; 0 # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; 0 # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; 0 # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; 0 # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; 0 # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; 0 # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; 0 # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -2064,7 +2070,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821474 code points not listed here. +# The above property value applies to 821466 code points not listed here. # Total code points: 1113149 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 0296ddd35..3c0cb2d75 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 16:08:16 GMT +# Date: 2024-11-14, 18:38:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1759,6 +1759,12 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 11A9E..11AA2 ; N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; N # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; N # Mn SHARADA VOWEL SIGN OE +11B61 ; N # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; N # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; N # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; N # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; N # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; N # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; N # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; N # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -2108,7 +2114,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760992 code points not listed here. +# The above property value applies to 760984 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 2afe33131..145d3bc46 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 16:08:17 GMT +# Date: 2024-11-14, 18:38:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -516,7 +516,8 @@ FFFE..FFFF ; Cn # [2] .. 11A48..11A4F ; Cn # [8] .. 11AA3..11AAF ; Cn # [13] .. 11AF9..11AFF ; Cn # [7] .. -11B0A..11BBF ; Cn # [182] .. +11B0A..11B5F ; Cn # [86] .. +11B68..11BBF ; Cn # [88] .. 11BE2..11BEF ; Cn # [14] .. 11BFA..11BFF ; Cn # [6] .. 11C09 ; Cn # @@ -741,7 +742,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819426 +# Total code points: 819418 # ================================================ @@ -3016,6 +3017,9 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11A59..11A5B ; Mn # [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; Mn # [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; Mn # [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Mn # SHARADA VOWEL SIGN OE +11B62..11B64 ; Mn # [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; Mn # SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; Mn # [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Mn # [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; Mn # BHAIKSUKI SIGN VIRAMA @@ -3075,7 +3079,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2051 +# Total code points: 2056 # ================================================ @@ -3265,6 +3269,9 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 11A39 ; Mc # ZANABAZAR SQUARE SIGN VISARGA 11A57..11A58 ; Mc # [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU 11A97 ; Mc # SOYOMBO SIGN VISARGA +11B61 ; Mc # SHARADA VOWEL SIGN OOE +11B65 ; Mc # SHARADA VOWEL SIGN SHORT O +11B67 ; Mc # SHARADA VOWEL SIGN CANDRA O 11C2F ; Mc # BHAIKSUKI VOWEL SIGN AA 11C3E ; Mc # BHAIKSUKI SIGN VISARGA 11CA9 ; Mc # MARCHEN SUBJOINED LETTER YA @@ -3284,7 +3291,7 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 468 +# Total code points: 471 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 34f4d6bbb..9560720aa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-14, 16:08:18 GMT +# Date: 2024-11-14, 18:38:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -519,6 +519,9 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 11A59..11A5B ; T # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; T # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; T # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; T # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; T # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; T # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; T # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; T # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; T # Mn BHAIKSUKI SIGN VIRAMA @@ -584,6 +587,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2216 +# Total code points: 2221 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index c330cbf10..45434e051 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 16:08:18 GMT +# Date: 2024-11-14, 18:38:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757546 code points not listed here. -# Total code points: 895014 +# The above property value applies to 757538 code points not listed here. +# Total code points: 895006 # ================================================ @@ -2313,6 +2313,12 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 11A8A..11A96 ; CM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A97 ; CM # Mc SOYOMBO SIGN VISARGA 11A98..11A99 ; CM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; CM # Mn SHARADA VOWEL SIGN OE +11B61 ; CM # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; CM # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; CM # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; CM # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; CM # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; CM # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; CM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; CM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -2392,7 +2398,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2500 +# Total code points: 2508 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index a2098a1a7..006451cad 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 16:08:19 GMT +# Date: 2024-11-14, 18:38:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -32293,6 +32293,14 @@ FFFD ; REPLACEMENT CHARACTER 11B07 ; DEVANAGARI SIGN WESTERN NINE-LIKE BHALE 11B08 ; DEVANAGARI SIGN REVERSED NINE-LIKE BHALE 11B09 ; DEVANAGARI SIGN MINDU +11B60 ; SHARADA VOWEL SIGN OE +11B61 ; SHARADA VOWEL SIGN OOE +11B62 ; SHARADA VOWEL SIGN UE +11B63 ; SHARADA VOWEL SIGN UUE +11B64 ; SHARADA VOWEL SIGN SHORT E +11B65 ; SHARADA VOWEL SIGN SHORT O +11B66 ; SHARADA VOWEL SIGN CANDRA E +11B67 ; SHARADA VOWEL SIGN CANDRA O 11BC0 ; SUNUWAR LETTER DEVI 11BC1 ; SUNUWAR LETTER TASLA 11BC2 ; SUNUWAR LETTER EKO @@ -45474,6 +45482,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155105 +# Total code points: 155113 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index b41d852c9..8e497e96f 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -445,6 +445,7 @@ public enum Block_Values implements Named { Samaritan("Samaritan"), Saurashtra("Saurashtra"), Sharada("Sharada"), + Sharada_Supplement("Sharada_Sup"), Shavian("Shavian"), Shorthand_Format_Controls("Shorthand_Format_Controls"), Siddham("Siddham"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index c1800d243..bf082107a 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -264,6 +264,7 @@ Runic ; Runic Samaritan ; Samaritan Saurashtra ; Saurashtra Sharada ; Sharada +Sharada_Sup ; Sharada_Supplement Shavian ; Shavian Shorthand_Format_Controls ; Shorthand_Format_Controls Siddham ; Siddham From 6b9f06a685732a14d5a69bbeb12743f1b103f477 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 21:17:45 +0100 Subject: [PATCH 24/38] Chisoi (#759) * UnicodeData.txt lines from L2/22-218R3 * LineBreak.txt lines from L2/22-218R3 * Scripts.txt * new block * Danda scx * Alphabetic for the anusvara, Diacritic for the virama * Regenerate UCD * GenerateEnums * mind the gap * Regenerate UCD * InMeowC from KenW * Regenerate UCD --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 49 ++++++++++++++----- unicodetools/data/ucd/dev/EastAsianWidth.txt | 7 ++- .../data/ucd/dev/IndicPositionalCategory.txt | 4 +- .../data/ucd/dev/IndicSyllabicCategory.txt | 18 ++++++- unicodetools/data/ucd/dev/LineBreak.txt | 7 ++- .../data/ucd/dev/NormalizationTest.txt | 4 +- unicodetools/data/ucd/dev/PropList.txt | 8 +-- .../data/ucd/dev/PropertyValueAliases.txt | 4 +- .../data/ucd/dev/ScriptExtensions.txt | 6 +-- unicodetools/data/ucd/dev/Scripts.txt | 12 ++++- unicodetools/data/ucd/dev/UnicodeData.txt | 40 +++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 7 ++- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 ++- .../dev/auxiliary/SentenceBreakProperty.txt | 13 +++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 13 +++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 13 +++-- .../dev/extracted/DerivedCombiningClass.txt | 13 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 9 +++- .../dev/extracted/DerivedGeneralCategory.txt | 19 ++++--- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++- .../ucd/dev/extracted/DerivedLineBreak.txt | 17 ++++--- .../data/ucd/dev/extracted/DerivedName.txt | 44 ++++++++++++++++- .../ucd/dev/extracted/DerivedNumericType.txt | 7 +-- .../dev/extracted/DerivedNumericValues.txt | 34 ++++++++----- .../org/unicode/props/UcdPropertyValues.java | 2 + .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + 28 files changed, 292 insertions(+), 78 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 13a98f8d0..99383f375 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -305,6 +305,7 @@ FFF0..FFFF; Specials 16AD0..16AFF; Bassa Vah 16B00..16B8F; Pahawh Hmong 16D40..16D7F; Kirat Rai +16D80..16DAF; Chisoi 16E40..16E9F; Medefaidrin 16F00..16F9F; Miao 16FE0..16FFF; Ideographic Symbols and Punctuation diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 6eba838ff..d0731ac36 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 18:37:36 GMT +# Date: 2024-11-14, 19:48:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2085,7 +2085,9 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 10ED0..10ED8 ; 17.0 # [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFA..10EFB ; 17.0 # [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON 11B60..11B67 ; 17.0 # [8] SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O +16D80..16D9D ; 17.0 # [30] CHISOI LETTER A..CHISOI SIGN SISO +16DA0..16DA9 ; 17.0 # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE -# Total code points: 115 +# Total code points: 155 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index c46405c3a..e901a471d 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 18:38:00 GMT +# Date: 2024-11-14, 19:48:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1317,6 +1317,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16D40..16D42 ; Alphabetic # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; Alphabetic # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; Alphabetic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; Alphabetic # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; Alphabetic # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; Alphabetic # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; Alphabetic # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; Alphabetic # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -1448,7 +1451,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142810 +# Total code points: 142839 # ================================================ @@ -3471,6 +3474,8 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 16B40..16B43 ; Case_Ignorable # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16D40..16D42 ; Case_Ignorable # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D6B..16D6C ; Case_Ignorable # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D98 ; Case_Ignorable # Mn CHISOI SIGN ANUSVARA +16D9D ; Case_Ignorable # Mn CHISOI SIGN SISO 16F4F ; Case_Ignorable # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Case_Ignorable # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Case_Ignorable # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -3518,7 +3523,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2787 +# Total code points: 2789 # ================================================ @@ -6870,6 +6875,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16D40..16D42 ; ID_Start # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; ID_Start # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; ID_Start # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; ID_Start # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; ID_Start # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; ID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; ID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION @@ -6986,7 +6993,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141310 +# Total code points: 141338 # ================================================ @@ -8243,6 +8250,11 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16D43..16D6A ; ID_Continue # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; ID_Continue # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D70..16D79 ; ID_Continue # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; ID_Continue # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; ID_Continue # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; ID_Continue # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; ID_Continue # Mn CHISOI SIGN SISO +16DA0..16DA9 ; ID_Continue # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; ID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; ID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; ID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -8402,7 +8414,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144621 +# Total code points: 144661 # ================================================ @@ -9065,6 +9077,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16D40..16D42 ; XID_Start # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; XID_Start # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; XID_Start # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; XID_Start # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; XID_Start # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; XID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; XID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION @@ -9181,7 +9195,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141287 +# Total code points: 141315 # ================================================ @@ -10439,6 +10453,11 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16D43..16D6A ; XID_Continue # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; XID_Continue # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D70..16D79 ; XID_Continue # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; XID_Continue # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; XID_Continue # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; XID_Continue # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; XID_Continue # Mn CHISOI SIGN SISO +16DA0..16DA9 ; XID_Continue # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; XID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; XID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; XID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -10598,7 +10617,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144602 +# Total code points: 144642 # ================================================ @@ -11038,6 +11057,8 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 1612D..1612F ; Grapheme_Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Grapheme_Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Grapheme_Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Grapheme_Extend # Mn CHISOI SIGN ANUSVARA +16D9D ; Grapheme_Extend # Mn CHISOI SIGN SISO 16F4F ; Grapheme_Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Grapheme_Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; Grapheme_Extend # Mn KHITAN SMALL SCRIPT FILLER @@ -11074,7 +11095,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2229 +# Total code points: 2231 # ================================================ @@ -12649,6 +12670,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16D6B..16D6C ; Grapheme_Base # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; Grapheme_Base # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; Grapheme_Base # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; Grapheme_Base # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; Grapheme_Base # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16DA0..16DA9 ; Grapheme_Base # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; Grapheme_Base # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; Grapheme_Base # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; Grapheme_Base # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH @@ -12864,7 +12888,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152809 +# Total code points: 152847 # ================================================ @@ -12936,8 +12960,9 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK 11F41 ; Grapheme_Link # Mc KAWI SIGN KILLER 11F42 ; Grapheme_Link # Mn KAWI CONJOINER 1612F ; Grapheme_Link # Mn GURUNG KHEMA SIGN THOLHOMA +16D9D ; Grapheme_Link # Mn CHISOI SIGN SISO -# Total code points: 69 +# Total code points: 70 # ================================================ @@ -13377,6 +13402,8 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA 1612D..1612F ; InCB; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; InCB; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; InCB; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; InCB; Extend # Mn CHISOI SIGN ANUSVARA +16D9D ; InCB; Extend # Mn CHISOI SIGN SISO 16F4F ; InCB; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; InCB; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; InCB; Extend # Mn KHITAN SMALL SCRIPT FILLER @@ -13414,6 +13441,6 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2228 +# Total code points: 2230 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index d3582c914..034acfc2c 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 18:38:06 GMT +# Date: 2024-11-14, 19:48:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2357,6 +2357,11 @@ FFFD ; A # So REPLACEMENT CHARACTER 16D6B..16D6C ; N # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; N # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; N # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; N # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; N # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; N # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; N # Mn CHISOI SIGN SISO +16DA0..16DA9 ; N # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index b71569272..876f0114c 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ # IndicPositionalCategory-17.0.0.txt -# Date: 2024-11-14, 18:38:07 GMT +# Date: 2024-11-14, 19:48:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -619,6 +619,7 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11F5A ; Top # Mn KAWI SIGN NUKTA 1611E..16129 ; Top # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK 1612D ; Top # Mn GURUNG KHEMA SIGN ANUSVARA +16D98 ; Top # Mn CHISOI SIGN ANUSVARA # Indic_Positional_Category=Bottom @@ -774,6 +775,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 11EF4 ; Bottom # Mn MAKASAR VOWEL SIGN U 11F38..11F3A ; Bottom # Mn [3] KAWI VOWEL SIGN U..KAWI VOWEL SIGN VOCALIC R 1612E..1612F ; Bottom # Mn [2] GURUNG KHEMA CONSONANT SIGN MEDIAL RA..GURUNG KHEMA SIGN THOLHOMA +16D9D ; Bottom # Mn CHISOI SIGN SISO # Indic_Positional_Category=Top_And_Bottom diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index 6c068793d..ad5958461 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-17.0.0.txt -# Date: 2024-11-14, 18:38:07 GMT +# Date: 2024-11-14, 19:48:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -140,6 +140,7 @@ A980..A981 ; Bindu # Mn [2] JAVANESE SIGN PANYANGGA..JAVANESE SIGN CECAK 11F00..11F01 ; Bindu # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA 1612D ; Bindu # Mn GURUNG KHEMA SIGN ANUSVARA 16D40..16D41 ; Bindu # Lm [2] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN TONPI +16D98 ; Bindu # Mn CHISOI SIGN ANUSVARA # ================================================ @@ -338,6 +339,7 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK 11F41 ; Pure_Killer # Mc KAWI SIGN KILLER 1612F ; Pure_Killer # Mn GURUNG KHEMA SIGN THOLHOMA 16D6B..16D6C ; Pure_Killer # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D9D ; Pure_Killer # Mn CHISOI SIGN SISO # ================================================ @@ -773,6 +775,12 @@ A866 ; Vowel # Lo PHAGS-PA LETTER EE A922..A925 ; Vowel # Lo [4] KAYAH LI LETTER A..KAYAH LI LETTER OO A926..A92A ; Vowel # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O 11150..11154 ; Vowel # Lo [5] MAHAJANI LETTER A..MAHAJANI LETTER O +16D80 ; Vowel # Lo CHISOI LETTER A +16D82..16D83 ; Vowel # Lo [2] CHISOI LETTER AI..CHISOI LETTER AA +16D86 ; Vowel # Lo CHISOI LETTER E +16D89 ; Vowel # Lo CHISOI LETTER I +16D8F ; Vowel # Lo CHISOI LETTER U +16D92 ; Vowel # Lo CHISOI LETTER O # ================================================ @@ -972,6 +980,13 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 11F12..11F33 ; Consonant # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA 16101..1611D ; Consonant # Lo [29] GURUNG KHEMA LETTER KA..GURUNG KHEMA LETTER SA 16D43..16D62 ; Consonant # Lo [32] KIRAT RAI LETTER A..KIRAT RAI LETTER HA +16D81 ; Consonant # Lo CHISOI LETTER BA +16D84..16D85 ; Consonant # Lo [2] CHISOI LETTER GA..CHISOI LETTER TA +16D87..16D88 ; Consonant # Lo [2] CHISOI LETTER SA..CHISOI LETTER NA +16D8A..16D8E ; Consonant # Lo [5] CHISOI LETTER KA..CHISOI LETTER RRA +16D90..16D91 ; Consonant # Lo [2] CHISOI LETTER DA..CHISOI LETTER LA +16D93..16D97 ; Consonant # Lo [5] CHISOI LETTER NYA..CHISOI LETTER PA +16D99..16D9C ; Consonant # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA # ================================================ @@ -1386,6 +1401,7 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI 11F50..11F59 ; Number # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Number # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16D70..16D79 ; Number # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Number # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE # ================================================ diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 6cedc4b29..75368d39d 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 18:38:07 GMT +# Date: 2024-11-14, 19:48:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3267,6 +3267,11 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16D6D ; AL # Po KIRAT RAI SIGN YUPI 16D6E..16D6F ; BA # Po [2] KIRAT RAI DANDA..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; NU # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; AL # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; CM # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; AL # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; CM # Mn CHISOI SIGN SISO +16DA0..16DA9 ; NU # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; AL # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; AL # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E98 ; BA # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index c5e7583c7..d509d5415 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-17.0.0.txt -# Date: 2024-11-14, 16:08:28 GMT +# Date: 2024-11-14, 19:48:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -18891,6 +18891,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 16B35 0315 0300 05AE 0062;0061 05AE 16B35 0300 0315 0062;0061 05AE 16B35 0300 0315 0062;0061 05AE 16B35 0300 0315 0062;0061 05AE 16B35 0300 0315 0062; # (a◌𖬵◌̕◌̀◌֮b; a◌֮◌𖬵◌̀◌̕b; a◌֮◌𖬵◌̀◌̕b; a◌֮◌𖬵◌̀◌̕b; a◌֮◌𖬵◌̀◌̕b; ) LATIN SMALL LETTER A, PAHAWH HMONG MARK CIM HOM, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 16B36 0062;00E0 05AE 16B36 0315 0062;0061 05AE 0300 16B36 0315 0062;00E0 05AE 16B36 0315 0062;0061 05AE 0300 16B36 0315 0062; # (a◌̕◌̀◌֮◌𖬶b; à◌֮◌𖬶◌̕b; a◌֮◌̀◌𖬶◌̕b; à◌֮◌𖬶◌̕b; a◌֮◌̀◌𖬶◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, PAHAWH HMONG MARK CIM TAUM, LATIN SMALL LETTER B 0061 16B36 0315 0300 05AE 0062;0061 05AE 16B36 0300 0315 0062;0061 05AE 16B36 0300 0315 0062;0061 05AE 16B36 0300 0315 0062;0061 05AE 16B36 0300 0315 0062; # (a◌𖬶◌̕◌̀◌֮b; a◌֮◌𖬶◌̀◌̕b; a◌֮◌𖬶◌̀◌̕b; a◌֮◌𖬶◌̀◌̕b; a◌֮◌𖬶◌̀◌̕b; ) LATIN SMALL LETTER A, PAHAWH HMONG MARK CIM TAUM, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 05B0 094D 3099 16D9D 0062;0061 3099 094D 16D9D 05B0 0062;0061 3099 094D 16D9D 05B0 0062;0061 3099 094D 16D9D 05B0 0062;0061 3099 094D 16D9D 05B0 0062; # (a◌ְ◌्◌゙◌𖶝b; a◌゙◌्◌𖶝◌ְb; a◌゙◌्◌𖶝◌ְb; a◌゙◌्◌𖶝◌ְb; a◌゙◌्◌𖶝◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, CHISOI SIGN SISO, LATIN SMALL LETTER B +0061 16D9D 05B0 094D 3099 0062;0061 3099 16D9D 094D 05B0 0062;0061 3099 16D9D 094D 05B0 0062;0061 3099 16D9D 094D 05B0 0062;0061 3099 16D9D 094D 05B0 0062; # (a◌𖶝◌ְ◌्◌゙b; a◌゙◌𖶝◌्◌ְb; a◌゙◌𖶝◌्◌ְb; a◌゙◌𖶝◌्◌ְb; a◌゙◌𖶝◌्◌ְb; ) LATIN SMALL LETTER A, CHISOI SIGN SISO, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 093C 16FF0 0334 16FF0 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062; # (a◌𖿰़◌̴𖿰b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; ) LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, VIETNAMESE ALTERNATE READING MARK CA, LATIN SMALL LETTER B 0061 16FF0 093C 16FF0 0334 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062; # (a𖿰◌𖿰़◌̴b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; ) LATIN SMALL LETTER A, VIETNAMESE ALTERNATE READING MARK CA, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B 0061 093C 16FF0 0334 16FF1 0062;0061 0334 16FF0 16FF1 093C 0062;0061 0334 16FF0 16FF1 093C 0062;0061 0334 16FF0 16FF1 093C 0062;0061 0334 16FF0 16FF1 093C 0062; # (a◌𖿰़◌̴𖿱b; a◌̴𖿰𖿱◌़b; a◌̴𖿰𖿱◌़b; a◌̴𖿰𖿱◌़b; a◌̴𖿰𖿱◌़b; ) LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, VIETNAMESE ALTERNATE READING MARK NHAY, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index d41c02c99..ac90e4739 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-14, 18:38:19 GMT +# Date: 2024-11-14, 19:48:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -848,6 +848,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1611E..16129 ; Other_Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK 1612A..1612C ; Other_Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 1612D..1612E ; Other_Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA +16D98 ; Other_Alphabetic # Mn CHISOI SIGN ANUSVARA 16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -864,7 +865,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1505 +# Total code points: 1506 # ================================================ @@ -1138,6 +1139,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16D6B..16D6C ; Diacritic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D9D ; Diacritic # Mn CHISOI SIGN SISO 16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY @@ -1160,7 +1162,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1209 +# Total code points: 1210 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index aee447d3e..5624118c0 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-14, 18:38:21 GMT +# Date: 2024-11-14, 19:48:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -197,6 +197,7 @@ blk; Cham ; Cham blk; Cherokee ; Cherokee blk; Cherokee_Sup ; Cherokee_Supplement blk; Chess_Symbols ; Chess_Symbols +blk; Chisoi ; Chisoi blk; Chorasmian ; Chorasmian blk; CJK ; CJK_Unified_Ideographs blk; CJK_Compat ; CJK_Compatibility @@ -1334,6 +1335,7 @@ sc ; Cans ; Canadian_Aboriginal sc ; Cari ; Carian sc ; Cham ; Cham sc ; Cher ; Cherokee +sc ; Chis ; Chisoi sc ; Chrs ; Chorasmian sc ; Copt ; Coptic ; Qaac sc ; Cpmn ; Cypro_Minoan diff --git a/unicodetools/data/ucd/dev/ScriptExtensions.txt b/unicodetools/data/ucd/dev/ScriptExtensions.txt index 27e16cbe4..f63e2cd5f 100644 --- a/unicodetools/data/ucd/dev/ScriptExtensions.txt +++ b/unicodetools/data/ucd/dev/ScriptExtensions.txt @@ -1,5 +1,5 @@ # ScriptExtensions-17.0.0.txt -# Date: 2024-10-16, 17:28:17 GMT +# Date: 2024-11-14, 19:49:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -81,8 +81,8 @@ 06D4 ; Arab Rohg # Po ARABIC FULL STOP 0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh #Mn DEVANAGARI STRESS SIGN UDATTA 0952 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh #Mn DEVANAGARI STRESS SIGN ANUDATTA -0964 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh #Po DEVANAGARI DANDA -0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Gukh Guru Knda Limb Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh #Po DEVANAGARI DOUBLE DANDA +0964 ; Beng Chis Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh #Po DEVANAGARI DANDA +0965 ; Beng Chis Deva Dogr Gong Gonm Gran Gujr Gukh Guru Knda Limb Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh #Po DEVANAGARI DOUBLE DANDA 0966..096F ; Deva Dogr Kthi Mahj # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE 09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE 0A66..0A6F ; Guru Mult # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 90997ecbf..8aa5b147f 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 18:38:37 GMT +# Date: 2024-11-14, 19:49:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3137,6 +3137,16 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ +16D80..16D97 ; Chisoi # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; Chisoi # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; Chisoi # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; Chisoi # Mn CHISOI SIGN SISO +16DA0..16DA9 ; Chisoi # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE + +# Total code points: 40 + +# ================================================ + 10940..1095C ; Sidetic # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 # Total code points: 29 diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index fcf03d716..1785a773b 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -30148,6 +30148,46 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16D77;KIRAT RAI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 16D78;KIRAT RAI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 16D79;KIRAT RAI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +16D80;CHISOI LETTER A;Lo;0;L;;;;;N;;;;; +16D81;CHISOI LETTER BA;Lo;0;L;;;;;N;;;;; +16D82;CHISOI LETTER AI;Lo;0;L;;;;;N;;;;; +16D83;CHISOI LETTER AA;Lo;0;L;;;;;N;;;;; +16D84;CHISOI LETTER GA;Lo;0;L;;;;;N;;;;; +16D85;CHISOI LETTER TA;Lo;0;L;;;;;N;;;;; +16D86;CHISOI LETTER E;Lo;0;L;;;;;N;;;;; +16D87;CHISOI LETTER SA;Lo;0;L;;;;;N;;;;; +16D88;CHISOI LETTER NA;Lo;0;L;;;;;N;;;;; +16D89;CHISOI LETTER I;Lo;0;L;;;;;N;;;;; +16D8A;CHISOI LETTER KA;Lo;0;L;;;;;N;;;;; +16D8B;CHISOI LETTER RA;Lo;0;L;;;;;N;;;;; +16D8C;CHISOI LETTER MA;Lo;0;L;;;;;N;;;;; +16D8D;CHISOI LETTER HA;Lo;0;L;;;;;N;;;;; +16D8E;CHISOI LETTER RRA;Lo;0;L;;;;;N;;;;; +16D8F;CHISOI LETTER U;Lo;0;L;;;;;N;;;;; +16D90;CHISOI LETTER DA;Lo;0;L;;;;;N;;;;; +16D91;CHISOI LETTER LA;Lo;0;L;;;;;N;;;;; +16D92;CHISOI LETTER O;Lo;0;L;;;;;N;;;;; +16D93;CHISOI LETTER NYA;Lo;0;L;;;;;N;;;;; +16D94;CHISOI LETTER NGA;Lo;0;L;;;;;N;;;;; +16D95;CHISOI LETTER CA;Lo;0;L;;;;;N;;;;; +16D96;CHISOI LETTER JA;Lo;0;L;;;;;N;;;;; +16D97;CHISOI LETTER PA;Lo;0;L;;;;;N;;;;; +16D98;CHISOI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +16D99;CHISOI LETTER YA;Lo;0;L;;;;;N;;;;; +16D9A;CHISOI LETTER DDA;Lo;0;L;;;;;N;;;;; +16D9B;CHISOI LETTER TTA;Lo;0;L;;;;;N;;;;; +16D9C;CHISOI LETTER JARAHA;Lo;0;L;;;;;N;;;;; +16D9D;CHISOI SIGN SISO;Mn;9;NSM;;;;;N;;;;; +16DA0;CHISOI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +16DA1;CHISOI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +16DA2;CHISOI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +16DA3;CHISOI DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +16DA4;CHISOI DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +16DA5;CHISOI DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +16DA6;CHISOI DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +16DA7;CHISOI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +16DA8;CHISOI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +16DA9;CHISOI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 16E40;MEDEFAIDRIN CAPITAL LETTER M;Lu;0;L;;;;;N;;;;16E60; 16E41;MEDEFAIDRIN CAPITAL LETTER S;Lu;0;L;;;;;N;;;;16E61; 16E42;MEDEFAIDRIN CAPITAL LETTER V;Lu;0;L;;;;;N;;;;16E62; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index cc5f22a4e..f7589f767 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 18:38:40 GMT +# Date: 2024-11-14, 19:49:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2185,6 +2185,11 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16D6B..16D6C ; R # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; R # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; R # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; R # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; R # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; R # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; R # Mn CHISOI SIGN SISO +16DA0..16DA9 ; R # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; R # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; R # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; R # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 38d8320b2..48351bad9 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-17.0.0.txt -# Date: 2024-11-14, 18:38:07 GMT +# Date: 2024-11-14, 19:48:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -462,6 +462,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Extend # Mn CHISOI SIGN ANUSVARA +16D9D ; Extend # Mn CHISOI SIGN SISO 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER @@ -499,7 +501,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2234 +# Total code points: 2236 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index ea5954271..848e6705c 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 18:38:38 GMT +# Date: 2024-11-14, 19:49:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -556,6 +556,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Extend # Mn CHISOI SIGN ANUSVARA +16D9D ; Extend # Mn CHISOI SIGN SISO 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Extend # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -593,7 +595,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2640 +# Total code points: 2642 # ================================================ @@ -2520,6 +2522,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16D40..16D42 ; OLetter # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; OLetter # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; OLetter # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; OLetter # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; OLetter # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16F00..16F4A ; OLetter # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; OLetter # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -2600,7 +2604,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136945 +# Total code points: 136973 # ================================================ @@ -2675,6 +2679,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; Numeric # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Numeric # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; Numeric # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -2684,7 +2689,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 775 +# Total code points: 785 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index df639a405..b42092680 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 18:38:40 GMT +# Date: 2024-11-14, 19:49:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -592,6 +592,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Extend # Mn CHISOI SIGN ANUSVARA +16D9D ; Extend # Mn CHISOI SIGN SISO 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Extend # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -630,7 +632,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2644 +# Total code points: 2646 # ================================================ @@ -1265,6 +1267,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16D40..16D42 ; ALetter # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; ALetter # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; ALetter # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; ALetter # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; ALetter # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; ALetter # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; ALetter # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; ALetter # Lo MIAO LETTER NASALIZATION @@ -1363,7 +1367,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33832 +# Total code points: 33860 # ================================================ @@ -1481,6 +1485,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; Numeric # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Numeric # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; Numeric # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -1490,7 +1495,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 774 +# Total code points: 784 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index e4ed9cadf..6dee40eac 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 18:37:57 GMT +# Date: 2024-11-14, 19:48:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1095,6 +1095,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16D6B..16D6C ; L # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; L # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; L # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; L # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; L # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16DA0..16DA9 ; L # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; L # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; L # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; L # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH @@ -1216,8 +1219,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815305 code points not listed here. -# Total code points: 1095478 +# The above property value applies to 815265 code points not listed here. +# Total code points: 1095476 # ================================================ @@ -2386,6 +2389,8 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1612D..1612F ; NSM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; NSM # Mn CHISOI SIGN ANUSVARA +16D9D ; NSM # Mn CHISOI SIGN SISO 16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; NSM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; NSM # Mn KHITAN SMALL SCRIPT FILLER @@ -2418,7 +2423,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2064 +# Total code points: 2066 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index d9a17a78c..7ef663a65 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 18:37:59 GMT +# Date: 2024-11-14, 19:48:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1836,6 +1836,10 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16D6B..16D6C ; 0 # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; 0 # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; 0 # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; 0 # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; 0 # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; 0 # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16DA0..16DA9 ; 0 # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; 0 # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; 0 # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; 0 # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH @@ -2070,8 +2074,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821466 code points not listed here. -# Total code points: 1113149 +# The above property value applies to 821426 code points not listed here. +# Total code points: 1113148 # ================================================ @@ -2208,8 +2212,9 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK 11F41 ; 9 # Mc KAWI SIGN KILLER 11F42 ; 9 # Mn KAWI CONJOINER 1612F ; 9 # Mn GURUNG KHEMA SIGN THOLHOMA +16D9D ; 9 # Mn CHISOI SIGN SISO -# Total code points: 69 +# Total code points: 70 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 3c0cb2d75..0b91e12ca 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 18:38:02 GMT +# Date: 2024-11-14, 19:48:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1876,6 +1876,11 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16D6B..16D6C ; N # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; N # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; N # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; N # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; N # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; N # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; N # Mn CHISOI SIGN SISO +16DA0..16DA9 ; N # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH @@ -2114,7 +2119,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760984 code points not listed here. +# The above property value applies to 760944 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 145d3bc46..93cce79d6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 18:38:02 GMT +# Date: 2024-11-14, 19:48:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -567,7 +567,9 @@ FFFE..FFFF ; Cn # [2] .. 16B62 ; Cn # 16B78..16B7C ; Cn # [5] .. 16B90..16D3F ; Cn # [432] .. -16D7A..16E3F ; Cn # [198] .. +16D7A..16D7F ; Cn # [6] .. +16D9E..16D9F ; Cn # [2] .. +16DAA..16E3F ; Cn # [150] .. 16E9B..16EFF ; Cn # [101] .. 16F4B..16F4E ; Cn # [4] .. 16F88..16F8E ; Cn # [7] .. @@ -742,7 +744,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819418 +# Total code points: 819378 # ================================================ @@ -2640,6 +2642,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16B63..16B77 ; Lo # [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; Lo # [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ 16D43..16D6A ; Lo # [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D80..16D97 ; Lo # [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; Lo # [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION 17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 @@ -2711,7 +2715,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136513 +# Total code points: 136541 # ================================================ @@ -3047,6 +3051,8 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1612D..1612F ; Mn # [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Mn # [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Mn # [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Mn # CHISOI SIGN ANUSVARA +16D9D ; Mn # CHISOI SIGN SISO 16F4F ; Mn # MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Mn # [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; Mn # KHITAN SMALL SCRIPT FILLER @@ -3079,7 +3085,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2056 +# Total code points: 2058 # ================================================ @@ -3360,6 +3366,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 16AC0..16AC9 ; Nd # [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Nd # [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; Nd # [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Nd # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; Nd # [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Nd # [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Nd # [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -3369,7 +3376,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 760 +# Total code points: 770 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 9560720aa..d505d6ad3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-14, 18:38:03 GMT +# Date: 2024-11-14, 19:48:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -550,6 +550,8 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 1612D..1612F ; T # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; T # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; T # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; T # Mn CHISOI SIGN ANUSVARA +16D9D ; T # Mn CHISOI SIGN SISO 16F4F ; T # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; T # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; T # Mn KHITAN SMALL SCRIPT FILLER @@ -587,6 +589,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2221 +# Total code points: 2223 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 45434e051..8656e73c8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 18:38:04 GMT +# Date: 2024-11-14, 19:48:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757538 code points not listed here. -# Total code points: 895006 +# The above property value applies to 757498 code points not listed here. +# Total code points: 894966 # ================================================ @@ -548,6 +548,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 16AC0..16AC9 ; NU # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; NU # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; NU # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; NU # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; NU # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; NU # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; NU # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -557,7 +558,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 1E950..1E959 ; NU # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; NU # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 695 +# Total code points: 705 # ================================================ @@ -1442,6 +1443,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 16D43..16D6A ; AL # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; AL # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D ; AL # Po KIRAT RAI SIGN YUPI +16D80..16D97 ; AL # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; AL # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; AL # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; AL # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E99..16E9A ; AL # Po [2] MEDEFAIDRIN SYMBOL AIVA..MEDEFAIDRIN EXCLAMATION OH @@ -1617,7 +1620,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26754 +# Total code points: 26782 # ================================================ @@ -2359,6 +2362,8 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 1612D..1612F ; CM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; CM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; CM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; CM # Mn CHISOI SIGN ANUSVARA +16D9D ; CM # Mn CHISOI SIGN SISO 16F4F ; CM # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; CM # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; CM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -2398,7 +2403,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2508 +# Total code points: 2510 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 006451cad..f127fc3d1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 18:38:04 GMT +# Date: 2024-11-14, 19:48:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -36819,6 +36819,46 @@ FFFD ; REPLACEMENT CHARACTER 16D77 ; KIRAT RAI DIGIT SEVEN 16D78 ; KIRAT RAI DIGIT EIGHT 16D79 ; KIRAT RAI DIGIT NINE +16D80 ; CHISOI LETTER A +16D81 ; CHISOI LETTER BA +16D82 ; CHISOI LETTER AI +16D83 ; CHISOI LETTER AA +16D84 ; CHISOI LETTER GA +16D85 ; CHISOI LETTER TA +16D86 ; CHISOI LETTER E +16D87 ; CHISOI LETTER SA +16D88 ; CHISOI LETTER NA +16D89 ; CHISOI LETTER I +16D8A ; CHISOI LETTER KA +16D8B ; CHISOI LETTER RA +16D8C ; CHISOI LETTER MA +16D8D ; CHISOI LETTER HA +16D8E ; CHISOI LETTER RRA +16D8F ; CHISOI LETTER U +16D90 ; CHISOI LETTER DA +16D91 ; CHISOI LETTER LA +16D92 ; CHISOI LETTER O +16D93 ; CHISOI LETTER NYA +16D94 ; CHISOI LETTER NGA +16D95 ; CHISOI LETTER CA +16D96 ; CHISOI LETTER JA +16D97 ; CHISOI LETTER PA +16D98 ; CHISOI SIGN ANUSVARA +16D99 ; CHISOI LETTER YA +16D9A ; CHISOI LETTER DDA +16D9B ; CHISOI LETTER TTA +16D9C ; CHISOI LETTER JARAHA +16D9D ; CHISOI SIGN SISO +16DA0 ; CHISOI DIGIT ZERO +16DA1 ; CHISOI DIGIT ONE +16DA2 ; CHISOI DIGIT TWO +16DA3 ; CHISOI DIGIT THREE +16DA4 ; CHISOI DIGIT FOUR +16DA5 ; CHISOI DIGIT FIVE +16DA6 ; CHISOI DIGIT SIX +16DA7 ; CHISOI DIGIT SEVEN +16DA8 ; CHISOI DIGIT EIGHT +16DA9 ; CHISOI DIGIT NINE 16E40 ; MEDEFAIDRIN CAPITAL LETTER M 16E41 ; MEDEFAIDRIN CAPITAL LETTER S 16E42 ; MEDEFAIDRIN CAPITAL LETTER V @@ -45482,6 +45522,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155113 +# Total code points: 155153 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt index 8e48d0b85..720458961 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt @@ -1,5 +1,5 @@ -# DerivedNumericType-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# DerivedNumericType-17.0.0.txt +# Date: 2024-11-14, 19:48:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -284,6 +284,7 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 16AC0..16AC9 ; Decimal # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Decimal # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; Decimal # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Decimal # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; Decimal # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Decimal # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -293,6 +294,6 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 760 +# Total code points: 770 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt index ae1f99c0f..bb4d4cb05 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt @@ -1,5 +1,5 @@ -# DerivedNumericValues-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# DerivedNumericValues-17.0.0.txt +# Date: 2024-11-14, 19:48:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -116,6 +116,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 16AC0 ; 0.0 ; ; 0 # Nd TANGSA DIGIT ZERO 16B50 ; 0.0 ; ; 0 # Nd PAHAWH HMONG DIGIT ZERO 16D70 ; 0.0 ; ; 0 # Nd KIRAT RAI DIGIT ZERO +16DA0 ; 0.0 ; ; 0 # Nd CHISOI DIGIT ZERO 16E80 ; 0.0 ; ; 0 # No MEDEFAIDRIN DIGIT ZERO 1CCF0 ; 0.0 ; ; 0 # Nd OUTLINED DIGIT ZERO 1D2C0 ; 0.0 ; ; 0 # No KAKTOVIK NUMERAL ZERO @@ -134,7 +135,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1F10B..1F10C ; 0.0 ; ; 0 # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO 1FBF0 ; 0.0 ; ; 0 # Nd SEGMENTED DIGIT ZERO -# Total code points: 96 +# Total code points: 97 # ================================================ @@ -542,6 +543,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 16AC1 ; 1.0 ; ; 1 # Nd TANGSA DIGIT ONE 16B51 ; 1.0 ; ; 1 # Nd PAHAWH HMONG DIGIT ONE 16D71 ; 1.0 ; ; 1 # Nd KIRAT RAI DIGIT ONE +16DA1 ; 1.0 ; ; 1 # Nd CHISOI DIGIT ONE 16E81 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE 16E94 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE ALTERNATE FORM 1CCF1 ; 1.0 ; ; 1 # Nd OUTLINED DIGIT ONE @@ -569,7 +571,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 152 +# Total code points: 153 # ================================================ @@ -704,6 +706,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 16AC2 ; 2.0 ; ; 2 # Nd TANGSA DIGIT TWO 16B52 ; 2.0 ; ; 2 # Nd PAHAWH HMONG DIGIT TWO 16D72 ; 2.0 ; ; 2 # Nd KIRAT RAI DIGIT TWO +16DA2 ; 2.0 ; ; 2 # Nd CHISOI DIGIT TWO 16E82 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO 16E95 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO ALTERNATE FORM 1CCF2 ; 2.0 ; ; 2 # Nd OUTLINED DIGIT TWO @@ -731,7 +734,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 154 +# Total code points: 155 # ================================================ @@ -858,6 +861,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 16AC3 ; 3.0 ; ; 3 # Nd TANGSA DIGIT THREE 16B53 ; 3.0 ; ; 3 # Nd PAHAWH HMONG DIGIT THREE 16D73 ; 3.0 ; ; 3 # Nd KIRAT RAI DIGIT THREE +16DA3 ; 3.0 ; ; 3 # Nd CHISOI DIGIT THREE 16E83 ; 3.0 ; ; 3 # No MEDEFAIDRIN DIGIT THREE 16E96 ; 3.0 ; ; 3 # No MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 1CCF3 ; 3.0 ; ; 3 # Nd OUTLINED DIGIT THREE @@ -887,7 +891,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998 23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B -# Total code points: 152 +# Total code points: 153 # ================================================ @@ -1009,6 +1013,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 16AC4 ; 4.0 ; ; 4 # Nd TANGSA DIGIT FOUR 16B54 ; 4.0 ; ; 4 # Nd PAHAWH HMONG DIGIT FOUR 16D74 ; 4.0 ; ; 4 # Nd KIRAT RAI DIGIT FOUR +16DA4 ; 4.0 ; ; 4 # Nd CHISOI DIGIT FOUR 16E84 ; 4.0 ; ; 4 # No MEDEFAIDRIN DIGIT FOUR 1CCF4 ; 4.0 ; ; 4 # Nd OUTLINED DIGIT FOUR 1D2C4 ; 4.0 ; ; 4 # No KAKTOVIK NUMERAL FOUR @@ -1036,7 +1041,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2 2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D -# Total code points: 143 +# Total code points: 144 # ================================================ @@ -1161,6 +1166,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 16AC5 ; 5.0 ; ; 5 # Nd TANGSA DIGIT FIVE 16B55 ; 5.0 ; ; 5 # Nd PAHAWH HMONG DIGIT FIVE 16D75 ; 5.0 ; ; 5 # Nd KIRAT RAI DIGIT FIVE +16DA5 ; 5.0 ; ; 5 # Nd CHISOI DIGIT FIVE 16E85 ; 5.0 ; ; 5 # No MEDEFAIDRIN DIGIT FIVE 1CCF5 ; 5.0 ; ; 5 # Nd OUTLINED DIGIT FIVE 1D2C5 ; 5.0 ; ; 5 # No KAKTOVIK NUMERAL FIVE @@ -1187,7 +1193,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1FBF5 ; 5.0 ; ; 5 # Nd SEGMENTED DIGIT FIVE 20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121 -# Total code points: 141 +# Total code points: 142 # ================================================ @@ -1299,6 +1305,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 16AC6 ; 6.0 ; ; 6 # Nd TANGSA DIGIT SIX 16B56 ; 6.0 ; ; 6 # Nd PAHAWH HMONG DIGIT SIX 16D76 ; 6.0 ; ; 6 # Nd KIRAT RAI DIGIT SIX +16DA6 ; 6.0 ; ; 6 # Nd CHISOI DIGIT SIX 16E86 ; 6.0 ; ; 6 # No MEDEFAIDRIN DIGIT SIX 1CCF6 ; 6.0 ; ; 6 # Nd OUTLINED DIGIT SIX 1D2C6 ; 6.0 ; ; 6 # No KAKTOVIK NUMERAL SIX @@ -1323,7 +1330,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1FBF6 ; 6.0 ; ; 6 # Nd SEGMENTED DIGIT SIX 20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA -# Total code points: 125 +# Total code points: 126 # ================================================ @@ -1433,6 +1440,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 16AC7 ; 7.0 ; ; 7 # Nd TANGSA DIGIT SEVEN 16B57 ; 7.0 ; ; 7 # Nd PAHAWH HMONG DIGIT SEVEN 16D77 ; 7.0 ; ; 7 # Nd KIRAT RAI DIGIT SEVEN +16DA7 ; 7.0 ; ; 7 # Nd CHISOI DIGIT SEVEN 16E87 ; 7.0 ; ; 7 # No MEDEFAIDRIN DIGIT SEVEN 1CCF7 ; 7.0 ; ; 7 # Nd OUTLINED DIGIT SEVEN 1D2C7 ; 7.0 ; ; 7 # No KAKTOVIK NUMERAL SEVEN @@ -1457,7 +1465,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1FBF7 ; 7.0 ; ; 7 # Nd SEGMENTED DIGIT SEVEN 20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001 -# Total code points: 125 +# Total code points: 126 # ================================================ @@ -1564,6 +1572,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 16AC8 ; 8.0 ; ; 8 # Nd TANGSA DIGIT EIGHT 16B58 ; 8.0 ; ; 8 # Nd PAHAWH HMONG DIGIT EIGHT 16D78 ; 8.0 ; ; 8 # Nd KIRAT RAI DIGIT EIGHT +16DA8 ; 8.0 ; ; 8 # Nd CHISOI DIGIT EIGHT 16E88 ; 8.0 ; ; 8 # No MEDEFAIDRIN DIGIT EIGHT 1CCF8 ; 8.0 ; ; 8 # Nd OUTLINED DIGIT EIGHT 1D2C8 ; 8.0 ; ; 8 # No KAKTOVIK NUMERAL EIGHT @@ -1587,7 +1596,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA 1FBF8 ; 8.0 ; ; 8 # Nd SEGMENTED DIGIT EIGHT -# Total code points: 120 +# Total code points: 121 # ================================================ @@ -1697,6 +1706,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 16AC9 ; 9.0 ; ; 9 # Nd TANGSA DIGIT NINE 16B59 ; 9.0 ; ; 9 # Nd PAHAWH HMONG DIGIT NINE 16D79 ; 9.0 ; ; 9 # Nd KIRAT RAI DIGIT NINE +16DA9 ; 9.0 ; ; 9 # Nd CHISOI DIGIT NINE 16E89 ; 9.0 ; ; 9 # No MEDEFAIDRIN DIGIT NINE 1CCF9 ; 9.0 ; ; 9 # Nd OUTLINED DIGIT NINE 1D2C9 ; 9.0 ; ; 9 # No KAKTOVIK NUMERAL NINE @@ -1721,7 +1731,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1FBF9 ; 9.0 ; ; 9 # Nd SEGMENTED DIGIT NINE 2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 126 +# Total code points: 127 # ================================================ diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 8e497e96f..2b5d727e2 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -221,6 +221,7 @@ public enum Block_Values implements Named { Cherokee("Cherokee"), Cherokee_Supplement("Cherokee_Sup"), Chess_Symbols("Chess_Symbols"), + Chisoi("Chisoi"), Chorasmian("Chorasmian"), CJK_Unified_Ideographs("CJK"), CJK_Compatibility("CJK_Compat"), @@ -1864,6 +1865,7 @@ public enum Script_Values implements Named { Carian("Cari"), Cham("Cham"), Cherokee("Cher"), + Chisoi("Chis"), Chorasmian("Chrs"), Coptic("Copt", "Qaac"), Cypro_Minoan("Cpmn"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index bf082107a..4c5f819cc 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -42,6 +42,7 @@ Cham ; Cham Cherokee ; Cherokee Cherokee_Sup ; Cherokee_Supplement Chess_Symbols ; Chess_Symbols +Chisoi ; Chisoi Chorasmian ; Chorasmian CJK_Compat ; CJK_Compatibility CJK_Compat_Forms ; CJK_Compatibility_Forms From 35e96c6aeacff0bb9b1de30ad04831adfb8c9505 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 21:52:23 +0100 Subject: [PATCH 25/38] Beria Erfe (#661) * UnicodeData.txt lines from L2/24-004 * LineBreak.txt from L2/24-004 * Scripts.txt * block * Paging the ISO 15924 registrar * Regenerate UCD * GenerateEnums * Split cases * Regenerate UCD --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/CaseFolding.txt | 27 ++++++++- unicodetools/data/ucd/dev/DerivedAge.txt | 6 +- .../data/ucd/dev/DerivedCoreProperties.txt | 52 +++++++++++----- .../ucd/dev/DerivedNormalizationProps.txt | 59 +++++++++++++++++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +- unicodetools/data/ucd/dev/LineBreak.txt | 4 +- .../data/ucd/dev/PropertyValueAliases.txt | 4 +- unicodetools/data/ucd/dev/Scripts.txt | 9 ++- unicodetools/data/ucd/dev/UnicodeData.txt | 50 ++++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 4 +- .../dev/auxiliary/SentenceBreakProperty.txt | 8 ++- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 +- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 +- .../dev/extracted/DerivedCombiningClass.txt | 6 +- .../dev/extracted/DerivedEastAsianWidth.txt | 6 +- .../dev/extracted/DerivedGeneralCategory.txt | 14 +++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 ++-- .../data/ucd/dev/extracted/DerivedName.txt | 54 ++++++++++++++++- .../org/unicode/props/UcdPropertyValues.java | 2 + .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + 21 files changed, 284 insertions(+), 49 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 99383f375..94a188532 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -304,6 +304,7 @@ FFF0..FFFF; Specials 16A70..16ACF; Tangsa 16AD0..16AFF; Bassa Vah 16B00..16B8F; Pahawh Hmong +16EA0..16EDF; Beria Erfe 16D40..16D7F; Kirat Rai 16D80..16DAF; Chisoi 16E40..16E9F; Medefaidrin diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 533abab63..9e3c3320a 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ # CaseFolding-17.0.0.txt -# Date: 2024-11-13, 22:03:16 GMT +# Date: 2024-11-14, 20:19:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1619,6 +1619,31 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O 16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI 16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y +16EA0; C; 16EBB; # BERIA ERFE CAPITAL LETTER ARKAB +16EA1; C; 16EBC; # BERIA ERFE CAPITAL LETTER BASIGNA +16EA2; C; 16EBD; # BERIA ERFE CAPITAL LETTER DARBAI +16EA3; C; 16EBE; # BERIA ERFE CAPITAL LETTER EH +16EA4; C; 16EBF; # BERIA ERFE CAPITAL LETTER FITKO +16EA5; C; 16EC0; # BERIA ERFE CAPITAL LETTER GOWAY +16EA6; C; 16EC1; # BERIA ERFE CAPITAL LETTER HIRDEABO +16EA7; C; 16EC2; # BERIA ERFE CAPITAL LETTER I +16EA8; C; 16EC3; # BERIA ERFE CAPITAL LETTER DJAI +16EA9; C; 16EC4; # BERIA ERFE CAPITAL LETTER KOBO +16EAA; C; 16EC5; # BERIA ERFE CAPITAL LETTER LAKKO +16EAB; C; 16EC6; # BERIA ERFE CAPITAL LETTER MERI +16EAC; C; 16EC7; # BERIA ERFE CAPITAL LETTER NINI +16EAD; C; 16EC8; # BERIA ERFE CAPITAL LETTER GNA +16EAE; C; 16EC9; # BERIA ERFE CAPITAL LETTER NGAY +16EAF; C; 16ECA; # BERIA ERFE CAPITAL LETTER OI +16EB0; C; 16ECB; # BERIA ERFE CAPITAL LETTER PI +16EB1; C; 16ECC; # BERIA ERFE CAPITAL LETTER ERIGO +16EB2; C; 16ECD; # BERIA ERFE CAPITAL LETTER ERIGO TAMURA +16EB3; C; 16ECE; # BERIA ERFE CAPITAL LETTER SERI +16EB4; C; 16ECF; # BERIA ERFE CAPITAL LETTER SHEP +16EB5; C; 16ED0; # BERIA ERFE CAPITAL LETTER TATASOUE +16EB6; C; 16ED1; # BERIA ERFE CAPITAL LETTER UI +16EB7; C; 16ED2; # BERIA ERFE CAPITAL LETTER WASSE +16EB8; C; 16ED3; # BERIA ERFE CAPITAL LETTER AY 1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF 1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI 1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index d0731ac36..d95507f85 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 19:48:01 GMT +# Date: 2024-11-14, 20:19:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2087,7 +2087,9 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 11B60..11B67 ; 17.0 # [8] SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O 16D80..16D9D ; 17.0 # [30] CHISOI LETTER A..CHISOI SIGN SISO 16DA0..16DA9 ; 17.0 # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE +16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY -# Total code points: 155 +# Total code points: 205 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index e901a471d..afb8eb130 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 19:48:24 GMT +# Date: 2024-11-14, 20:20:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1321,6 +1321,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16D98 ; Alphabetic # Mn CHISOI SIGN ANUSVARA 16D99..16D9C ; Alphabetic # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; Alphabetic # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; Alphabetic # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Alphabetic # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; Alphabetic # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; Alphabetic # Lo MIAO LETTER NASALIZATION @@ -1451,7 +1453,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142839 +# Total code points: 142889 # ================================================ @@ -2116,6 +2118,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 10D70..10D85 ; Lowercase # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lowercase # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Lowercase # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Lowercase # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1D41A..1D433 ; Lowercase # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z 1D44E..1D454 ; Lowercase # L& [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G 1D456..1D467 ; Lowercase # L& [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z @@ -2150,7 +2153,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2570 +# Total code points: 2595 # ================================================ @@ -2781,6 +2784,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 10D50..10D65 ; Uppercase # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Uppercase # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Uppercase # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Uppercase # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z 1D434..1D44D ; Uppercase # L& [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z 1D468..1D481 ; Uppercase # L& [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z @@ -2817,7 +2821,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1981 +# Total code points: 2006 # ================================================ @@ -2960,6 +2964,8 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10D70..10D85 ; Cased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Cased # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Cased # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; Cased # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Cased # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1D400..1D454 ; Cased # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; Cased # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; Cased # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -2999,7 +3005,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4582 +# Total code points: 4632 # ================================================ @@ -4148,9 +4154,10 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10D50..10D65 ; Changes_When_Lowercased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Lowercased # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Changes_When_Lowercased # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1463 +# Total code points: 1488 # ================================================ @@ -4791,9 +4798,10 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Uppercased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Uppercased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Changes_When_Uppercased # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1555 +# Total code points: 1580 # ================================================ @@ -5433,9 +5441,10 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Titlecased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Titlecased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Changes_When_Titlecased # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1482 +# Total code points: 1507 # ================================================ @@ -6072,9 +6081,10 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10D50..10D65 ; Changes_When_Casefolded # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Casefolded # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Changes_When_Casefolded # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1536 +# Total code points: 1561 # ================================================ @@ -6211,9 +6221,11 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Casemapped # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Changes_When_Casemapped # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; Changes_When_Casemapped # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Changes_When_Casemapped # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2987 +# Total code points: 3037 # ================================================ @@ -6878,6 +6890,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16D80..16D97 ; ID_Start # Lo [24] CHISOI LETTER A..CHISOI LETTER PA 16D99..16D9C ; ID_Start # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; ID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; ID_Start # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; ID_Start # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; ID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -6993,7 +7007,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141338 +# Total code points: 141388 # ================================================ @@ -8256,6 +8270,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16D9D ; ID_Continue # Mn CHISOI SIGN SISO 16DA0..16DA9 ; ID_Continue # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; ID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; ID_Continue # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; ID_Continue # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; ID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; ID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; ID_Continue # Lo MIAO LETTER NASALIZATION @@ -8414,7 +8430,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144661 +# Total code points: 144711 # ================================================ @@ -9080,6 +9096,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16D80..16D97 ; XID_Start # Lo [24] CHISOI LETTER A..CHISOI LETTER PA 16D99..16D9C ; XID_Start # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; XID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; XID_Start # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; XID_Start # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; XID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -9195,7 +9213,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141315 +# Total code points: 141365 # ================================================ @@ -10459,6 +10477,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16D9D ; XID_Continue # Mn CHISOI SIGN SISO 16DA0..16DA9 ; XID_Continue # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; XID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; XID_Continue # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; XID_Continue # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; XID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; XID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; XID_Continue # Lo MIAO LETTER NASALIZATION @@ -10617,7 +10637,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144642 +# Total code points: 144692 # ================================================ @@ -12676,6 +12696,8 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16E40..16E7F ; Grapheme_Base # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; Grapheme_Base # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; Grapheme_Base # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; Grapheme_Base # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Grapheme_Base # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; Grapheme_Base # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Grapheme_Base # Lo MIAO LETTER NASALIZATION 16F51..16F87 ; Grapheme_Base # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI @@ -12888,7 +12910,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152847 +# Total code points: 152897 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index 743f1cdcb..e8bb30b2a 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-17.0.0.txt -# Date: 2024-11-13, 22:18:47 GMT +# Date: 2024-11-14, 20:20:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -7189,6 +7189,31 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] ...... -# Total code points: 10558 +# Total code points: 10583 # ================================================ @@ -13327,6 +13352,31 @@ FFF0..FFF8 ; NFKC_SCF; # Cn [9] ...... -# Total code points: 10520 +# Total code points: 10545 # ================================================ @@ -16211,6 +16261,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] ...... -# Total code points: 10558 +# Total code points: 10583 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 034acfc2c..a16b79315 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 19:48:31 GMT +# Date: 2024-11-14, 20:20:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2365,6 +2365,8 @@ FFFD ; A # So REPLACEMENT CHARACTER 16E40..16E7F ; N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; N # Lu [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; N # Ll [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; N # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; N # Lo MIAO LETTER NASALIZATION diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 75368d39d..d0db8c4fc 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 19:48:32 GMT +# Date: 2024-11-14, 20:20:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3276,6 +3276,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16E80..16E96 ; AL # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E98 ; BA # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP 16E99..16E9A ; AL # Po [2] MEDEFAIDRIN SYMBOL AIVA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; AL # Lu [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; AL # Ll [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; AL # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; CM # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; AL # Lo MIAO LETTER NASALIZATION diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 5624118c0..05a230765 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-14, 19:48:45 GMT +# Date: 2024-11-14, 20:20:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -180,6 +180,7 @@ blk; Bamum_Sup ; Bamum_Supplement blk; Bassa_Vah ; Bassa_Vah blk; Batak ; Batak blk; Bengali ; Bengali +blk; Beria_Erfe ; Beria_Erfe blk; Bhaiksuki ; Bhaiksuki blk; Block_Elements ; Block_Elements blk; Bopomofo ; Bopomofo @@ -1434,6 +1435,7 @@ sc ; Phlp ; Psalter_Pahlavi sc ; Phnx ; Phoenician sc ; Plrd ; Miao sc ; Prti ; Inscriptional_Parthian +sc ; Qaba ; Beria_Erfe sc ; Rjng ; Rejang sc ; Rohg ; Hanifi_Rohingya sc ; Runr ; Runic diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 8aa5b147f..cd4e1cc44 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 19:49:02 GMT +# Date: 2024-11-14, 20:20:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3151,4 +3151,11 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # Total code points: 29 +# ================================================ + +16EA0..16EB8 ; Beria_Erfe # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Beria_Erfe # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY + +# Total code points: 50 + # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 1785a773b..176763415 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -30279,6 +30279,56 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16E98;MEDEFAIDRIN FULL STOP;Po;0;L;;;;;N;;;;; 16E99;MEDEFAIDRIN SYMBOL AIVA;Po;0;L;;;;;N;;;;; 16E9A;MEDEFAIDRIN EXCLAMATION OH;Po;0;L;;;;;N;;;;; +16EA0;BERIA ERFE CAPITAL LETTER ARKAB;Lu;0;L;;;;;N;;;;16EBB; +16EA1;BERIA ERFE CAPITAL LETTER BASIGNA;Lu;0;L;;;;;N;;;;16EBC; +16EA2;BERIA ERFE CAPITAL LETTER DARBAI;Lu;0;L;;;;;N;;;;16EBD; +16EA3;BERIA ERFE CAPITAL LETTER EH;Lu;0;L;;;;;N;;;;16EBE; +16EA4;BERIA ERFE CAPITAL LETTER FITKO;Lu;0;L;;;;;N;;;;16EBF; +16EA5;BERIA ERFE CAPITAL LETTER GOWAY;Lu;0;L;;;;;N;;;;16EC0; +16EA6;BERIA ERFE CAPITAL LETTER HIRDEABO;Lu;0;L;;;;;N;;;;16EC1; +16EA7;BERIA ERFE CAPITAL LETTER I;Lu;0;L;;;;;N;;;;16EC2; +16EA8;BERIA ERFE CAPITAL LETTER DJAI;Lu;0;L;;;;;N;;;;16EC3; +16EA9;BERIA ERFE CAPITAL LETTER KOBO;Lu;0;L;;;;;N;;;;16EC4; +16EAA;BERIA ERFE CAPITAL LETTER LAKKO;Lu;0;L;;;;;N;;;;16EC5; +16EAB;BERIA ERFE CAPITAL LETTER MERI;Lu;0;L;;;;;N;;;;16EC6; +16EAC;BERIA ERFE CAPITAL LETTER NINI;Lu;0;L;;;;;N;;;;16EC7; +16EAD;BERIA ERFE CAPITAL LETTER GNA;Lu;0;L;;;;;N;;;;16EC8; +16EAE;BERIA ERFE CAPITAL LETTER NGAY;Lu;0;L;;;;;N;;;;16EC9; +16EAF;BERIA ERFE CAPITAL LETTER OI;Lu;0;L;;;;;N;;;;16ECA; +16EB0;BERIA ERFE CAPITAL LETTER PI;Lu;0;L;;;;;N;;;;16ECB; +16EB1;BERIA ERFE CAPITAL LETTER ERIGO;Lu;0;L;;;;;N;;;;16ECC; +16EB2;BERIA ERFE CAPITAL LETTER ERIGO TAMURA;Lu;0;L;;;;;N;;;;16ECD; +16EB3;BERIA ERFE CAPITAL LETTER SERI;Lu;0;L;;;;;N;;;;16ECE; +16EB4;BERIA ERFE CAPITAL LETTER SHEP;Lu;0;L;;;;;N;;;;16ECF; +16EB5;BERIA ERFE CAPITAL LETTER TATASOUE;Lu;0;L;;;;;N;;;;16ED0; +16EB6;BERIA ERFE CAPITAL LETTER UI;Lu;0;L;;;;;N;;;;16ED1; +16EB7;BERIA ERFE CAPITAL LETTER WASSE;Lu;0;L;;;;;N;;;;16ED2; +16EB8;BERIA ERFE CAPITAL LETTER AY;Lu;0;L;;;;;N;;;;16ED3; +16EBB;BERIA ERFE SMALL LETTER ARKAB;Ll;0;L;;;;;N;;;16EA0;;16EA0 +16EBC;BERIA ERFE SMALL LETTER BASIGNA;Ll;0;L;;;;;N;;;16EA1;;16EA1 +16EBD;BERIA ERFE SMALL LETTER DARBAI;Ll;0;L;;;;;N;;;16EA2;;16EA2 +16EBE;BERIA ERFE SMALL LETTER EH;Ll;0;L;;;;;N;;;16EA3;;16EA3 +16EBF;BERIA ERFE SMALL LETTER FITKO;Ll;0;L;;;;;N;;;16EA4;;16EA4 +16EC0;BERIA ERFE SMALL LETTER GOWAY;Ll;0;L;;;;;N;;;16EA5;;16EA5 +16EC1;BERIA ERFE SMALL LETTER HIRDEABO;Ll;0;L;;;;;N;;;16EA6;;16EA6 +16EC2;BERIA ERFE SMALL LETTER I;Ll;0;L;;;;;N;;;16EA7;;16EA7 +16EC3;BERIA ERFE SMALL LETTER DJAI;Ll;0;L;;;;;N;;;16EA8;;16EA8 +16EC4;BERIA ERFE SMALL LETTER KOBO;Ll;0;L;;;;;N;;;16EA9;;16EA9 +16EC5;BERIA ERFE SMALL LETTER LAKKO;Ll;0;L;;;;;N;;;16EAA;;16EAA +16EC6;BERIA ERFE SMALL LETTER MERI;Ll;0;L;;;;;N;;;16EAB;;16EAB +16EC7;BERIA ERFE SMALL LETTER NINI;Ll;0;L;;;;;N;;;16EAC;;16EAC +16EC8;BERIA ERFE SMALL LETTER GNA;Ll;0;L;;;;;N;;;16EAD;;16EAD +16EC9;BERIA ERFE SMALL LETTER NGAY;Ll;0;L;;;;;N;;;16EAE;;16EAE +16ECA;BERIA ERFE SMALL LETTER OI;Ll;0;L;;;;;N;;;16EAF;;16EAF +16ECB;BERIA ERFE SMALL LETTER PI;Ll;0;L;;;;;N;;;16EB0;;16EB0 +16ECC;BERIA ERFE SMALL LETTER ERIGO;Ll;0;L;;;;;N;;;16EB1;;16EB1 +16ECD;BERIA ERFE SMALL LETTER ERIGO TAMURA;Ll;0;L;;;;;N;;;16EB2;;16EB2 +16ECE;BERIA ERFE SMALL LETTER SERI;Ll;0;L;;;;;N;;;16EB3;;16EB3 +16ECF;BERIA ERFE SMALL LETTER SHEP;Ll;0;L;;;;;N;;;16EB4;;16EB4 +16ED0;BERIA ERFE SMALL LETTER TATASOUE;Ll;0;L;;;;;N;;;16EB5;;16EB5 +16ED1;BERIA ERFE SMALL LETTER UI;Ll;0;L;;;;;N;;;16EB6;;16EB6 +16ED2;BERIA ERFE SMALL LETTER WASSE;Ll;0;L;;;;;N;;;16EB7;;16EB7 +16ED3;BERIA ERFE SMALL LETTER AY;Ll;0;L;;;;;N;;;16EB8;;16EB8 16F00;MIAO LETTER PA;Lo;0;L;;;;;N;;;;; 16F01;MIAO LETTER BA;Lo;0;L;;;;;N;;;;; 16F02;MIAO LETTER YI PA;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index f7589f767..38b6130a0 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 19:49:05 GMT +# Date: 2024-11-14, 20:20:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2193,6 +2193,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16E40..16E7F ; R # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; R # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; R # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; R # Lu [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; R # Ll [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; R # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; R # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; R # Lo MIAO LETTER NASALIZATION diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 848e6705c..30c96c678 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 19:49:03 GMT +# Date: 2024-11-14, 20:20:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1296,6 +1296,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10D70..10D85 ; Lower # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lower # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Lower # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Lower # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1D41A..1D433 ; Lower # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z 1D44E..1D454 ; Lower # L& [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G 1D456..1D467 ; Lower # L& [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z @@ -1330,7 +1331,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2523 +# Total code points: 2548 # ================================================ @@ -1959,6 +1960,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 10D50..10D65 ; Upper # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Upper # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Upper # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Upper # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1D400..1D419 ; Upper # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z 1D434..1D44D ; Upper # L& [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z 1D468..1D481 ; Upper # L& [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z @@ -1995,7 +1997,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1966 +# Total code points: 1991 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index b42092680..4dd410759 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 19:49:05 GMT +# Date: 2024-11-14, 20:20:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1270,6 +1270,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16D80..16D97 ; ALetter # Lo [24] CHISOI LETTER A..CHISOI LETTER PA 16D99..16D9C ; ALetter # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; ALetter # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; ALetter # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; ALetter # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; ALetter # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; ALetter # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -1367,7 +1369,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33860 +# Total code points: 33910 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 6dee40eac..81774fad8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 19:48:22 GMT +# Date: 2024-11-14, 20:20:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1101,6 +1101,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16E40..16E7F ; L # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; L # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; L # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; L # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; L # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; L # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; L # Lo MIAO LETTER NASALIZATION 16F51..16F87 ; L # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI @@ -1219,7 +1221,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815265 code points not listed here. +# The above property value applies to 815215 code points not listed here. # Total code points: 1095476 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 7ef663a65..e66191403 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 19:48:24 GMT +# Date: 2024-11-14, 20:20:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1843,6 +1843,8 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16E40..16E7F ; 0 # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; 0 # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; 0 # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; 0 # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; 0 # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; 0 # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; 0 # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; 0 # Lo MIAO LETTER NASALIZATION @@ -2074,7 +2076,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821426 code points not listed here. +# The above property value applies to 821376 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 0b91e12ca..63f4bd761 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 19:48:26 GMT +# Date: 2024-11-14, 20:20:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1884,6 +1884,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16E40..16E7F ; N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; N # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; N # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; N # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; N # Lo MIAO LETTER NASALIZATION @@ -2119,7 +2121,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760944 code points not listed here. +# The above property value applies to 760894 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 93cce79d6..1267d144f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 19:48:27 GMT +# Date: 2024-11-14, 20:20:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -570,7 +570,9 @@ FFFE..FFFF ; Cn # [2] .. 16D7A..16D7F ; Cn # [6] .. 16D9E..16D9F ; Cn # [2] .. 16DAA..16E3F ; Cn # [150] .. -16E9B..16EFF ; Cn # [101] .. +16E9B..16E9F ; Cn # [5] .. +16EB9..16EBA ; Cn # [2] .. +16ED4..16EFF ; Cn # [44] .. 16F4B..16F4E ; Cn # [4] .. 16F88..16F8E ; Cn # [7] .. 16FA0..16FDF ; Cn # [64] .. @@ -744,7 +746,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819378 +# Total code points: 819328 # ================================================ @@ -1372,6 +1374,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 10D50..10D65 ; Lu # [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Lu # [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Lu # [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Lu # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1D400..1D419 ; Lu # [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z 1D434..1D44D ; Lu # [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z 1D468..1D481 ; Lu # [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z @@ -1405,7 +1408,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1861 +# Total code points: 1886 # ================================================ @@ -2042,6 +2045,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 10D70..10D85 ; Ll # [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Ll # [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Ll # [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Ll # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1D41A..1D433 ; Ll # [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z 1D44E..1D454 ; Ll # [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G 1D456..1D467 ; Ll # [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z @@ -2075,7 +2079,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2258 +# Total code points: 2283 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 8656e73c8..180323543 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 19:48:28 GMT +# Date: 2024-11-14, 20:20:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757498 code points not listed here. -# Total code points: 894966 +# The above property value applies to 757448 code points not listed here. +# Total code points: 894916 # ================================================ @@ -1448,6 +1448,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 16E40..16E7F ; AL # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; AL # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E99..16E9A ; AL # Po [2] MEDEFAIDRIN SYMBOL AIVA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; AL # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; AL # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; AL # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; AL # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -1620,7 +1622,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26782 +# Total code points: 26832 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index f127fc3d1..9195b5d80 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 19:48:28 GMT +# Date: 2024-11-14, 20:20:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -36950,6 +36950,56 @@ FFFD ; REPLACEMENT CHARACTER 16E98 ; MEDEFAIDRIN FULL STOP 16E99 ; MEDEFAIDRIN SYMBOL AIVA 16E9A ; MEDEFAIDRIN EXCLAMATION OH +16EA0 ; BERIA ERFE CAPITAL LETTER ARKAB +16EA1 ; BERIA ERFE CAPITAL LETTER BASIGNA +16EA2 ; BERIA ERFE CAPITAL LETTER DARBAI +16EA3 ; BERIA ERFE CAPITAL LETTER EH +16EA4 ; BERIA ERFE CAPITAL LETTER FITKO +16EA5 ; BERIA ERFE CAPITAL LETTER GOWAY +16EA6 ; BERIA ERFE CAPITAL LETTER HIRDEABO +16EA7 ; BERIA ERFE CAPITAL LETTER I +16EA8 ; BERIA ERFE CAPITAL LETTER DJAI +16EA9 ; BERIA ERFE CAPITAL LETTER KOBO +16EAA ; BERIA ERFE CAPITAL LETTER LAKKO +16EAB ; BERIA ERFE CAPITAL LETTER MERI +16EAC ; BERIA ERFE CAPITAL LETTER NINI +16EAD ; BERIA ERFE CAPITAL LETTER GNA +16EAE ; BERIA ERFE CAPITAL LETTER NGAY +16EAF ; BERIA ERFE CAPITAL LETTER OI +16EB0 ; BERIA ERFE CAPITAL LETTER PI +16EB1 ; BERIA ERFE CAPITAL LETTER ERIGO +16EB2 ; BERIA ERFE CAPITAL LETTER ERIGO TAMURA +16EB3 ; BERIA ERFE CAPITAL LETTER SERI +16EB4 ; BERIA ERFE CAPITAL LETTER SHEP +16EB5 ; BERIA ERFE CAPITAL LETTER TATASOUE +16EB6 ; BERIA ERFE CAPITAL LETTER UI +16EB7 ; BERIA ERFE CAPITAL LETTER WASSE +16EB8 ; BERIA ERFE CAPITAL LETTER AY +16EBB ; BERIA ERFE SMALL LETTER ARKAB +16EBC ; BERIA ERFE SMALL LETTER BASIGNA +16EBD ; BERIA ERFE SMALL LETTER DARBAI +16EBE ; BERIA ERFE SMALL LETTER EH +16EBF ; BERIA ERFE SMALL LETTER FITKO +16EC0 ; BERIA ERFE SMALL LETTER GOWAY +16EC1 ; BERIA ERFE SMALL LETTER HIRDEABO +16EC2 ; BERIA ERFE SMALL LETTER I +16EC3 ; BERIA ERFE SMALL LETTER DJAI +16EC4 ; BERIA ERFE SMALL LETTER KOBO +16EC5 ; BERIA ERFE SMALL LETTER LAKKO +16EC6 ; BERIA ERFE SMALL LETTER MERI +16EC7 ; BERIA ERFE SMALL LETTER NINI +16EC8 ; BERIA ERFE SMALL LETTER GNA +16EC9 ; BERIA ERFE SMALL LETTER NGAY +16ECA ; BERIA ERFE SMALL LETTER OI +16ECB ; BERIA ERFE SMALL LETTER PI +16ECC ; BERIA ERFE SMALL LETTER ERIGO +16ECD ; BERIA ERFE SMALL LETTER ERIGO TAMURA +16ECE ; BERIA ERFE SMALL LETTER SERI +16ECF ; BERIA ERFE SMALL LETTER SHEP +16ED0 ; BERIA ERFE SMALL LETTER TATASOUE +16ED1 ; BERIA ERFE SMALL LETTER UI +16ED2 ; BERIA ERFE SMALL LETTER WASSE +16ED3 ; BERIA ERFE SMALL LETTER AY 16F00 ; MIAO LETTER PA 16F01 ; MIAO LETTER BA 16F02 ; MIAO LETTER YI PA @@ -45522,6 +45572,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155153 +# Total code points: 155203 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 2b5d727e2..91ea52b89 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -204,6 +204,7 @@ public enum Block_Values implements Named { Bassa_Vah("Bassa_Vah"), Batak("Batak"), Bengali("Bengali"), + Beria_Erfe("Beria_Erfe"), Bhaiksuki("Bhaiksuki"), Block_Elements("Block_Elements"), Bopomofo("Bopomofo"), @@ -1964,6 +1965,7 @@ public enum Script_Values implements Named { Phoenician("Phnx"), Miao("Plrd"), Inscriptional_Parthian("Prti"), + Beria_Erfe("Qaba"), Rejang("Rjng"), Hanifi_Rohingya("Rohg"), Runic("Runr"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 4c5f819cc..b9ac48f0e 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -25,6 +25,7 @@ ASCII ; Basic_Latin Bassa_Vah ; Bassa_Vah Batak ; Batak Bengali ; Bengali +Beria_Erfe ; Beria_Erfe Bhaiksuki ; Bhaiksuki Block_Elements ; Block_Elements Bopomofo ; Bopomofo From 96e92df66f82e56c40aa1f6e882e030859aec8ab Mon Sep 17 00:00:00 2001 From: Josh Hadley <51727949+josh-hadley@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:41:11 -0800 Subject: [PATCH 26/38] Tolong Siki (#732) * UnicodeData.txt lines from L2/23-024 * LineBreak.txt from L2/23-024 * Scripts.txt from L2/23-024 * Block info for L2/23-024 * oopsie * mind the gap * Regenerate UCD * Regenerate Enums * Changes to TOLONG SIKI SIGN SELA Based on feedback from @eggrobin and @Ken-Whistler : - set gc=Lm (UnicodeData.txt) - set Diacritic and Extender (PropsList.txt) * C not CA * Regenerate UCD --------- Co-authored-by: Robin Leroy --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 6 +- .../data/ucd/dev/DerivedCoreProperties.txt | 38 +++++++++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 +- unicodetools/data/ucd/dev/LineBreak.txt | 6 +- unicodetools/data/ucd/dev/PropList.txt | 8 ++- .../data/ucd/dev/PropertyValueAliases.txt | 4 +- unicodetools/data/ucd/dev/Scripts.txt | 11 +++- unicodetools/data/ucd/dev/UnicodeData.txt | 54 +++++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 6 +- .../dev/auxiliary/SentenceBreakProperty.txt | 10 +++- .../ucd/dev/auxiliary/WordBreakProperty.txt | 10 +++- .../ucd/dev/extracted/DerivedBidiClass.txt | 8 ++- .../dev/extracted/DerivedCombiningClass.txt | 8 ++- .../dev/extracted/DerivedEastAsianWidth.txt | 8 ++- .../dev/extracted/DerivedGeneralCategory.txt | 18 ++++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 14 +++-- .../data/ucd/dev/extracted/DerivedName.txt | 58 ++++++++++++++++++- .../ucd/dev/extracted/DerivedNumericType.txt | 5 +- .../dev/extracted/DerivedNumericValues.txt | 32 ++++++---- .../org/unicode/props/UcdPropertyValues.java | 2 + .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + 22 files changed, 258 insertions(+), 56 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 94a188532..4101b2488 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -286,6 +286,7 @@ FFF0..FFFF; Specials 11C70..11CBF; Marchen 11D00..11D5F; Masaram Gondi 11D60..11DAF; Gunjala Gondi +11DB0..11DEF; Tolong Siki 11EE0..11EFF; Makasar 11F00..11F5F; Kawi 11FB0..11FBF; Lisu Supplement diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index d95507f85..249eb64c0 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 20:19:40 GMT +# Date: 2024-11-14, 22:50:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2085,11 +2085,13 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 10ED0..10ED8 ; 17.0 # [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFA..10EFB ; 17.0 # [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON 11B60..11B67 ; 17.0 # [8] SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O +11DB0..11DDB ; 17.0 # [44] TOLONG SIKI LETTER I..TOLONG SIKI UNGGA +11DE0..11DE9 ; 17.0 # [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 16D80..16D9D ; 17.0 # [30] CHISOI LETTER A..CHISOI SIGN SISO 16DA0..16DA9 ; 17.0 # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY -# Total code points: 205 +# Total code points: 259 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index afb8eb130..1f3b17477 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 20:20:04 GMT +# Date: 2024-11-14, 22:51:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1281,6 +1281,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11D95 ; Alphabetic # Mn GUNJALA GONDI SIGN ANUSVARA 11D96 ; Alphabetic # Mc GUNJALA GONDI SIGN VISARGA 11D98 ; Alphabetic # Lo GUNJALA GONDI OM +11DB0..11DD8 ; Alphabetic # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; Alphabetic # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; Alphabetic # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; Alphabetic # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -1453,7 +1456,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142889 +# Total code points: 142933 # ================================================ @@ -3464,6 +3467,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11D90..11D91 ; Case_Ignorable # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI 11D95 ; Case_Ignorable # Mn GUNJALA GONDI SIGN ANUSVARA 11D97 ; Case_Ignorable # Mn GUNJALA GONDI VIRAMA +11DD9 ; Case_Ignorable # Lm TOLONG SIKI SIGN SELA 11EF3..11EF4 ; Case_Ignorable # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11F00..11F01 ; Case_Ignorable # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA 11F36..11F3A ; Case_Ignorable # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R @@ -3529,7 +3533,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2789 +# Total code points: 2790 # ================================================ @@ -6862,6 +6866,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D67..11D68 ; ID_Start # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; ID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; ID_Start # Lo GUNJALA GONDI OM +11DB0..11DD8 ; ID_Start # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; ID_Start # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; ID_Start # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; ID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; ID_Start # Lo KAWI SIGN REPHA 11F04..11F10 ; ID_Start # Lo [13] KAWI LETTER A..KAWI LETTER O @@ -7007,7 +7014,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141388 +# Total code points: 141432 # ================================================ @@ -8215,6 +8222,10 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11D97 ; ID_Continue # Mn GUNJALA GONDI VIRAMA 11D98 ; ID_Continue # Lo GUNJALA GONDI OM 11DA0..11DA9 ; ID_Continue # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; ID_Continue # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; ID_Continue # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; ID_Continue # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; ID_Continue # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; ID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; ID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; ID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -8430,7 +8441,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144711 +# Total code points: 144765 # ================================================ @@ -9068,6 +9079,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 11D67..11D68 ; XID_Start # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; XID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; XID_Start # Lo GUNJALA GONDI OM +11DB0..11DD8 ; XID_Start # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; XID_Start # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; XID_Start # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; XID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; XID_Start # Lo KAWI SIGN REPHA 11F04..11F10 ; XID_Start # Lo [13] KAWI LETTER A..KAWI LETTER O @@ -9213,7 +9227,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141365 +# Total code points: 141409 # ================================================ @@ -10422,6 +10436,10 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11D97 ; XID_Continue # Mn GUNJALA GONDI VIRAMA 11D98 ; XID_Continue # Lo GUNJALA GONDI OM 11DA0..11DA9 ; XID_Continue # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; XID_Continue # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; XID_Continue # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; XID_Continue # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; XID_Continue # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; XID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; XID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; XID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -10637,7 +10655,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144692 +# Total code points: 144746 # ================================================ @@ -12637,6 +12655,10 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 11D96 ; Grapheme_Base # Mc GUNJALA GONDI SIGN VISARGA 11D98 ; Grapheme_Base # Lo GUNJALA GONDI OM 11DA0..11DA9 ; Grapheme_Base # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; Grapheme_Base # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; Grapheme_Base # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; Grapheme_Base # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; Grapheme_Base # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; Grapheme_Base # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF5..11EF6 ; Grapheme_Base # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O 11EF7..11EF8 ; Grapheme_Base # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION @@ -12910,7 +12932,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152897 +# Total code points: 152951 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a16b79315..2525f033e 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 20:20:10 GMT +# Date: 2024-11-14, 22:51:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2290,6 +2290,10 @@ FFFD ; A # So REPLACEMENT CHARACTER 11D97 ; N # Mn GUNJALA GONDI VIRAMA 11D98 ; N # Lo GUNJALA GONDI OM 11DA0..11DA9 ; N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; N # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; N # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; N # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; N # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index d0db8c4fc..2328c554e 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 20:20:11 GMT +# Date: 2024-11-14, 22:51:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3172,6 +3172,10 @@ FFFD ; AI # So REPLACEMENT CHARACTER 11D97 ; CM # Mn GUNJALA GONDI VIRAMA 11D98 ; AL # Lo GUNJALA GONDI OM 11DA0..11DA9 ; NU # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; AL # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; AL # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; AL # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; NU # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF1 ; AS # Lo [18] MAKASAR LETTER KA..MAKASAR LETTER A 11EF2 ; BA # Lo MAKASAR ANGKA 11EF3..11EF4 ; CM # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index ac90e4739..b88ef9c38 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-14, 19:48:43 GMT +# Date: 2024-11-14, 22:51:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1131,6 +1131,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA 11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +11DD9 ; Diacritic # Lm TOLONG SIKI SIGN SELA 11F41 ; Diacritic # Mc KAWI SIGN KILLER 11F42 ; Diacritic # Mn KAWI CONJOINER 11F5A ; Diacritic # Mn KAWI SIGN NUKTA @@ -1162,7 +1163,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1210 +# Total code points: 1211 # ================================================ @@ -1202,6 +1203,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 113D3 ; Extender # Lo TULU-TIGALARI SIGN PLUTA 115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 11A98 ; Extender # Mn SOYOMBO GEMINATION MARK +11DD9 ; Extender # Lm TOLONG SIKI SIGN SELA 16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM 16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK @@ -1209,7 +1211,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 1E5EF ; Extender # Mn OL ONAL SIGN IKIR 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 60 +# Total code points: 61 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 05a230765..224a2e06c 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-14, 20:20:25 GMT +# Date: 2024-11-14, 22:51:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -475,6 +475,7 @@ blk; Tibetan ; Tibetan blk; Tifinagh ; Tifinagh blk; Tirhuta ; Tirhuta blk; Todhri ; Todhri +blk; Tolong_Siki ; Tolong_Siki blk; Toto ; Toto blk; Transport_And_Map ; Transport_And_Map_Symbols blk; Tulu_Tigalari ; Tulu_Tigalari @@ -1473,6 +1474,7 @@ sc ; Tibt ; Tibetan sc ; Tirh ; Tirhuta sc ; Tnsa ; Tangsa sc ; Todr ; Todhri +sc ; Tols ; Tolong_Siki sc ; Toto ; Toto sc ; Tutg ; Tulu_Tigalari sc ; Ugar ; Ugaritic diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index cd4e1cc44..f1f9cd011 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 20:20:42 GMT +# Date: 2024-11-14, 22:51:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3153,6 +3153,15 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ +11DB0..11DD8 ; Tolong_Siki # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; Tolong_Siki # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; Tolong_Siki # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; Tolong_Siki # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE + +# Total code points: 54 + +# ================================================ + 16EA0..16EB8 ; Beria_Erfe # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 16EBB..16ED3 ; Beria_Erfe # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 176763415..b4fbb4a49 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -21983,6 +21983,60 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11DA7;GUNJALA GONDI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 11DA8;GUNJALA GONDI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 11DA9;GUNJALA GONDI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11DB0;TOLONG SIKI LETTER I;Lo;0;L;;;;;N;;;;; +11DB1;TOLONG SIKI LETTER E;Lo;0;L;;;;;N;;;;; +11DB2;TOLONG SIKI LETTER U;Lo;0;L;;;;;N;;;;; +11DB3;TOLONG SIKI LETTER O;Lo;0;L;;;;;N;;;;; +11DB4;TOLONG SIKI LETTER A;Lo;0;L;;;;;N;;;;; +11DB5;TOLONG SIKI LETTER AA;Lo;0;L;;;;;N;;;;; +11DB6;TOLONG SIKI LETTER P;Lo;0;L;;;;;N;;;;; +11DB7;TOLONG SIKI LETTER PH;Lo;0;L;;;;;N;;;;; +11DB8;TOLONG SIKI LETTER B;Lo;0;L;;;;;N;;;;; +11DB9;TOLONG SIKI LETTER BH;Lo;0;L;;;;;N;;;;; +11DBA;TOLONG SIKI LETTER M;Lo;0;L;;;;;N;;;;; +11DBB;TOLONG SIKI LETTER T;Lo;0;L;;;;;N;;;;; +11DBC;TOLONG SIKI LETTER TH;Lo;0;L;;;;;N;;;;; +11DBD;TOLONG SIKI LETTER D;Lo;0;L;;;;;N;;;;; +11DBE;TOLONG SIKI LETTER DH;Lo;0;L;;;;;N;;;;; +11DBF;TOLONG SIKI LETTER N;Lo;0;L;;;;;N;;;;; +11DC0;TOLONG SIKI LETTER TT;Lo;0;L;;;;;N;;;;; +11DC1;TOLONG SIKI LETTER TTH;Lo;0;L;;;;;N;;;;; +11DC2;TOLONG SIKI LETTER DD;Lo;0;L;;;;;N;;;;; +11DC3;TOLONG SIKI LETTER DDH;Lo;0;L;;;;;N;;;;; +11DC4;TOLONG SIKI LETTER NN;Lo;0;L;;;;;N;;;;; +11DC5;TOLONG SIKI LETTER C;Lo;0;L;;;;;N;;;;; +11DC6;TOLONG SIKI LETTER CH;Lo;0;L;;;;;N;;;;; +11DC7;TOLONG SIKI LETTER J;Lo;0;L;;;;;N;;;;; +11DC8;TOLONG SIKI LETTER JH;Lo;0;L;;;;;N;;;;; +11DC9;TOLONG SIKI LETTER NY;Lo;0;L;;;;;N;;;;; +11DCA;TOLONG SIKI LETTER K;Lo;0;L;;;;;N;;;;; +11DCB;TOLONG SIKI LETTER KH;Lo;0;L;;;;;N;;;;; +11DCC;TOLONG SIKI LETTER G;Lo;0;L;;;;;N;;;;; +11DCD;TOLONG SIKI LETTER GH;Lo;0;L;;;;;N;;;;; +11DCE;TOLONG SIKI LETTER NG;Lo;0;L;;;;;N;;;;; +11DCF;TOLONG SIKI LETTER Y;Lo;0;L;;;;;N;;;;; +11DD0;TOLONG SIKI LETTER R;Lo;0;L;;;;;N;;;;; +11DD1;TOLONG SIKI LETTER L;Lo;0;L;;;;;N;;;;; +11DD2;TOLONG SIKI LETTER V;Lo;0;L;;;;;N;;;;; +11DD3;TOLONG SIKI LETTER NNY;Lo;0;L;;;;;N;;;;; +11DD4;TOLONG SIKI LETTER S;Lo;0;L;;;;;N;;;;; +11DD5;TOLONG SIKI LETTER H;Lo;0;L;;;;;N;;;;; +11DD6;TOLONG SIKI LETTER X;Lo;0;L;;;;;N;;;;; +11DD7;TOLONG SIKI LETTER RR;Lo;0;L;;;;;N;;;;; +11DD8;TOLONG SIKI LETTER RRH;Lo;0;L;;;;;N;;;;; +11DD9;TOLONG SIKI SIGN SELA;Lm;0;L;;;;;N;;;;; +11DDA;TOLONG SIKI SIGN HECAKA;Lo;0;L;;;;;N;;;;; +11DDB;TOLONG SIKI UNGGA;Lo;0;L;;;;;N;;;;; +11DE0;TOLONG SIKI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +11DE1;TOLONG SIKI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +11DE2;TOLONG SIKI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +11DE3;TOLONG SIKI DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +11DE4;TOLONG SIKI DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +11DE5;TOLONG SIKI DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +11DE6;TOLONG SIKI DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +11DE7;TOLONG SIKI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +11DE8;TOLONG SIKI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +11DE9;TOLONG SIKI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 11EE0;MAKASAR LETTER KA;Lo;0;L;;;;;N;;;;; 11EE1;MAKASAR LETTER GA;Lo;0;L;;;;;N;;;;; 11EE2;MAKASAR LETTER NGA;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 38b6130a0..87bbf7807 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 20:20:45 GMT +# Date: 2024-11-14, 22:51:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2115,6 +2115,10 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 11D97 ; R # Mn GUNJALA GONDI VIRAMA 11D98 ; R # Lo GUNJALA GONDI OM 11DA0..11DA9 ; R # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; R # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; R # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; R # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; R # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; R # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; R # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; R # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 30c96c678..a64d16ac8 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 20:20:42 GMT +# Date: 2024-11-14, 22:51:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2499,6 +2499,9 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D67..11D68 ; OLetter # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; OLetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; OLetter # Lo GUNJALA GONDI OM +11DB0..11DD8 ; OLetter # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; OLetter # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; OLetter # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; OLetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; OLetter # Lo KAWI SIGN REPHA 11F04..11F10 ; OLetter # Lo [13] KAWI LETTER A..KAWI LETTER O @@ -2606,7 +2609,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136973 +# Total code points: 137017 # ================================================ @@ -2675,6 +2678,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; Numeric # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Numeric # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -2691,7 +2695,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 785 +# Total code points: 795 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 4dd410759..668852b57 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 20:20:45 GMT +# Date: 2024-11-14, 22:51:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1242,6 +1242,9 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D67..11D68 ; ALetter # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; ALetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; ALetter # Lo GUNJALA GONDI OM +11DB0..11DD8 ; ALetter # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; ALetter # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; ALetter # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; ALetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; ALetter # Lo KAWI SIGN REPHA 11F04..11F10 ; ALetter # Lo [13] KAWI LETTER A..KAWI LETTER O @@ -1369,7 +1372,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33910 +# Total code points: 33954 # ================================================ @@ -1481,6 +1484,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; Numeric # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Numeric # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -1497,7 +1501,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 784 +# Total code points: 794 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 81774fad8..8b939789a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 20:20:01 GMT +# Date: 2024-11-14, 22:51:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1043,6 +1043,10 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11D96 ; L # Mc GUNJALA GONDI SIGN VISARGA 11D98 ; L # Lo GUNJALA GONDI OM 11DA0..11DA9 ; L # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; L # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; L # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; L # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; L # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; L # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF5..11EF6 ; L # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O 11EF7..11EF8 ; L # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION @@ -1221,7 +1225,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815215 code points not listed here. +# The above property value applies to 815161 code points not listed here. # Total code points: 1095476 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index e66191403..b49953af1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 20:20:03 GMT +# Date: 2024-11-14, 22:51:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1773,6 +1773,10 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 11D96 ; 0 # Mc GUNJALA GONDI SIGN VISARGA 11D98 ; 0 # Lo GUNJALA GONDI OM 11DA0..11DA9 ; 0 # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; 0 # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; 0 # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; 0 # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; 0 # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; 0 # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; 0 # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; 0 # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -2076,7 +2080,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821376 code points not listed here. +# The above property value applies to 821322 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 63f4bd761..3ffccd30d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 20:20:06 GMT +# Date: 2024-11-14, 22:51:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1809,6 +1809,10 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 11D97 ; N # Mn GUNJALA GONDI VIRAMA 11D98 ; N # Lo GUNJALA GONDI OM 11DA0..11DA9 ; N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; N # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; N # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; N # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; N # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -2121,7 +2125,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760894 code points not listed here. +# The above property value applies to 760840 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 1267d144f..817a91f0c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 20:20:06 GMT +# Date: 2024-11-14, 22:51:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -539,7 +539,9 @@ FFFE..FFFF ; Cn # [2] .. 11D8F ; Cn # 11D92 ; Cn # 11D99..11D9F ; Cn # [7] .. -11DAA..11EDF ; Cn # [310] .. +11DAA..11DAF ; Cn # [6] .. +11DDC..11DDF ; Cn # [4] .. +11DEA..11EDF ; Cn # [246] .. 11EF9..11EFF ; Cn # [7] .. 11F11 ; Cn # 11F3B..11F3D ; Cn # [3] .. @@ -746,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819328 +# Total code points: 819274 # ================================================ @@ -2165,6 +2167,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 10D4E ; Lm # GARAY VOWEL LENGTH MARK 10D6F ; Lm # GARAY REDUPLICATION MARK 10EC5 ; Lm # ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +11DD9 ; Lm # TOLONG SIKI SIGN SELA 16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16D40..16D42 ; Lm # [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D6B..16D6C ; Lm # [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT @@ -2179,7 +2182,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 406 +# Total code points: 407 # ================================================ @@ -2625,6 +2628,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 11D67..11D68 ; Lo # [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; Lo # [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; Lo # GUNJALA GONDI OM +11DB0..11DD8 ; Lo # [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DDA..11DDB ; Lo # [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; Lo # [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; Lo # KAWI SIGN REPHA 11F04..11F10 ; Lo # [13] KAWI LETTER A..KAWI LETTER O @@ -2719,7 +2724,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136541 +# Total code points: 136584 # ================================================ @@ -3364,6 +3369,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; Nd # [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11F50..11F59 ; Nd # [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Nd # [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Nd # [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -3380,7 +3386,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 770 +# Total code points: 780 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 180323543..07f83f85d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 20:20:08 GMT +# Date: 2024-11-14, 22:51:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757448 code points not listed here. -# Total code points: 894916 +# The above property value applies to 757394 code points not listed here. +# Total code points: 894862 # ================================================ @@ -544,6 +544,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 11C50..11C59 ; NU # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; NU # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; NU # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; NU # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 16A60..16A69 ; NU # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; NU # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; NU # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE @@ -558,7 +559,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 1E950..1E959 ; NU # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; NU # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 705 +# Total code points: 715 # ================================================ @@ -1409,6 +1410,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 11D67..11D68 ; AL # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; AL # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; AL # Lo GUNJALA GONDI OM +11DB0..11DD8 ; AL # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; AL # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; AL # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11FB0 ; AL # Lo LISU LETTER YHA 11FC0..11FD4 ; AL # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; AL # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -1622,7 +1626,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26832 +# Total code points: 26876 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 9195b5d80..3139aa645 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 20:20:08 GMT +# Date: 2024-11-14, 22:51:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -32648,6 +32648,60 @@ FFFD ; REPLACEMENT CHARACTER 11DA7 ; GUNJALA GONDI DIGIT SEVEN 11DA8 ; GUNJALA GONDI DIGIT EIGHT 11DA9 ; GUNJALA GONDI DIGIT NINE +11DB0 ; TOLONG SIKI LETTER I +11DB1 ; TOLONG SIKI LETTER E +11DB2 ; TOLONG SIKI LETTER U +11DB3 ; TOLONG SIKI LETTER O +11DB4 ; TOLONG SIKI LETTER A +11DB5 ; TOLONG SIKI LETTER AA +11DB6 ; TOLONG SIKI LETTER P +11DB7 ; TOLONG SIKI LETTER PH +11DB8 ; TOLONG SIKI LETTER B +11DB9 ; TOLONG SIKI LETTER BH +11DBA ; TOLONG SIKI LETTER M +11DBB ; TOLONG SIKI LETTER T +11DBC ; TOLONG SIKI LETTER TH +11DBD ; TOLONG SIKI LETTER D +11DBE ; TOLONG SIKI LETTER DH +11DBF ; TOLONG SIKI LETTER N +11DC0 ; TOLONG SIKI LETTER TT +11DC1 ; TOLONG SIKI LETTER TTH +11DC2 ; TOLONG SIKI LETTER DD +11DC3 ; TOLONG SIKI LETTER DDH +11DC4 ; TOLONG SIKI LETTER NN +11DC5 ; TOLONG SIKI LETTER C +11DC6 ; TOLONG SIKI LETTER CH +11DC7 ; TOLONG SIKI LETTER J +11DC8 ; TOLONG SIKI LETTER JH +11DC9 ; TOLONG SIKI LETTER NY +11DCA ; TOLONG SIKI LETTER K +11DCB ; TOLONG SIKI LETTER KH +11DCC ; TOLONG SIKI LETTER G +11DCD ; TOLONG SIKI LETTER GH +11DCE ; TOLONG SIKI LETTER NG +11DCF ; TOLONG SIKI LETTER Y +11DD0 ; TOLONG SIKI LETTER R +11DD1 ; TOLONG SIKI LETTER L +11DD2 ; TOLONG SIKI LETTER V +11DD3 ; TOLONG SIKI LETTER NNY +11DD4 ; TOLONG SIKI LETTER S +11DD5 ; TOLONG SIKI LETTER H +11DD6 ; TOLONG SIKI LETTER X +11DD7 ; TOLONG SIKI LETTER RR +11DD8 ; TOLONG SIKI LETTER RRH +11DD9 ; TOLONG SIKI SIGN SELA +11DDA ; TOLONG SIKI SIGN HECAKA +11DDB ; TOLONG SIKI UNGGA +11DE0 ; TOLONG SIKI DIGIT ZERO +11DE1 ; TOLONG SIKI DIGIT ONE +11DE2 ; TOLONG SIKI DIGIT TWO +11DE3 ; TOLONG SIKI DIGIT THREE +11DE4 ; TOLONG SIKI DIGIT FOUR +11DE5 ; TOLONG SIKI DIGIT FIVE +11DE6 ; TOLONG SIKI DIGIT SIX +11DE7 ; TOLONG SIKI DIGIT SEVEN +11DE8 ; TOLONG SIKI DIGIT EIGHT +11DE9 ; TOLONG SIKI DIGIT NINE 11EE0 ; MAKASAR LETTER KA 11EE1 ; MAKASAR LETTER GA 11EE2 ; MAKASAR LETTER NGA @@ -45572,6 +45626,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155203 +# Total code points: 155257 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt index 720458961..74f42dda2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt @@ -1,5 +1,5 @@ # DerivedNumericType-17.0.0.txt -# Date: 2024-11-14, 19:48:30 GMT +# Date: 2024-11-14, 22:51:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -278,6 +278,7 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; Decimal # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11F50..11F59 ; Decimal # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Decimal # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Decimal # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -294,6 +295,6 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 770 +# Total code points: 780 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt index bb4d4cb05..8ca76d328 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt @@ -1,5 +1,5 @@ # DerivedNumericValues-17.0.0.txt -# Date: 2024-11-14, 19:48:31 GMT +# Date: 2024-11-14, 22:51:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -110,6 +110,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 11C50 ; 0.0 ; ; 0 # Nd BHAIKSUKI DIGIT ZERO 11D50 ; 0.0 ; ; 0 # Nd MASARAM GONDI DIGIT ZERO 11DA0 ; 0.0 ; ; 0 # Nd GUNJALA GONDI DIGIT ZERO +11DE0 ; 0.0 ; ; 0 # Nd TOLONG SIKI DIGIT ZERO 11F50 ; 0.0 ; ; 0 # Nd KAWI DIGIT ZERO 16130 ; 0.0 ; ; 0 # Nd GURUNG KHEMA DIGIT ZERO 16A60 ; 0.0 ; ; 0 # Nd MRO DIGIT ZERO @@ -135,7 +136,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1F10B..1F10C ; 0.0 ; ; 0 # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO 1FBF0 ; 0.0 ; ; 0 # Nd SEGMENTED DIGIT ZERO -# Total code points: 97 +# Total code points: 98 # ================================================ @@ -531,6 +532,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 11C5A ; 1.0 ; ; 1 # No BHAIKSUKI NUMBER ONE 11D51 ; 1.0 ; ; 1 # Nd MASARAM GONDI DIGIT ONE 11DA1 ; 1.0 ; ; 1 # Nd GUNJALA GONDI DIGIT ONE +11DE1 ; 1.0 ; ; 1 # Nd TOLONG SIKI DIGIT ONE 11F51 ; 1.0 ; ; 1 # Nd KAWI DIGIT ONE 12415 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESH2 1241E ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESHU @@ -571,7 +573,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 153 +# Total code points: 154 # ================================================ @@ -690,6 +692,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 11C5B ; 2.0 ; ; 2 # No BHAIKSUKI NUMBER TWO 11D52 ; 2.0 ; ; 2 # Nd MASARAM GONDI DIGIT TWO 11DA2 ; 2.0 ; ; 2 # Nd GUNJALA GONDI DIGIT TWO +11DE2 ; 2.0 ; ; 2 # Nd TOLONG SIKI DIGIT TWO 11F52 ; 2.0 ; ; 2 # Nd KAWI DIGIT TWO 12400 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO ASH 12416 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESH2 @@ -734,7 +737,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 155 +# Total code points: 156 # ================================================ @@ -844,6 +847,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 11C5C ; 3.0 ; ; 3 # No BHAIKSUKI NUMBER THREE 11D53 ; 3.0 ; ; 3 # Nd MASARAM GONDI DIGIT THREE 11DA3 ; 3.0 ; ; 3 # Nd GUNJALA GONDI DIGIT THREE +11DE3 ; 3.0 ; ; 3 # Nd TOLONG SIKI DIGIT THREE 11F53 ; 3.0 ; ; 3 # Nd KAWI DIGIT THREE 12401 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE ASH 12408 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE DISH @@ -891,7 +895,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998 23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B -# Total code points: 153 +# Total code points: 154 # ================================================ @@ -995,6 +999,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 11C5D ; 4.0 ; ; 4 # No BHAIKSUKI NUMBER FOUR 11D54 ; 4.0 ; ; 4 # Nd MASARAM GONDI DIGIT FOUR 11DA4 ; 4.0 ; ; 4 # Nd GUNJALA GONDI DIGIT FOUR +11DE4 ; 4.0 ; ; 4 # Nd TOLONG SIKI DIGIT FOUR 11F54 ; 4.0 ; ; 4 # Nd KAWI DIGIT FOUR 12402 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR ASH 12409 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR DISH @@ -1041,7 +1046,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2 2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D -# Total code points: 144 +# Total code points: 145 # ================================================ @@ -1149,6 +1154,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 11C5E ; 5.0 ; ; 5 # No BHAIKSUKI NUMBER FIVE 11D55 ; 5.0 ; ; 5 # Nd MASARAM GONDI DIGIT FIVE 11DA5 ; 5.0 ; ; 5 # Nd GUNJALA GONDI DIGIT FIVE +11DE5 ; 5.0 ; ; 5 # Nd TOLONG SIKI DIGIT FIVE 11F55 ; 5.0 ; ; 5 # Nd KAWI DIGIT FIVE 12403 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE ASH 1240A ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE DISH @@ -1193,7 +1199,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1FBF5 ; 5.0 ; ; 5 # Nd SEGMENTED DIGIT FIVE 20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121 -# Total code points: 142 +# Total code points: 143 # ================================================ @@ -1291,6 +1297,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 11C5F ; 6.0 ; ; 6 # No BHAIKSUKI NUMBER SIX 11D56 ; 6.0 ; ; 6 # Nd MASARAM GONDI DIGIT SIX 11DA6 ; 6.0 ; ; 6 # Nd GUNJALA GONDI DIGIT SIX +11DE6 ; 6.0 ; ; 6 # Nd TOLONG SIKI DIGIT SIX 11F56 ; 6.0 ; ; 6 # Nd KAWI DIGIT SIX 12404 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX ASH 1240B ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX DISH @@ -1330,7 +1337,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1FBF6 ; 6.0 ; ; 6 # Nd SEGMENTED DIGIT SIX 20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA -# Total code points: 126 +# Total code points: 127 # ================================================ @@ -1427,6 +1434,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 11C60 ; 7.0 ; ; 7 # No BHAIKSUKI NUMBER SEVEN 11D57 ; 7.0 ; ; 7 # Nd MASARAM GONDI DIGIT SEVEN 11DA7 ; 7.0 ; ; 7 # Nd GUNJALA GONDI DIGIT SEVEN +11DE7 ; 7.0 ; ; 7 # Nd TOLONG SIKI DIGIT SEVEN 11F57 ; 7.0 ; ; 7 # Nd KAWI DIGIT SEVEN 12405 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN ASH 1240C ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN DISH @@ -1465,7 +1473,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1FBF7 ; 7.0 ; ; 7 # Nd SEGMENTED DIGIT SEVEN 20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001 -# Total code points: 126 +# Total code points: 127 # ================================================ @@ -1559,6 +1567,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 11C61 ; 8.0 ; ; 8 # No BHAIKSUKI NUMBER EIGHT 11D58 ; 8.0 ; ; 8 # Nd MASARAM GONDI DIGIT EIGHT 11DA8 ; 8.0 ; ; 8 # Nd GUNJALA GONDI DIGIT EIGHT +11DE8 ; 8.0 ; ; 8 # Nd TOLONG SIKI DIGIT EIGHT 11F58 ; 8.0 ; ; 8 # Nd KAWI DIGIT EIGHT 12406 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT ASH 1240D ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT DISH @@ -1596,7 +1605,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA 1FBF8 ; 8.0 ; ; 8 # Nd SEGMENTED DIGIT EIGHT -# Total code points: 121 +# Total code points: 122 # ================================================ @@ -1693,6 +1702,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 11C62 ; 9.0 ; ; 9 # No BHAIKSUKI NUMBER NINE 11D59 ; 9.0 ; ; 9 # Nd MASARAM GONDI DIGIT NINE 11DA9 ; 9.0 ; ; 9 # Nd GUNJALA GONDI DIGIT NINE +11DE9 ; 9.0 ; ; 9 # Nd TOLONG SIKI DIGIT NINE 11F59 ; 9.0 ; ; 9 # Nd KAWI DIGIT NINE 12407 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE ASH 1240E ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE DISH @@ -1731,7 +1741,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1FBF9 ; 9.0 ; ; 9 # Nd SEGMENTED DIGIT NINE 2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 127 +# Total code points: 128 # ================================================ diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 91ea52b89..850b02979 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -500,6 +500,7 @@ public enum Block_Values implements Named { Tifinagh("Tifinagh"), Tirhuta("Tirhuta"), Todhri("Todhri"), + Tolong_Siki("Tolong_Siki"), Toto("Toto"), Transport_And_Map_Symbols("Transport_And_Map"), Tulu_Tigalari("Tulu_Tigalari"), @@ -2003,6 +2004,7 @@ public enum Script_Values implements Named { Tirhuta("Tirh"), Tangsa("Tnsa"), Todhri("Todr"), + Tolong_Siki("Tols"), Toto("Toto"), Tulu_Tigalari("Tutg"), Ugaritic("Ugar"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index b9ac48f0e..956790f7a 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -320,6 +320,7 @@ Tibetan ; Tibetan Tifinagh ; Tifinagh Tirhuta ; Tirhuta Todhri ; Todhri +Tolong_Siki ; Tolong_Siki Toto ; Toto Tulu_Tigalari ; Tulu_Tigalari Transport_And_Map ; Transport_And_Map_Symbols From 25ab030b2c0ab183e4aaf9f6d1e74df9a9419369 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Nov 2024 01:41:37 +0100 Subject: [PATCH 27/38] Remove Joining_Group numeric constants from UCD_Types (#967) * Will it build? * spot --- .../java/org/unicode/text/UCD/UCD_Types.java | 115 +----------------- 1 file changed, 1 insertion(+), 114 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index d00021b07..80f60dcd6 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -671,120 +671,7 @@ public interface UCD_Types { JT_T = 5, LIMIT_JOINING_TYPE = 6; public static short NO_SHAPING = 0, - AIN = 1, - ALAPH = 2, - ALEF = 3, - BEH = 4, - BETH = 5, - DAL = 6, - DALATH_RISH = 7, - E = 8, - FEH = 9, - FINAL_SEMKATH = 10, - GAF = 11, - GAMAL = 12, - HAH = 13, - TEH_MARBUTA_GOAL = 14, - HE = 15, - HEH = 16, - HEH_GOAL = 17, - HETH = 18, - KAF = 19, - KAPH = 20, - KNOTTED_HEH = 21, - LAM = 22, - LAMADH = 23, - MEEM = 24, - MIM = 25, - NOON = 26, - NUN = 27, - PE = 28, - QAF = 29, - QAPH = 30, - REH = 31, - REVERSED_PE = 32, - SAD = 33, - SADHE = 34, - SEEN = 35, - SEMKATH = 36, - SHIN = 37, - SWASH_KAF = 38, - TAH = 39, - TAW = 40, - TEH_MARBUTA = 41, - TETH = 42, - WAW = 43, - SYRIAC_WAW = 44, - YEH = 45, - YEH_BARREE = 46, - YEH_WITH_TAIL = 47, - YUDH = 48, - YUDH_HE = 49, - ZAIN = 50, - ZHAIN = 51, - KHAPH = 52, - FE = 53, - BURUSHASKI_YEH_BARREE = 54, - FARSI_YEH = 55, - NYA = 56, - ROHINGYA_YEH = 57, - HAMZAH_ON_HA_GOAL = 58, - STRAIGHT_WAW = 59, - MANICHAEAN_ALEPH = 60, - MANICHAEAN_AYIN = 61, - MANICHAEAN_BETH = 62, - MANICHAEAN_DALETH = 63, - MANICHAEAN_DHAMEDH = 64, - MANICHAEAN_FIVE = 65, - MANICHAEAN_GIMEL = 66, - MANICHAEAN_HETH = 67, - MANICHAEAN_HUNDRED = 68, - MANICHAEAN_KAPH = 69, - MANICHAEAN_LAMEDH = 70, - MANICHAEAN_MEM = 71, - MANICHAEAN_NUN = 72, - MANICHAEAN_ONE = 73, - MANICHAEAN_PE = 74, - MANICHAEAN_QOPH = 75, - MANICHAEAN_RESH = 76, - MANICHAEAN_SADHE = 77, - MANICHAEAN_SAMEKH = 78, - MANICHAEAN_TAW = 79, - MANICHAEAN_TEN = 80, - MANICHAEAN_TETH = 81, - MANICHAEAN_THAMEDH = 82, - MANICHAEAN_TWENTY = 83, - MANICHAEAN_WAW = 84, - MANICHAEAN_YODH = 85, - MANICHAEAN_ZAYIN = 86, - // Unicode 9: - AFRICAN_FEH = 87, - AFRICAN_QAF = 88, - AFRICAN_NOON = 89, - // Unicode 10: - MALAYALAM_NGA = 90, - MALAYALAM_JA = 91, - MALAYALAM_NYA = 92, - MALAYALAM_TTA = 93, - MALAYALAM_NNA = 94, - MALAYALAM_NNNA = 95, - MALAYALAM_BHA = 96, - MALAYALAM_RA = 97, - MALAYALAM_LLA = 98, - MALAYALAM_LLLA = 99, - MALAYALAM_SSA = 100, - // Unicode 11, non-singletons: - Hanifi_Rohingya_Pa = 101, - Hanifi_Rohingya_Kinna_Ya = 102, - // Unicode 14 - THIN_YEH = 103, - VERTICAL_TAIL = 104, - // Unicode 16 - KASHMIRI_YEH = 105, - // Unicode n > 16 - THIN_NOON = 106, - // limit - LIMIT_JOINING_GROUP = THIN_NOON + 1; + LIMIT_JOINING_GROUP = (short) UCD_Names.JOINING_GROUP.length; static final byte NFD = 0, NFC = 1, NFKD = 2, NFKC = 3; public static final int NF_COMPATIBILITY_MASK = 2, NF_COMPOSITION_MASK = 1; From c255ee42ba086f618f8761f69ddce8c5e2f19f92 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Nov 2024 16:04:10 +0100 Subject: [PATCH 28/38] Two small form CJK characters for Chinese (#666) * UnicodeData.txt lines reconstructed from L2/23-284 * LineBreak.txt as described by L2/23-284 * script * Ideographic Extender * Regenerate UCD * ideohack * Remove changelog * gc=Lm for the small ers * Regenerate UCD * Not lowercase --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 23 ++++++++++++------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- unicodetools/data/ucd/dev/PropList.txt | 8 ++++--- unicodetools/data/ucd/dev/Scripts.txt | 5 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 2 ++ .../data/ucd/dev/VerticalOrientation.txt | 5 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 5 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 5 ++-- .../dev/extracted/DerivedCombiningClass.txt | 5 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 5 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 9 ++++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 9 ++++---- .../data/ucd/dev/extracted/DerivedName.txt | 6 +++-- .../unicode/text/UCD/UnicodeInvariantTest.txt | 8 +++++-- 16 files changed, 67 insertions(+), 39 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 249eb64c0..aa7d3e957 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-14, 22:50:46 GMT +# Date: 2024-11-15, 13:43:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2091,7 +2091,8 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 16DA0..16DA9 ; 17.0 # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY +16FF2..16FF3 ; 17.0 # [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER -# Total code points: 259 +# Total code points: 261 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1f3b17477..bd4be2002 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-14, 22:51:10 GMT +# Date: 2024-11-15, 13:43:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1335,6 +1335,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FE0..16FE1 ; Alphabetic # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; Alphabetic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Alphabetic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -1456,7 +1457,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142933 +# Total code points: 142935 # ================================================ @@ -3492,6 +3493,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 16FE0..16FE1 ; Case_Ignorable # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Case_Ignorable # Lm OLD CHINESE ITERATION MARK 16FE4 ; Case_Ignorable # Mn KHITAN SMALL SCRIPT FILLER +16FF2..16FF3 ; Case_Ignorable # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1AFF0..1AFF3 ; Case_Ignorable # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Case_Ignorable # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Case_Ignorable # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -3533,7 +3535,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2790 +# Total code points: 2792 # ================================================ @@ -6904,6 +6906,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; ID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; ID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -7014,7 +7017,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141432 +# Total code points: 141434 # ================================================ @@ -8293,6 +8296,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FE3 ; ID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; ID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; ID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -8441,7 +8445,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144765 +# Total code points: 144767 # ================================================ @@ -9117,6 +9121,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; XID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; XID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -9227,7 +9232,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141409 +# Total code points: 141411 # ================================================ @@ -10507,6 +10512,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FE3 ; XID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; XID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; XID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -10655,7 +10661,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144746 +# Total code points: 144748 # ================================================ @@ -12727,6 +12733,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE0..16FE1 ; Grapheme_Base # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; Grapheme_Base # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Grapheme_Base # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -12932,7 +12939,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152951 +# Total code points: 152953 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 2525f033e..afa21d0f9 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 22:51:17 GMT +# Date: 2024-11-15, 13:43:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2382,6 +2382,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 2328c554e..6ee8d51fa 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-14, 22:51:18 GMT +# Date: 2024-11-15, 13:43:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3293,6 +3293,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FE3 ; NS # Lm OLD CHINESE ITERATION MARK 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; NS # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index b88ef9c38..e712b4c46 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-14, 22:51:30 GMT +# Date: 2024-11-15, 13:43:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -878,6 +878,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER +16FF2..16FF3 ; Ideographic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Ideographic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -892,7 +893,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106477 +# Total code points: 106479 # ================================================ @@ -1207,11 +1208,12 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM 16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; Extender # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E5EF ; Extender # Mn OL ONAL SIGN IKIR 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 61 +# Total code points: 63 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index f1f9cd011..f0140c396 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 22:51:49 GMT +# Date: 2024-11-15, 13:44:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1597,6 +1597,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 16FE2 ; Han # Po OLD CHINESE HOOK MARK 16FE3 ; Han # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; Han # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 20000..2A6DF ; Han # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Han # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -1607,7 +1608,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 99030 +# Total code points: 99032 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index b4fbb4a49..cb5139da8 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -30539,6 +30539,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FE4;KHITAN SMALL SCRIPT FILLER;Mn;0;NSM;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; +16FF2;CHINESE SMALL SIMPLIFIED ER;Lm;0;L;;;;;N;;;;; +16FF3;CHINESE SMALL TRADITIONAL ER;Lm;0;L;;;;;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; 187F7;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 87bbf7807..99c55523f 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-14, 22:51:52 GMT +# Date: 2024-11-15, 13:44:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2211,7 +2211,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FE4 ; U # Mn KHITAN SMALL SCRIPT FILLER 16FE5..16FEF ; U # Cn [11] .. 16FF0..16FF1 ; U # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -16FF2..16FFF ; U # Cn [14] .. +16FF2..16FF3 ; U # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FFF ; U # Cn [12] .. 17000..187F7 ; U # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 187F8..187FF ; U # Cn [8] .. 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index a64d16ac8..cee1f823c 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-14, 22:51:50 GMT +# Date: 2024-11-15, 13:44:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2534,6 +2534,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; OLetter # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; OLetter # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -2609,7 +2610,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137017 +# Total code points: 137019 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 8b939789a..234e0b7fa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-14, 22:51:08 GMT +# Date: 2024-11-15, 13:43:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1114,6 +1114,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FE0..16FE1 ; L # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; L # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; L # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -1225,7 +1226,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815161 code points not listed here. +# The above property value applies to 815159 code points not listed here. # Total code points: 1095476 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index b49953af1..14a42b8ea 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-14, 22:51:09 GMT +# Date: 2024-11-15, 13:43:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1859,6 +1859,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE2 ; 0 # Po OLD CHINESE HOOK MARK 16FE3 ; 0 # Lm OLD CHINESE ITERATION MARK 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER +16FF2..16FF3 ; 0 # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 17000..187F7 ; 0 # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; 0 # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; 0 # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -2080,7 +2081,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821322 code points not listed here. +# The above property value applies to 821320 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 3ffccd30d..5e3c829cf 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-14, 22:51:12 GMT +# Date: 2024-11-15, 13:43:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1896,6 +1896,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16F51..16F87 ; N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2125,7 +2126,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760840 code points not listed here. +# The above property value applies to 760838 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 817a91f0c..2df83bcc1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-14, 22:51:13 GMT +# Date: 2024-11-15, 13:43:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -579,7 +579,7 @@ FFFE..FFFF ; Cn # [2] .. 16F88..16F8E ; Cn # [7] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. -16FF2..16FFF ; Cn # [14] .. +16FF4..16FFF ; Cn # [12] .. 187F8..187FF ; Cn # [8] .. 18CD6..18CFE ; Cn # [41] .. 18D09..1AFEF ; Cn # [8935] .. @@ -748,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819274 +# Total code points: 819272 # ================================================ @@ -2174,6 +2174,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 16F93..16F9F ; Lm # [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; Lm # [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Lm # OLD CHINESE ITERATION MARK +16FF2..16FF3 ; Lm # [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1AFF0..1AFF3 ; Lm # [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Lm # [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Lm # [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2182,7 +2183,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 407 +# Total code points: 409 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 07f83f85d..30f586a5a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-14, 22:51:14 GMT +# Date: 2024-11-15, 13:43:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757394 code points not listed here. -# Total code points: 894862 +# The above property value applies to 757392 code points not listed here. +# Total code points: 894860 # ================================================ @@ -357,9 +357,10 @@ FF9E..FF9F ; NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KA 16FE0..16FE1 ; NS # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE2 ; NS # Po OLD CHINESE HOOK MARK 16FE3 ; NS # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; NS # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1F679..1F67B ; NS # So [3] HEAVY INTERROBANG ORNAMENT..HEAVY SANS-SERIF INTERROBANG ORNAMENT -# Total code points: 35 +# Total code points: 37 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 3139aa645..391287961 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-14, 22:51:15 GMT +# Date: 2024-11-15, 13:43:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37210,6 +37210,8 @@ FFFD ; REPLACEMENT CHARACTER 16FE4 ; KHITAN SMALL SCRIPT FILLER 16FF0 ; VIETNAMESE ALTERNATE READING MARK CA 16FF1 ; VIETNAMESE ALTERNATE READING MARK NHAY +16FF2 ; CHINESE SMALL SIMPLIFIED ER +16FF3 ; CHINESE SMALL TRADITIONAL ER 17000..187F7 ; TANGUT IDEOGRAPH-* 18800 ; TANGUT COMPONENT-001 18801 ; TANGUT COMPONENT-002 @@ -45626,6 +45628,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155257 +# Total code points: 155259 # EOF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 4e4685029..51d2d14ce 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -722,7 +722,7 @@ Let $nonLowercaseSmallLetters := [ \N{TURNED GREEK SMALL LETTER IOTA} \p{name=/^(SQUARED|PARENTHESIZED|TAG) LATIN SMALL LETTER/} ] -Let $nonLowercaseSmallModifierLetters := [ \p{gc=Lm} & \p{name=/^ARABIC SMALL/} ] +Let $nonLowercaseSmallModifierLetters := [ \p{gc=Lm} & \p{name=/^(ARABIC|CHINESE) SMALL/} ] [ \p{name=/\bSMALL LETTER\b/}-\p{gc=Mn}-\p{gc=Lt} - $nonLowercaseSmallLetters ] ⊆ \p{Lowercase} [ [\p{gc=Lm} & \p{name=/SMALL/}] - $nonLowercaseSmallModifierLetters ] ⊆ \p{Lowercase} @@ -1202,7 +1202,11 @@ $punct ⊇ [[\u0021-\u007E] - [0-9 A-Z a-z]] # The Khitan Small Script filler is a Nonspacing Mark. # The other characters are numerals (the Hangzhou ten through thirty are compatibility decomposable, # but not the one through nine) and have Script=Han. -Let $NonOtherLetterIdeographs := [\N{KHITAN SMALL SCRIPT FILLER} 〇 〡-〩 〸-〺] +Let $NonOtherLetterIdeographs := [ + \N{KHITAN SMALL SCRIPT FILLER} + 〇 〡-〩 〸-〺 + \p{NAME=/^CHINESE SMALL (SIMPLIFIED|TRADITIONAL) ER$/} +] $NonOtherLetterIdeographs = [\p{Ideographic} - \p{gc=Lo}] # Ideographic closing mark, gc=Lo. Let $CommonIdeographs := [〆] From 0bdae18de018a6ec3af37e087f964075ae2c08a4 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Nov 2024 16:30:33 +0100 Subject: [PATCH 29/38] =?UTF-8?q?=E6=8F=9A=E7=90=B4=20slow=202..4=20(#782)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * UnicodeData.txt lines from L2/24-071R * lb=ID like the numerals per the proposal * Han like the numerals per the proposal * Regenerate UCD * Ideographic like the numerals * Regenerate UCD * Is Lo really appropriate here? * Rename them and make them Nl * Regenerate UCD * Revert the invariant test change * fix the invariant * 死馬 * Regenerate UCD * halb --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 +++--- .../data/ucd/dev/DerivedCoreProperties.txt | 20 ++++++++++++------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- unicodetools/data/ucd/dev/PropList.txt | 5 +++-- unicodetools/data/ucd/dev/Scripts.txt | 5 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 3 +++ .../data/ucd/dev/VerticalOrientation.txt | 5 +++-- .../dev/auxiliary/SentenceBreakProperty.txt | 5 +++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 5 +++-- .../dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 9 +++++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 7 +++++-- .../ucd/dev/extracted/DerivedNumericType.txt | 5 +++-- .../dev/extracted/DerivedNumericValues.txt | 11 ++++++---- .../unicode/text/UCD/UnicodeInvariantTest.txt | 11 +++++----- 18 files changed, 75 insertions(+), 47 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index aa7d3e957..65de50da3 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-15, 13:43:01 GMT +# Date: 2024-11-15, 15:06:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2091,8 +2091,8 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 16DA0..16DA9 ; 17.0 # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY -16FF2..16FF3 ; 17.0 # [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF2..16FF6 ; 17.0 # [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS -# Total code points: 261 +# Total code points: 264 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index bd4be2002..94bb32a96 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-15, 13:43:27 GMT +# Date: 2024-11-15, 15:06:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1336,6 +1336,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; Alphabetic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Alphabetic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Alphabetic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -1457,7 +1458,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142935 +# Total code points: 142938 # ================================================ @@ -6907,6 +6908,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; ID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; ID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -7017,7 +7019,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141434 +# Total code points: 141437 # ================================================ @@ -8297,6 +8299,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; ID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; ID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -8445,7 +8448,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144767 +# Total code points: 144770 # ================================================ @@ -9122,6 +9125,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; XID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; XID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; XID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -9232,7 +9236,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141411 +# Total code points: 141414 # ================================================ @@ -10513,6 +10517,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; XID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; XID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; XID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -10661,7 +10666,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144748 +# Total code points: 144751 # ================================================ @@ -12734,6 +12739,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; Grapheme_Base # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Grapheme_Base # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Grapheme_Base # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -12939,7 +12945,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152953 +# Total code points: 152956 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index afa21d0f9..307e7abf1 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 13:43:35 GMT +# Date: 2024-11-15, 15:06:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2383,6 +2383,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; N # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 6ee8d51fa..a856d05be 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-15, 13:43:36 GMT +# Date: 2024-11-15, 15:06:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3294,6 +3294,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; NS # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index e712b4c46..bc5673c73 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-15, 13:43:48 GMT +# Date: 2024-11-15, 15:06:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -879,6 +879,7 @@ F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COM FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 16FF2..16FF3 ; Ideographic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Ideographic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Ideographic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -893,7 +894,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106479 +# Total code points: 106482 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index f0140c396..9e998924e 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-15, 13:44:08 GMT +# Date: 2024-11-15, 15:07:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1598,6 +1598,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 16FE3 ; Han # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; Han # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Han # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 20000..2A6DF ; Han # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Han # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -1608,7 +1609,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 99032 +# Total code points: 99035 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index cb5139da8..966ba1ba5 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -30541,6 +30541,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 16FF2;CHINESE SMALL SIMPLIFIED ER;Lm;0;L;;;;;N;;;;; 16FF3;CHINESE SMALL TRADITIONAL ER;Lm;0;L;;;;;N;;;;; +16FF4;YANGQIN SIGN SLOW ONE BEAT;Nl;0;L;;;;1;N;;;;; +16FF5;YANGQIN SIGN SLOW THREE HALF BEATS;Nl;0;L;;;;3/2;N;;;;; +16FF6;YANGQIN SIGN SLOW TWO BEATS;Nl;0;L;;;;2;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; 187F7;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 99c55523f..4edfd8b9c 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-15, 13:44:11 GMT +# Date: 2024-11-15, 15:07:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2212,7 +2212,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FE5..16FEF ; U # Cn [11] .. 16FF0..16FF1 ; U # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; U # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER -16FF4..16FFF ; U # Cn [12] .. +16FF4..16FF6 ; U # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS +16FF7..16FFF ; U # Cn [9] .. 17000..187F7 ; U # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 187F8..187FF ; U # Cn [8] .. 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index cee1f823c..f2cdd7c5a 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-15, 13:44:09 GMT +# Date: 2024-11-15, 15:07:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2535,6 +2535,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; OLetter # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; OLetter # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; OLetter # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -2610,7 +2611,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137019 +# Total code points: 137022 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 234e0b7fa..4c52a387e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-15, 13:43:25 GMT +# Date: 2024-11-15, 15:06:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1115,6 +1115,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FE3 ; L # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; L # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -1226,7 +1227,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815159 code points not listed here. +# The above property value applies to 815156 code points not listed here. # Total code points: 1095476 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 14a42b8ea..c4c01ffa0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-15, 13:43:27 GMT +# Date: 2024-11-15, 15:06:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1860,6 +1860,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE3 ; 0 # Lm OLD CHINESE ITERATION MARK 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER 16FF2..16FF3 ; 0 # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; 0 # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; 0 # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; 0 # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; 0 # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -2081,7 +2082,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821320 code points not listed here. +# The above property value applies to 821317 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 5e3c829cf..c69b23b4b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 13:43:29 GMT +# Date: 2024-11-15, 15:06:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1897,6 +1897,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; N # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2126,7 +2127,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760838 code points not listed here. +# The above property value applies to 760835 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 2df83bcc1..d1fa21d11 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-15, 13:43:30 GMT +# Date: 2024-11-15, 15:06:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -579,7 +579,7 @@ FFFE..FFFF ; Cn # [2] .. 16F88..16F8E ; Cn # [7] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. -16FF4..16FFF ; Cn # [12] .. +16FF7..16FFF ; Cn # [9] .. 187F8..187FF ; Cn # [8] .. 18CD6..18CFE ; Cn # [41] .. 18D09..1AFEF ; Cn # [8935] .. @@ -748,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819272 +# Total code points: 819269 # ================================================ @@ -3405,8 +3405,9 @@ A6E6..A6EF ; Nl # [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM 1034A ; Nl # GOTHIC LETTER NINE HUNDRED 103D1..103D5 ; Nl # [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED 12400..1246E ; Nl # [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +16FF4..16FF6 ; Nl # [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -# Total code points: 236 +# Total code points: 239 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 30f586a5a..1af810dfe 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-15, 13:43:32 GMT +# Date: 2024-11-15, 15:06:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757392 code points not listed here. -# Total code points: 894860 +# The above property value applies to 757389 code points not listed here. +# Total code points: 894857 # ================================================ @@ -1776,6 +1776,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 113D4..113D5 ; ID # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA 113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL +16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 @@ -1867,7 +1868,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172421 +# Total code points: 172424 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 391287961..165570f71 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-15, 13:43:32 GMT +# Date: 2024-11-15, 15:06:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37212,6 +37212,9 @@ FFFD ; REPLACEMENT CHARACTER 16FF1 ; VIETNAMESE ALTERNATE READING MARK NHAY 16FF2 ; CHINESE SMALL SIMPLIFIED ER 16FF3 ; CHINESE SMALL TRADITIONAL ER +16FF4 ; YANGQIN SIGN SLOW ONE BEAT +16FF5 ; YANGQIN SIGN SLOW THREE HALF BEATS +16FF6 ; YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; TANGUT IDEOGRAPH-* 18800 ; TANGUT COMPONENT-001 18801 ; TANGUT COMPONENT-002 @@ -45628,6 +45631,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155259 +# Total code points: 155262 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt index 74f42dda2..c66993789 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt @@ -1,5 +1,5 @@ # DerivedNumericType-17.0.0.txt -# Date: 2024-11-14, 22:51:17 GMT +# Date: 2024-11-15, 15:06:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -165,6 +165,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 12400..1246E ; Numeric # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 16B5B..16B61 ; Numeric # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS 16E80..16E96 ; Numeric # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16FF4..16FF6 ; Numeric # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 1D2C0..1D2D3 ; Numeric # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; Numeric # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D360..1D378 ; Numeric # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE @@ -192,7 +193,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 2626D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2626D 2F890 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 1114 +# Total code points: 1117 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt index 8ca76d328..93c6b84e3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt @@ -1,5 +1,5 @@ # DerivedNumericValues-17.0.0.txt -# Date: 2024-11-14, 22:51:17 GMT +# Date: 2024-11-15, 15:06:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -548,6 +548,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 16DA1 ; 1.0 ; ; 1 # Nd CHISOI DIGIT ONE 16E81 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE 16E94 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE ALTERNATE FORM +16FF4 ; 1.0 ; ; 1 # Nl YANGQIN SIGN SLOW ONE BEAT 1CCF1 ; 1.0 ; ; 1 # Nd OUTLINED DIGIT ONE 1D2C1 ; 1.0 ; ; 1 # No KAKTOVIK NUMERAL ONE 1D2E1 ; 1.0 ; ; 1 # No MAYAN NUMERAL ONE @@ -573,13 +574,14 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 154 +# Total code points: 155 # ================================================ 0F2B ; 1.5 ; ; 3/2 # No TIBETAN DIGIT HALF TWO +16FF5 ; 1.5 ; ; 3/2 # Nl YANGQIN SIGN SLOW THREE HALF BEATS -# Total code points: 1 +# Total code points: 2 # ================================================ @@ -712,6 +714,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 16DA2 ; 2.0 ; ; 2 # Nd CHISOI DIGIT TWO 16E82 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO 16E95 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO ALTERNATE FORM +16FF6 ; 2.0 ; ; 2 # Nl YANGQIN SIGN SLOW TWO BEATS 1CCF2 ; 2.0 ; ; 2 # Nd OUTLINED DIGIT TWO 1D2C2 ; 2.0 ; ; 2 # No KAKTOVIK NUMERAL TWO 1D2E2 ; 2.0 ; ; 2 # No MAYAN NUMERAL TWO @@ -737,7 +740,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 156 +# Total code points: 157 # ================================================ diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 51d2d14ce..b45806a9f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -1202,12 +1202,13 @@ $punct ⊇ [[\u0021-\u007E] - [0-9 A-Z a-z]] # The Khitan Small Script filler is a Nonspacing Mark. # The other characters are numerals (the Hangzhou ten through thirty are compatibility decomposable, # but not the one through nine) and have Script=Han. -Let $NonOtherLetterIdeographs := [ - \N{KHITAN SMALL SCRIPT FILLER} - 〇 〡-〩 〸-〺 - \p{NAME=/^CHINESE SMALL (SIMPLIFIED|TRADITIONAL) ER$/} +Let $NonOtherLetterIdeographs := [\p{Ideographic} - \p{gc=Lo}] +$NonOtherLetterIdeographs = [ + \N{KHITAN SMALL SCRIPT FILLER} + 〇 〡-〩 〸-〺 + \p{NAME=/^CHINESE SMALL (SIMPLIFIED|TRADITIONAL) ER$/} + \p{Name=/^YANGQIN SIGN SLOW (ONE|THREE HALF|TWO) BEATS?$/} ] -$NonOtherLetterIdeographs = [\p{Ideographic} - \p{gc=Lo}] # Ideographic closing mark, gc=Lo. Let $CommonIdeographs := [〆] $CommonIdeographs = [\p{Ideographic} & \p{sc=Common}] From e9cb07502ef4fa730eefbf507272bf1264fc60f8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Nov 2024 16:46:06 +0100 Subject: [PATCH 30/38] 2 Tangut components and 28 Tangut ideographs (#578) * UnicodeData.txt and hardcoded ranges from L2/23-246 * LineBreak.txt * new block * Regenerate UCD * GenerateEnums * EAW=W * Regenerate UCD * SUP * script * Regenerate UCD * Do not break UnicodeData ranges * Regenerate UCD * sources * Tangut Components Supplement to vo=U * Regenerate UCD * Ideographic * Regenerate UCD --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 9 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 50 ++++++++-------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 7 ++- unicodetools/data/ucd/dev/LineBreak.txt | 7 ++- unicodetools/data/ucd/dev/PropList.txt | 10 ++-- .../data/ucd/dev/PropertyValueAliases.txt | 3 +- unicodetools/data/ucd/dev/Scripts.txt | 10 ++-- unicodetools/data/ucd/dev/TangutSources.txt | 58 +++++++++++++++++++ unicodetools/data/ucd/dev/UnicodeData.txt | 6 +- .../data/ucd/dev/VerticalOrientation.txt | 12 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 10 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++-- .../dev/extracted/DerivedCombiningClass.txt | 10 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 14 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 16 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 14 ++--- .../data/ucd/dev/extracted/DerivedName.txt | 10 ++-- .../org/unicode/props/UcdPropertyValues.java | 1 + .../unicode/text/UCD/MakeUnicodeFiles.java | 1 + .../main/java/org/unicode/text/UCD/UCD.java | 10 ++++ .../java/org/unicode/text/UCD/UCD_Types.java | 6 ++ .../org/unicode/text/UCD/MakeUnicodeFiles.txt | 1 + .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + 24 files changed, 184 insertions(+), 93 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 4101b2488..ff1048e3e 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -315,6 +315,7 @@ FFF0..FFFF; Specials 18800..18AFF; Tangut Components 18B00..18CFF; Khitan Small Script 18D00..18D7F; Tangut Supplement +18D80..18DFF; Tangut Components Supplement 1AFF0..1AFFF; Kana Extended-B 1B000..1B0FF; Kana Supplement 1B100..1B12F; Kana Extended-A diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 65de50da3..4f59a0c5e 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-15, 15:06:00 GMT +# Date: 2024-11-15, 15:32:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2041,7 +2041,9 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 13460..143FA ; 16.0 # [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 16100..16139 ; 16.0 # [58] GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE 16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE +187F8..187FF ; 16.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF +18D09..18D1C ; 16.0 # [20] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE 1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE @@ -2057,7 +2059,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5185 +# Total code points: 5213 # ================================================ @@ -2092,7 +2094,8 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16FF2..16FF6 ; 17.0 # [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS +18D80..18D81 ; 17.0 # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 264 +# Total code points: 266 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 94bb32a96..7f8bb3478 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-15, 15:06:24 GMT +# Date: 2024-11-15, 15:32:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1337,9 +1337,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; Alphabetic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Alphabetic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; Alphabetic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; Alphabetic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Alphabetic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1458,7 +1458,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142938 +# Total code points: 142968 # ================================================ @@ -6909,9 +6909,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; ID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; ID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; ID_Start # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -7019,7 +7019,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141437 +# Total code points: 141467 # ================================================ @@ -8300,9 +8300,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; ID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; ID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; ID_Continue # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8448,7 +8448,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144770 +# Total code points: 144800 # ================================================ @@ -9126,9 +9126,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; XID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; XID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; XID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; XID_Start # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; XID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -9236,7 +9236,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141414 +# Total code points: 141444 # ================================================ @@ -10518,9 +10518,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; XID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; XID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; XID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; XID_Continue # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; XID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -10666,7 +10666,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144751 +# Total code points: 144781 # ================================================ @@ -12740,9 +12740,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; Grapheme_Base # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Grapheme_Base # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; Grapheme_Base # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; Grapheme_Base # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Grapheme_Base # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Grapheme_Base # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -12945,7 +12945,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152956 +# Total code points: 152986 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 307e7abf1..19ce796cb 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 15:06:30 GMT +# Date: 2024-11-15, 15:32:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2384,11 +2384,12 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; N # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +17000..187FF ; W # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; W # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index a856d05be..dac04fee7 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-15, 15:06:31 GMT +# Date: 2024-11-15, 15:32:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3295,11 +3295,12 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FF0..16FF1 ; CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; NS # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +17000..187FF ; ID # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; AL # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index bc5673c73..e43a9d672 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-15, 15:06:43 GMT +# Date: 2024-11-15, 15:33:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -880,9 +880,9 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 16FF2..16FF3 ; Ideographic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Ideographic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; Ideographic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; Ideographic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Ideographic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -894,7 +894,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106482 +# Total code points: 106512 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 224a2e06c..1b2611436 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-14, 22:51:32 GMT +# Date: 2024-11-15, 15:33:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -467,6 +467,7 @@ blk; Tamil_Sup ; Tamil_Supplement blk; Tangsa ; Tangsa blk; Tangut ; Tangut blk; Tangut_Components ; Tangut_Components +blk; Tangut_Components_Sup ; Tangut_Components_Supplement blk; Tangut_Sup ; Tangut_Supplement blk; Telugu ; Telugu blk; Thaana ; Thaana diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 9e998924e..3c8922f9d 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-15, 15:07:02 GMT +# Date: 2024-11-15, 15:33:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2768,11 +2768,11 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ 16FE0 ; Tangut # Lm TANGUT ITERATION MARK -17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF ; Tangut # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18D00..18D08 ; Tangut # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 +18D00..18D1C ; Tangut # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Tangut # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 6914 +# Total code points: 6944 # ================================================ diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index 9c888daeb..ad9bc8c48 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -51,6 +51,8 @@ # [Grammar of the Tangut Language]. Moscow, 1968. # UTN42 = Andrew West and Viacheslav Zaytsev, Tangut Character Additions and Glyph Corrections, # Unicode Technical Note #42. 2019-12-21. +# N5217 = Andrew West, Proposal to encode 2 Tangut components and 28 Tangut ideographs, +# WG2 N5217 = L2/23-149. 2023-10-02. # # For more information, see Section 18.11, Tangut, of the # core specification. @@ -12327,6 +12329,22 @@ U+187F6 kTGT_MergedSrc UTN42-010 U+187F6 kRSTUnicode 79.14 U+187F7 kTGT_MergedSrc UTN42-011 U+187F7 kRSTUnicode 79.19 +U+187F8 kTGT_MergedSrc N5217-01 +U+187F8 kRSTUnicode 206.19 +U+187F9 kTGT_MergedSrc N5217-02 +U+187F9 kRSTUnicode 267.9 +U+187FA kTGT_MergedSrc N5217-03 +U+187FA kRSTUnicode 75.19 +U+187FB kTGT_MergedSrc N5217-04 +U+187FB kRSTUnicode 490.12 +U+187FC kTGT_MergedSrc N5217-05 +U+187FC kRSTUnicode 114.9 +U+187FD kTGT_MergedSrc N5217-06 +U+187FD kRSTUnicode 217.15 +U+187FE kTGT_MergedSrc N5217-07 +U+187FE kRSTUnicode 519.13 +U+187FF kTGT_MergedSrc N5217-08 +U+187FF kRSTUnicode 736.18 U+18D00 kTGT_MergedSrc L2008-3489 U+18D00 kRSTUnicode 17.7 U+18D01 kTGT_MergedSrc L2008-1667 @@ -12345,5 +12363,45 @@ U+18D07 kTGT_MergedSrc L2008-1106 U+18D07 kRSTUnicode 485.12 U+18D08 kTGT_MergedSrc L2008-4456 U+18D08 kRSTUnicode 674.14 +U+18D09 kTGT_MergedSrc N5217-09 +U+18D09 kRSTUnicode 590.14 +U+18D0A kTGT_MergedSrc N5217-10 +U+18D0A kRSTUnicode 267.18 +U+18D0B kTGT_MergedSrc N5217-11 +U+18D0B kRSTUnicode 267.10 +U+18D0C kTGT_MergedSrc N5217-12 +U+18D0C kRSTUnicode 655.13 +U+18D0D kTGT_MergedSrc N5217-13 +U+18D0D kRSTUnicode 456.14 +U+18D0E kTGT_MergedSrc N5217-14 +U+18D0E kRSTUnicode 273.9 +U+18D0F kTGT_MergedSrc N5217-15 +U+18D0F kRSTUnicode 278.11 +U+18D10 kTGT_MergedSrc N5217-16 +U+18D10 kRSTUnicode 106.13 +U+18D11 kTGT_MergedSrc N5217-17 +U+18D11 kRSTUnicode 75.8 +U+18D12 kTGT_MergedSrc N5217-18 +U+18D12 kRSTUnicode 17.7 +U+18D13 kTGT_MergedSrc N5217-19 +U+18D13 kRSTUnicode 106.15 +U+18D14 kTGT_MergedSrc N5217-20 +U+18D14 kRSTUnicode 167.15 +U+18D15 kTGT_MergedSrc N5217-21 +U+18D15 kRSTUnicode 462.11 +U+18D16 kTGT_MergedSrc N5217-22 +U+18D16 kRSTUnicode 579.17 +U+18D17 kTGT_MergedSrc N5217-23 +U+18D17 kRSTUnicode 210.13 +U+18D18 kTGT_MergedSrc N5217-24 +U+18D18 kRSTUnicode 278.13 +U+18D19 kTGT_MergedSrc N5217-25 +U+18D19 kRSTUnicode 141.19 +U+18D1A kTGT_MergedSrc N5217-26 +U+18D1A kRSTUnicode 75.13 +U+18D1B kTGT_MergedSrc N5217-27 +U+18D1B kRSTUnicode 36.7 +U+18D1C kTGT_MergedSrc N5217-28 +U+18D1C kRSTUnicode 141.9 # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 966ba1ba5..9b4883e11 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -30545,7 +30545,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF5;YANGQIN SIGN SLOW THREE HALF BEATS;Nl;0;L;;;;3/2;N;;;;; 16FF6;YANGQIN SIGN SLOW TWO BEATS;Nl;0;L;;;;2;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; -187F7;;Lo;0;L;;;;;N;;;;; +187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; 18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;; 18802;TANGUT COMPONENT-003;Lo;0;L;;;;;N;;;;; @@ -31786,7 +31786,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18CFF;KHITAN SMALL SCRIPT CHARACTER-18CFF;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; -18D08;;Lo;0;L;;;;;N;;;;; +18D1C;;Lo;0;L;;;;;N;;;;; +18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; +18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 1AFF0;KATAKANA LETTER MINNAN TONE-2;Lm;0;L;;;;;N;;;;; 1AFF1;KATAKANA LETTER MINNAN TONE-3;Lm;0;L;;;;;N;;;;; 1AFF2;KATAKANA LETTER MINNAN TONE-4;Lm;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 4edfd8b9c..5bbf7de08 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-15, 15:07:04 GMT +# Date: 2024-11-15, 15:33:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -46,6 +46,7 @@ # Anatolian Hieroglyphs: U+14400..U+1467F # Ideographic Symbols & Tangut: U+16FE0..U+18AFF # Khitan Small Script & Tangut Sup: U+18B00..U+18D7F +# Tangut Components Supplement: U+18D80..U+18DFF # Kana Extended-B: U+1AFF0..U+1AFFF # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF @@ -2214,14 +2215,15 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FF2..16FF3 ; U # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; U # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 16FF7..16FFF ; U # Cn [9] .. -17000..187F7 ; U # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -187F8..187FF ; U # Cn [8] .. +17000..187FF ; U # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CD6..18CFE ; U # Cn [41] .. 18CFF ; U # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D08 ; U # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 -18D09..18D7F ; U # Cn [119] .. +18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D1D..18D7F ; U # Cn [99] .. +18D80..18D81 ; U # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D82..18DFF ; U # Cn [126] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index f2cdd7c5a..fb85e384f 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-15, 15:07:02 GMT +# Date: 2024-11-15, 15:33:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2536,9 +2536,9 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; OLetter # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; OLetter # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; OLetter # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; OLetter # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; OLetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; OLetter # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2611,7 +2611,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137022 +# Total code points: 137052 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 4c52a387e..697a45bd1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-15, 15:06:21 GMT +# Date: 2024-11-15, 15:32:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1116,9 +1116,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; L # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; L # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; L # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1227,7 +1227,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815156 code points not listed here. +# The above property value applies to 815126 code points not listed here. # Total code points: 1095476 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index c4c01ffa0..46158468b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-15, 15:06:23 GMT +# Date: 2024-11-15, 15:32:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1861,9 +1861,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER 16FF2..16FF3 ; 0 # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; 0 # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; 0 # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; 0 # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; 0 # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; 0 # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; 0 # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; 0 # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2082,7 +2082,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821317 code points not listed here. +# The above property value applies to 821287 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index c69b23b4b..3980c0bd6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 15:06:25 GMT +# Date: 2024-11-15, 15:32:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2127,8 +2127,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760835 code points not listed here. -# Total code points: 792420 +# The above property value applies to 760805 code points not listed here. +# Total code points: 792390 # ================================================ @@ -2532,9 +2532,9 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; W # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; W # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; W # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2604,7 +2604,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182615 +# Total code points: 182645 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index d1fa21d11..43491004f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-15, 15:06:26 GMT +# Date: 2024-11-15, 15:32:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -580,9 +580,9 @@ FFFE..FFFF ; Cn # [2] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. 16FF7..16FFF ; Cn # [9] .. -187F8..187FF ; Cn # [8] .. 18CD6..18CFE ; Cn # [41] .. -18D09..1AFEF ; Cn # [8935] .. +18D1D..18D7F ; Cn # [99] .. +18D82..1AFEF ; Cn # [8814] .. 1AFF4 ; Cn # 1AFFC ; Cn # 1AFFF ; Cn # @@ -748,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819269 +# Total code points: 819239 # ================================================ @@ -2656,9 +2656,9 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16D99..16D9C ; Lo # [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION -17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D08 ; Lo # [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D1C ; Lo # [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Lo # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO @@ -2725,7 +2725,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136584 +# Total code points: 136614 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 1af810dfe..9c3549d4d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-15, 15:06:27 GMT +# Date: 2024-11-15, 15:32:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757389 code points not listed here. -# Total code points: 894857 +# The above property value applies to 757359 code points not listed here. +# Total code points: 894827 # ================================================ @@ -1777,9 +1777,9 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL 16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 +18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; ID # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1F000..1F02B ; ID # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK @@ -1868,7 +1868,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172424 +# Total code points: 172454 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 165570f71..e4d2fbd56 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-15, 15:06:28 GMT +# Date: 2024-11-15, 15:32:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37215,7 +37215,7 @@ FFFD ; REPLACEMENT CHARACTER 16FF4 ; YANGQIN SIGN SLOW ONE BEAT 16FF5 ; YANGQIN SIGN SLOW THREE HALF BEATS 16FF6 ; YANGQIN SIGN SLOW TWO BEATS -17000..187F7 ; TANGUT IDEOGRAPH-* +17000..187FF ; TANGUT IDEOGRAPH-* 18800 ; TANGUT COMPONENT-001 18801 ; TANGUT COMPONENT-002 18802 ; TANGUT COMPONENT-003 @@ -37986,7 +37986,9 @@ FFFD ; REPLACEMENT CHARACTER 18AFF ; TANGUT COMPONENT-768 18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* 18CFF ; KHITAN SMALL SCRIPT CHARACTER-* -18D00..18D08 ; TANGUT IDEOGRAPH-* +18D00..18D1C ; TANGUT IDEOGRAPH-* +18D80 ; TANGUT COMPONENT-769 +18D81 ; TANGUT COMPONENT-770 1AFF0 ; KATAKANA LETTER MINNAN TONE-2 1AFF1 ; KATAKANA LETTER MINNAN TONE-3 1AFF2 ; KATAKANA LETTER MINNAN TONE-4 @@ -45631,6 +45633,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155262 +# Total code points: 155292 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 850b02979..4ec9928b4 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -492,6 +492,7 @@ public enum Block_Values implements Named { Tangsa("Tangsa"), Tangut("Tangut"), Tangut_Components("Tangut_Components"), + Tangut_Components_Supplement("Tangut_Components_Sup"), Tangut_Supplement("Tangut_Sup"), Telugu("Telugu"), Thaana("Thaana"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index 7fea779e6..a3ea48f26 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -724,6 +724,7 @@ private static void generateUnicodeData(String filename) throws IOException { final BagFormatter bf = new BagFormatter(); bf.setHexValue(false) .setMergeRanges(true) + .setRangeBreakSource(null) .setNoSpacesBeforeSemicolon() .setMinSpacesAfterSemicolon(0) .setUnicodeDataStyleRanges(true) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index ec0abca40..a3ae7da1d 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1309,6 +1309,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // Unicode 12 added TANGUT IDEOGRAPH-187F2..TANGUT IDEOGRAPH-187F7. return TANGUT_BASE; } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. + if (ch <= 0x187FF && rCompositeVersion >= 0x100000) { + // Unicode [..] added TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF. + return TANGUT_BASE; + } } if (rCompositeVersion >= 0xd0000) { @@ -1321,6 +1326,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { if (ch <= 0x18D08) { return TANGUT_SUP_BASE; // 18D00..18D08 Tangut Ideograph Supplement } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. + if (ch <= 0x18D1C && rCompositeVersion >= 0x100000) { + // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. + return TANGUT_SUP_BASE; + } } // 20000..2A6DF; CJK Unified Ideographs Extension B diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index 80f60dcd6..e486c0993 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -28,10 +28,16 @@ public interface UCD_Types { // Unicode 12: // 17000;;Lo;0;L;;;;;N;;;;; // 187F7;;Lo;0;L;;;;;N;;;;; + // Unicode [..]: + // 17000;;Lo;0;L;;;;;N;;;;; + // 187FF;;Lo;0;L;;;;;N;;;;; public static final int TANGUT_SUP_BASE = 0x18D00; // Unicode 13: // 18D00;;Lo;0;L;;;;;N;;;;; // 18D08;;Lo;0;L;;;;;N;;;;; + // Unicode [..]: + // 18D00;;Lo;0;L;;;;;N;;;;; + // 18D1C;;Lo;0;L;;;;;N;;;;; public static final int // 4E00;;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index 357d1bf6d..9ba202a24 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -873,6 +873,7 @@ Format: kenFile skipValue=Rotated # Anatolian Hieroglyphs: U+14400..U+1467F # Ideographic Symbols & Tangut: U+16FE0..U+18AFF # Khitan Small Script & Tangut Sup: U+18B00..U+18D7F +# Tangut Components Supplement: U+18D80..U+18DFF # Kana Extended-B: U+1AFF0..U+1AFFF # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 956790f7a..8d596b081 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -312,6 +312,7 @@ Tamil ; Tamil Tamil_Sup ; Tamil_Supplement Tangut ; Tangut Tangut_Components ; Tangut_Components +Tangut_Components_Sup ; Tangut_Components_Supplement Tangut_Sup ; Tangut_Supplement Telugu ; Telugu Thaana ; Thaana From 46bb6d231b906e06c8ab287a5a8f3d142c88194d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Nov 2024 17:10:15 +0100 Subject: [PATCH 31/38] Two Tangut ideographs (#951) * UnicodeData.txt and hardcoded ranges from L2/23-246 * LineBreak.txt * new block * Regenerate UCD * GenerateEnums * EAW=W * Regenerate UCD * SUP * script * Regenerate UCD * Do not break UnicodeData ranges * Regenerate UCD * sources * Tangut Components Supplement to vo=U * Regenerate UCD * Ideographic * Regenerate UCD * Bump the upper bound of the Tangut Supplement * Regenerate UCD * TangutSources.txt lines from the proposal * Document the source * A test * The hobgoblin of little minds --- unicodetools/data/ucd/dev/DerivedAge.txt | 10 +++---- .../data/ucd/dev/DerivedCoreProperties.txt | 26 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 4 +-- unicodetools/data/ucd/dev/PropList.txt | 6 ++--- .../data/ucd/dev/PropertyValueAliases.txt | 2 +- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- unicodetools/data/ucd/dev/TangutSources.txt | 11 +++++++- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- .../data/ucd/dev/VerticalOrientation.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++--- .../dev/extracted/DerivedCombiningClass.txt | 6 ++--- .../dev/extracted/DerivedEastAsianWidth.txt | 10 +++---- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +++---- .../data/ucd/dev/extracted/DerivedName.txt | 6 ++--- .../main/java/org/unicode/text/UCD/UCD.java | 14 +++++----- .../TODO-MISSING-RMG-ISSUE.txt | 18 +++++++++++++ 19 files changed, 96 insertions(+), 67 deletions(-) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/TODO-MISSING-RMG-ISSUE.txt diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 4f59a0c5e..1924ef93d 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-15, 15:32:19 GMT +# Date: 2024-11-15, 15:57:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2041,9 +2041,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 13460..143FA ; 16.0 # [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 16100..16139 ; 16.0 # [58] GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE 16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE -187F8..187FF ; 16.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF -18D09..18D1C ; 16.0 # [20] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE 1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE @@ -2059,7 +2057,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5213 +# Total code points: 5185 # ================================================ @@ -2094,8 +2092,10 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16FF2..16FF6 ; 17.0 # [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS +187F8..187FF ; 17.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF +18D09..18D1E ; 17.0 # [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; 17.0 # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 266 +# Total code points: 296 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 7f8bb3478..8ae667717 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-15, 15:32:46 GMT +# Date: 2024-11-15, 15:57:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1338,7 +1338,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FF2..16FF3 ; Alphabetic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Alphabetic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; Alphabetic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; Alphabetic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Alphabetic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -1458,7 +1458,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142968 +# Total code points: 142970 # ================================================ @@ -6910,7 +6910,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FF2..16FF3 ; ID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; ID_Start # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; ID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -7019,7 +7019,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141467 +# Total code points: 141469 # ================================================ @@ -8301,7 +8301,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FF2..16FF3 ; ID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; ID_Continue # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; ID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -8448,7 +8448,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144800 +# Total code points: 144802 # ================================================ @@ -9127,7 +9127,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FF2..16FF3 ; XID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; XID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; XID_Start # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; XID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; XID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -9236,7 +9236,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141444 +# Total code points: 141446 # ================================================ @@ -10519,7 +10519,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FF2..16FF3 ; XID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; XID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; XID_Continue # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; XID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; XID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -10666,7 +10666,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144781 +# Total code points: 144783 # ================================================ @@ -12741,7 +12741,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FF2..16FF3 ; Grapheme_Base # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Grapheme_Base # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; Grapheme_Base # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; Grapheme_Base # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Grapheme_Base # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -12945,7 +12945,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152986 +# Total code points: 152988 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 19ce796cb..c4ec8dd03 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 15:32:54 GMT +# Date: 2024-11-15, 15:57:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2388,7 +2388,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; W # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D00..18D1E ; W # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index dac04fee7..3645e8540 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-15, 15:32:55 GMT +# Date: 2024-11-15, 15:57:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3299,7 +3299,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index e43a9d672..a9eb0c5ab 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-15, 15:33:06 GMT +# Date: 2024-11-15, 15:58:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -881,7 +881,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FF2..16FF3 ; Ideographic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Ideographic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; Ideographic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; Ideographic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Ideographic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF @@ -894,7 +894,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106512 +# Total code points: 106514 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 1b2611436..03d7a8b30 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-15, 15:33:08 GMT +# Date: 2024-11-15, 15:58:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 3c8922f9d..0b3d28238 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-15, 15:33:26 GMT +# Date: 2024-11-15, 15:58:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2769,10 +2769,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 16FE0 ; Tangut # Lm TANGUT ITERATION MARK 17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 -18D00..18D1C ; Tangut # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D00..18D1E ; Tangut # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Tangut # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 6944 +# Total code points: 6946 # ================================================ diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index ad9bc8c48..401b8d8bc 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -34,7 +34,9 @@ # # The following abbreviations are used in kTGT_MergedSrc: # -# H2004 = Hán Xiǎománg (韓小忙), 西夏文正字研究 (Xīxiàwén Zhèngzì Yánjiū) +# H2021 = Hán Xiǎománg (韩小忙), 西夏文词典: 世俗文献部分 (Xīxiàwén Cídiǎn: Shìsú Wénxiàn Bùfēn) +# [Tangut Word Dictionary: Secular Literature Part, 9 vols.]. 2021. +# H2004 = Hán Xiǎománg (韩小忙), 西夏文正字研究 (Xīxiàwén Zhèngzì Yánjiū) # [Research into the Correct Forms of Tangut Characters]. 2004. # L1986 = Lǐ Fànwén (李範文), 同音研究 (Tóngyīn Yánjiū) # [Study of the Homophones]. Yinchuan. 1986 @@ -12403,5 +12405,12 @@ U+18D1B kTGT_MergedSrc N5217-27 U+18D1B kRSTUnicode 36.7 U+18D1C kTGT_MergedSrc N5217-28 U+18D1C kRSTUnicode 141.9 +<<<<<<< HEAD +U+18D1D kTGT_MergedSrc H2021-309801 +U+18D1D kRSTUnicode 106.13 +U+18D1E kTGT_MergedSrc H2021-834001 +U+18D1E kRSTUnicode 579.14 +======= +>>>>>>> la-vache/main # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 9b4883e11..e69059b21 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -31786,7 +31786,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18CFF;KHITAN SMALL SCRIPT CHARACTER-18CFF;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; -18D1C;;Lo;0;L;;;;;N;;;;; +18D1E;;Lo;0;L;;;;;N;;;;; 18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; 18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 1AFF0;KATAKANA LETTER MINNAN TONE-2;Lm;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 5bbf7de08..6e52d34f5 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-15, 15:33:29 GMT +# Date: 2024-11-15, 15:58:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2220,8 +2220,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CD6..18CFE ; U # Cn [41] .. 18CFF ; U # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C -18D1D..18D7F ; U # Cn [99] .. +18D00..18D1E ; U # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E +18D1F..18D7F ; U # Cn [97] .. 18D80..18D81 ; U # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 18D82..18DFF ; U # Cn [126] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index fb85e384f..b42d7d2a2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-15, 15:33:27 GMT +# Date: 2024-11-15, 15:58:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2537,7 +2537,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FF2..16FF3 ; OLetter # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; OLetter # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; OLetter # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; OLetter # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; OLetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2611,7 +2611,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137052 +# Total code points: 137054 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 697a45bd1..671fe74ae 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-15, 15:32:43 GMT +# Date: 2024-11-15, 15:57:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1117,7 +1117,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; L # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; L # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; L # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -1227,7 +1227,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815126 code points not listed here. +# The above property value applies to 815124 code points not listed here. # Total code points: 1095476 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 46158468b..17176ce34 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-15, 15:32:46 GMT +# Date: 2024-11-15, 15:57:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1862,7 +1862,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FF2..16FF3 ; 0 # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; 0 # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; 0 # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; 0 # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; 0 # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2082,7 +2082,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821287 code points not listed here. +# The above property value applies to 821285 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 3980c0bd6..2216c60d6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 15:32:48 GMT +# Date: 2024-11-15, 15:57:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2127,8 +2127,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760805 code points not listed here. -# Total code points: 792390 +# The above property value applies to 760803 code points not listed here. +# Total code points: 792388 # ================================================ @@ -2533,7 +2533,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; W # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; W # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2604,7 +2604,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182645 +# Total code points: 182647 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 43491004f..caa0df6ae 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-15, 15:32:49 GMT +# Date: 2024-11-15, 15:57:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -581,7 +581,7 @@ FFFE..FFFF ; Cn # [2] .. 16FE5..16FEF ; Cn # [11] .. 16FF7..16FFF ; Cn # [9] .. 18CD6..18CFE ; Cn # [41] .. -18D1D..18D7F ; Cn # [99] .. +18D1F..18D7F ; Cn # [97] .. 18D82..1AFEF ; Cn # [8814] .. 1AFF4 ; Cn # 1AFFC ; Cn # @@ -748,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819239 +# Total code points: 819237 # ================================================ @@ -2657,7 +2657,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION 17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; Lo # [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; Lo # [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Lo # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO @@ -2725,7 +2725,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136614 +# Total code points: 136616 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 9c3549d4d..4c63f1980 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-15, 15:32:51 GMT +# Date: 2024-11-15, 15:57:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757359 code points not listed here. -# Total code points: 894827 +# The above property value applies to 757357 code points not listed here. +# Total code points: 894825 # ================================================ @@ -1778,7 +1778,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL 16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 -18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; ID # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB @@ -1868,7 +1868,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172454 +# Total code points: 172456 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index e4d2fbd56..4b1c2cfd9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-15, 15:32:51 GMT +# Date: 2024-11-15, 15:57:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37986,7 +37986,7 @@ FFFD ; REPLACEMENT CHARACTER 18AFF ; TANGUT COMPONENT-768 18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* 18CFF ; KHITAN SMALL SCRIPT CHARACTER-* -18D00..18D1C ; TANGUT IDEOGRAPH-* +18D00..18D1E ; TANGUT IDEOGRAPH-* 18D80 ; TANGUT COMPONENT-769 18D81 ; TANGUT COMPONENT-770 1AFF0 ; KATAKANA LETTER MINNAN TONE-2 @@ -45633,6 +45633,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155292 +# Total code points: 155294 # EOF diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index a3ae7da1d..22a7b0f12 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1309,9 +1309,8 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // Unicode 12 added TANGUT IDEOGRAPH-187F2..TANGUT IDEOGRAPH-187F7. return TANGUT_BASE; } - // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. - if (ch <= 0x187FF && rCompositeVersion >= 0x100000) { - // Unicode [..] added TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF. + if (ch <= 0x187FF && rCompositeVersion >= 0x110000) { + // Unicode 17 added TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF. return TANGUT_BASE; } } @@ -1326,9 +1325,12 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { if (ch <= 0x18D08) { return TANGUT_SUP_BASE; // 18D00..18D08 Tangut Ideograph Supplement } - // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. - if (ch <= 0x18D1C && rCompositeVersion >= 0x100000) { - // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. + if (ch <= 0x18D1C && rCompositeVersion >= 0x110000) { + // Unicode 17 added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. + return TANGUT_SUP_BASE; + } + if (ch <= 0x18D1E && rCompositeVersion >= 0x110000) { + // Unicode 17 added TANGUT IDEOGRAPH-18D1D..TANGUT IDEOGRAPH-18D1E. return TANGUT_SUP_BASE; } } diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/TODO-MISSING-RMG-ISSUE.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/TODO-MISSING-RMG-ISSUE.txt new file mode 100644 index 000000000..21a9644c8 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/TODO-MISSING-RMG-ISSUE.txt @@ -0,0 +1,18 @@ +# [Template for property comparison tests of character encoding proposals] +# [RMG ISSUE TITLE] +# https://github.com/unicode-org/utc-release-management/issues/[RMG ISSUE NUMBER] + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring kRSTUnicode kTGT_MergedSrc: +Propertywise [\x{18D00}\x{18D1D}\x{18D1E}] AreAlike +end Ignoring; + +end Ignoring; + +end Ignoring; \ No newline at end of file From c1857c5d063b2615a8bf53150ee0e3180e0b7f87 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Nov 2024 17:52:00 +0100 Subject: [PATCH 32/38] =?UTF-8?q?114=E2=88=921=3D113=20Tangut=20components?= =?UTF-8?q?=20(#579)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * UnicodeData.txt from L2/23-247 * LB=ID * new block * Regenerate UCD * GenerateEnums * EAW=W * Regenerate UCD * Scripts.txt * Regenerate UCD * Not the unassigned ones * Regenerate UCD * Tangut Components Supplement to vo=U * Regenerate UCD * Drop U+18D8E and shift and rename U+18D8F..U+18DF3; as far as UCD is concerned, that means drop U+18DF3. * Regenerate UCD * remove stray EAW line * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 +- .../data/ucd/dev/DerivedCoreProperties.txt | 26 ++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +- unicodetools/data/ucd/dev/LineBreak.txt | 4 +- unicodetools/data/ucd/dev/PropList.txt | 6 +- .../data/ucd/dev/PropertyValueAliases.txt | 2 +- unicodetools/data/ucd/dev/Scripts.txt | 6 +- unicodetools/data/ucd/dev/UnicodeData.txt | 113 +++++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 6 +- .../dev/auxiliary/SentenceBreakProperty.txt | 6 +- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 +- .../dev/extracted/DerivedCombiningClass.txt | 6 +- .../dev/extracted/DerivedEastAsianWidth.txt | 10 +- .../dev/extracted/DerivedGeneralCategory.txt | 10 +- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +- .../data/ucd/dev/extracted/DerivedName.txt | 117 +++++++++++++++++- 16 files changed, 282 insertions(+), 56 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 1924ef93d..bd27220a9 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-15, 15:57:18 GMT +# Date: 2024-11-15, 16:13:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2094,8 +2094,8 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 16FF2..16FF6 ; 17.0 # [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS 187F8..187FF ; 17.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF 18D09..18D1E ; 17.0 # [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; 17.0 # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 -# Total code points: 296 +# Total code points: 409 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 8ae667717..599f5faa7 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-15, 15:57:42 GMT +# Date: 2024-11-15, 16:14:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1339,7 +1339,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FF4..16FF6 ; Alphabetic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; Alphabetic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; Alphabetic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; Alphabetic # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1458,7 +1458,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142970 +# Total code points: 143083 # ================================================ @@ -6911,7 +6911,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FF4..16FF6 ; ID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; ID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; ID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; ID_Start # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -7019,7 +7019,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141469 +# Total code points: 141582 # ================================================ @@ -8302,7 +8302,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FF4..16FF6 ; ID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; ID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; ID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; ID_Continue # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8448,7 +8448,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144802 +# Total code points: 144915 # ================================================ @@ -9128,7 +9128,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FF4..16FF6 ; XID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; XID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; XID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; XID_Start # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -9236,7 +9236,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141446 +# Total code points: 141559 # ================================================ @@ -10520,7 +10520,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FF4..16FF6 ; XID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; XID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; XID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; XID_Continue # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -10666,7 +10666,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144783 +# Total code points: 144896 # ================================================ @@ -12742,7 +12742,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FF4..16FF6 ; Grapheme_Base # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; Grapheme_Base # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; Grapheme_Base # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; Grapheme_Base # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Grapheme_Base # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -12945,7 +12945,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152988 +# Total code points: 153101 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index c4ec8dd03..a28270e6f 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 15:57:48 GMT +# Date: 2024-11-15, 16:14:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2389,7 +2389,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; W # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D1E ; W # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; W # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3645e8540..493a74d9d 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-15, 15:57:49 GMT +# Date: 2024-11-15, 16:14:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3300,7 +3300,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; ID # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; AL # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index a9eb0c5ab..9e45465d3 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-15, 15:58:01 GMT +# Date: 2024-11-15, 16:14:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -882,7 +882,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FF4..16FF6 ; Ideographic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; Ideographic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; Ideographic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; Ideographic # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -894,7 +894,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106514 +# Total code points: 106627 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 03d7a8b30..8e291eb46 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-15, 15:58:03 GMT +# Date: 2024-11-15, 16:14:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 0b3d28238..e9c090a04 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-15, 15:58:19 GMT +# Date: 2024-11-15, 16:14:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2770,9 +2770,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 16FE0 ; Tangut # Lm TANGUT ITERATION MARK 17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 18D00..18D1E ; Tangut # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; Tangut # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; Tangut # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 -# Total code points: 6946 +# Total code points: 7059 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index e69059b21..289045350 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -31789,6 +31789,119 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18D1E;;Lo;0;L;;;;;N;;;;; 18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; 18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; +18D82;TANGUT COMPONENT-771;Lo;0;L;;;;;N;;;;; +18D83;TANGUT COMPONENT-772;Lo;0;L;;;;;N;;;;; +18D84;TANGUT COMPONENT-773;Lo;0;L;;;;;N;;;;; +18D85;TANGUT COMPONENT-774;Lo;0;L;;;;;N;;;;; +18D86;TANGUT COMPONENT-775;Lo;0;L;;;;;N;;;;; +18D87;TANGUT COMPONENT-776;Lo;0;L;;;;;N;;;;; +18D88;TANGUT COMPONENT-777;Lo;0;L;;;;;N;;;;; +18D89;TANGUT COMPONENT-778;Lo;0;L;;;;;N;;;;; +18D8A;TANGUT COMPONENT-779;Lo;0;L;;;;;N;;;;; +18D8B;TANGUT COMPONENT-780;Lo;0;L;;;;;N;;;;; +18D8C;TANGUT COMPONENT-781;Lo;0;L;;;;;N;;;;; +18D8D;TANGUT COMPONENT-782;Lo;0;L;;;;;N;;;;; +18D8E;TANGUT COMPONENT-783;Lo;0;L;;;;;N;;;;; +18D8F;TANGUT COMPONENT-784;Lo;0;L;;;;;N;;;;; +18D90;TANGUT COMPONENT-785;Lo;0;L;;;;;N;;;;; +18D91;TANGUT COMPONENT-786;Lo;0;L;;;;;N;;;;; +18D92;TANGUT COMPONENT-787;Lo;0;L;;;;;N;;;;; +18D93;TANGUT COMPONENT-788;Lo;0;L;;;;;N;;;;; +18D94;TANGUT COMPONENT-789;Lo;0;L;;;;;N;;;;; +18D95;TANGUT COMPONENT-790;Lo;0;L;;;;;N;;;;; +18D96;TANGUT COMPONENT-791;Lo;0;L;;;;;N;;;;; +18D97;TANGUT COMPONENT-792;Lo;0;L;;;;;N;;;;; +18D98;TANGUT COMPONENT-793;Lo;0;L;;;;;N;;;;; +18D99;TANGUT COMPONENT-794;Lo;0;L;;;;;N;;;;; +18D9A;TANGUT COMPONENT-795;Lo;0;L;;;;;N;;;;; +18D9B;TANGUT COMPONENT-796;Lo;0;L;;;;;N;;;;; +18D9C;TANGUT COMPONENT-797;Lo;0;L;;;;;N;;;;; +18D9D;TANGUT COMPONENT-798;Lo;0;L;;;;;N;;;;; +18D9E;TANGUT COMPONENT-799;Lo;0;L;;;;;N;;;;; +18D9F;TANGUT COMPONENT-800;Lo;0;L;;;;;N;;;;; +18DA0;TANGUT COMPONENT-801;Lo;0;L;;;;;N;;;;; +18DA1;TANGUT COMPONENT-802;Lo;0;L;;;;;N;;;;; +18DA2;TANGUT COMPONENT-803;Lo;0;L;;;;;N;;;;; +18DA3;TANGUT COMPONENT-804;Lo;0;L;;;;;N;;;;; +18DA4;TANGUT COMPONENT-805;Lo;0;L;;;;;N;;;;; +18DA5;TANGUT COMPONENT-806;Lo;0;L;;;;;N;;;;; +18DA6;TANGUT COMPONENT-807;Lo;0;L;;;;;N;;;;; +18DA7;TANGUT COMPONENT-808;Lo;0;L;;;;;N;;;;; +18DA8;TANGUT COMPONENT-809;Lo;0;L;;;;;N;;;;; +18DA9;TANGUT COMPONENT-810;Lo;0;L;;;;;N;;;;; +18DAA;TANGUT COMPONENT-811;Lo;0;L;;;;;N;;;;; +18DAB;TANGUT COMPONENT-812;Lo;0;L;;;;;N;;;;; +18DAC;TANGUT COMPONENT-813;Lo;0;L;;;;;N;;;;; +18DAD;TANGUT COMPONENT-814;Lo;0;L;;;;;N;;;;; +18DAE;TANGUT COMPONENT-815;Lo;0;L;;;;;N;;;;; +18DAF;TANGUT COMPONENT-816;Lo;0;L;;;;;N;;;;; +18DB0;TANGUT COMPONENT-817;Lo;0;L;;;;;N;;;;; +18DB1;TANGUT COMPONENT-818;Lo;0;L;;;;;N;;;;; +18DB2;TANGUT COMPONENT-819;Lo;0;L;;;;;N;;;;; +18DB3;TANGUT COMPONENT-820;Lo;0;L;;;;;N;;;;; +18DB4;TANGUT COMPONENT-821;Lo;0;L;;;;;N;;;;; +18DB5;TANGUT COMPONENT-822;Lo;0;L;;;;;N;;;;; +18DB6;TANGUT COMPONENT-823;Lo;0;L;;;;;N;;;;; +18DB7;TANGUT COMPONENT-824;Lo;0;L;;;;;N;;;;; +18DB8;TANGUT COMPONENT-825;Lo;0;L;;;;;N;;;;; +18DB9;TANGUT COMPONENT-826;Lo;0;L;;;;;N;;;;; +18DBA;TANGUT COMPONENT-827;Lo;0;L;;;;;N;;;;; +18DBB;TANGUT COMPONENT-828;Lo;0;L;;;;;N;;;;; +18DBC;TANGUT COMPONENT-829;Lo;0;L;;;;;N;;;;; +18DBD;TANGUT COMPONENT-830;Lo;0;L;;;;;N;;;;; +18DBE;TANGUT COMPONENT-831;Lo;0;L;;;;;N;;;;; +18DBF;TANGUT COMPONENT-832;Lo;0;L;;;;;N;;;;; +18DC0;TANGUT COMPONENT-833;Lo;0;L;;;;;N;;;;; +18DC1;TANGUT COMPONENT-834;Lo;0;L;;;;;N;;;;; +18DC2;TANGUT COMPONENT-835;Lo;0;L;;;;;N;;;;; +18DC3;TANGUT COMPONENT-836;Lo;0;L;;;;;N;;;;; +18DC4;TANGUT COMPONENT-837;Lo;0;L;;;;;N;;;;; +18DC5;TANGUT COMPONENT-838;Lo;0;L;;;;;N;;;;; +18DC6;TANGUT COMPONENT-839;Lo;0;L;;;;;N;;;;; +18DC7;TANGUT COMPONENT-840;Lo;0;L;;;;;N;;;;; +18DC8;TANGUT COMPONENT-841;Lo;0;L;;;;;N;;;;; +18DC9;TANGUT COMPONENT-842;Lo;0;L;;;;;N;;;;; +18DCA;TANGUT COMPONENT-843;Lo;0;L;;;;;N;;;;; +18DCB;TANGUT COMPONENT-844;Lo;0;L;;;;;N;;;;; +18DCC;TANGUT COMPONENT-845;Lo;0;L;;;;;N;;;;; +18DCD;TANGUT COMPONENT-846;Lo;0;L;;;;;N;;;;; +18DCE;TANGUT COMPONENT-847;Lo;0;L;;;;;N;;;;; +18DCF;TANGUT COMPONENT-848;Lo;0;L;;;;;N;;;;; +18DD0;TANGUT COMPONENT-849;Lo;0;L;;;;;N;;;;; +18DD1;TANGUT COMPONENT-850;Lo;0;L;;;;;N;;;;; +18DD2;TANGUT COMPONENT-851;Lo;0;L;;;;;N;;;;; +18DD3;TANGUT COMPONENT-852;Lo;0;L;;;;;N;;;;; +18DD4;TANGUT COMPONENT-853;Lo;0;L;;;;;N;;;;; +18DD5;TANGUT COMPONENT-854;Lo;0;L;;;;;N;;;;; +18DD6;TANGUT COMPONENT-855;Lo;0;L;;;;;N;;;;; +18DD7;TANGUT COMPONENT-856;Lo;0;L;;;;;N;;;;; +18DD8;TANGUT COMPONENT-857;Lo;0;L;;;;;N;;;;; +18DD9;TANGUT COMPONENT-858;Lo;0;L;;;;;N;;;;; +18DDA;TANGUT COMPONENT-859;Lo;0;L;;;;;N;;;;; +18DDB;TANGUT COMPONENT-860;Lo;0;L;;;;;N;;;;; +18DDC;TANGUT COMPONENT-861;Lo;0;L;;;;;N;;;;; +18DDD;TANGUT COMPONENT-862;Lo;0;L;;;;;N;;;;; +18DDE;TANGUT COMPONENT-863;Lo;0;L;;;;;N;;;;; +18DDF;TANGUT COMPONENT-864;Lo;0;L;;;;;N;;;;; +18DE0;TANGUT COMPONENT-865;Lo;0;L;;;;;N;;;;; +18DE1;TANGUT COMPONENT-866;Lo;0;L;;;;;N;;;;; +18DE2;TANGUT COMPONENT-867;Lo;0;L;;;;;N;;;;; +18DE3;TANGUT COMPONENT-868;Lo;0;L;;;;;N;;;;; +18DE4;TANGUT COMPONENT-869;Lo;0;L;;;;;N;;;;; +18DE5;TANGUT COMPONENT-870;Lo;0;L;;;;;N;;;;; +18DE6;TANGUT COMPONENT-871;Lo;0;L;;;;;N;;;;; +18DE7;TANGUT COMPONENT-872;Lo;0;L;;;;;N;;;;; +18DE8;TANGUT COMPONENT-873;Lo;0;L;;;;;N;;;;; +18DE9;TANGUT COMPONENT-874;Lo;0;L;;;;;N;;;;; +18DEA;TANGUT COMPONENT-875;Lo;0;L;;;;;N;;;;; +18DEB;TANGUT COMPONENT-876;Lo;0;L;;;;;N;;;;; +18DEC;TANGUT COMPONENT-877;Lo;0;L;;;;;N;;;;; +18DED;TANGUT COMPONENT-878;Lo;0;L;;;;;N;;;;; +18DEE;TANGUT COMPONENT-879;Lo;0;L;;;;;N;;;;; +18DEF;TANGUT COMPONENT-880;Lo;0;L;;;;;N;;;;; +18DF0;TANGUT COMPONENT-881;Lo;0;L;;;;;N;;;;; +18DF1;TANGUT COMPONENT-882;Lo;0;L;;;;;N;;;;; +18DF2;TANGUT COMPONENT-883;Lo;0;L;;;;;N;;;;; 1AFF0;KATAKANA LETTER MINNAN TONE-2;Lm;0;L;;;;;N;;;;; 1AFF1;KATAKANA LETTER MINNAN TONE-3;Lm;0;L;;;;;N;;;;; 1AFF2;KATAKANA LETTER MINNAN TONE-4;Lm;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 6e52d34f5..5d0f5a803 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-15, 15:58:22 GMT +# Date: 2024-11-15, 16:14:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2222,8 +2222,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 18CFF ; U # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D1E ; U # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D1F..18D7F ; U # Cn [97] .. -18D80..18D81 ; U # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -18D82..18DFF ; U # Cn [126] .. +18D80..18DF2 ; U # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 +18DF3..18DFF ; U # Cn [13] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index b42d7d2a2..7521c2c87 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-15, 15:58:20 GMT +# Date: 2024-11-15, 16:14:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2538,7 +2538,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FF4..16FF6 ; OLetter # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; OLetter # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; OLetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; OLetter # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; OLetter # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2611,7 +2611,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137054 +# Total code points: 137167 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 671fe74ae..a8e5d8077 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-15, 15:57:39 GMT +# Date: 2024-11-15, 16:14:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1118,7 +1118,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; L # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; L # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; L # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1227,7 +1227,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815124 code points not listed here. +# The above property value applies to 815011 code points not listed here. # Total code points: 1095476 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 17176ce34..c7460e1f2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-15, 15:57:41 GMT +# Date: 2024-11-15, 16:14:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1863,7 +1863,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FF4..16FF6 ; 0 # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; 0 # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; 0 # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; 0 # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; 0 # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2082,7 +2082,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821285 code points not listed here. +# The above property value applies to 821172 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 2216c60d6..f278e89de 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 15:57:44 GMT +# Date: 2024-11-15, 16:14:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2127,8 +2127,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760803 code points not listed here. -# Total code points: 792388 +# The above property value applies to 760690 code points not listed here. +# Total code points: 792275 # ================================================ @@ -2534,7 +2534,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; W # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; W # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2604,7 +2604,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182647 +# Total code points: 182760 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index caa0df6ae..c3d65f8ab 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-15, 15:57:44 GMT +# Date: 2024-11-15, 16:14:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -582,7 +582,7 @@ FFFE..FFFF ; Cn # [2] .. 16FF7..16FFF ; Cn # [9] .. 18CD6..18CFE ; Cn # [41] .. 18D1F..18D7F ; Cn # [97] .. -18D82..1AFEF ; Cn # [8814] .. +18DF3..1AFEF ; Cn # [8701] .. 1AFF4 ; Cn # 1AFFC ; Cn # 1AFFF ; Cn # @@ -748,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819237 +# Total code points: 819124 # ================================================ @@ -2658,7 +2658,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16F50 ; Lo # MIAO LETTER NASALIZATION 17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; Lo # [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; Lo # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; Lo # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO @@ -2725,7 +2725,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136616 +# Total code points: 136729 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 4c63f1980..49703d6fa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-15, 15:57:46 GMT +# Date: 2024-11-15, 16:14:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757357 code points not listed here. -# Total code points: 894825 +# The above property value applies to 757244 code points not listed here. +# Total code points: 894712 # ================================================ @@ -1779,7 +1779,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E -18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DF2 ; ID # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1B000..1B122 ; ID # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1F000..1F02B ; ID # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK @@ -1868,7 +1868,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172456 +# Total code points: 172569 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 4b1c2cfd9..0fac2108c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-15, 15:57:46 GMT +# Date: 2024-11-15, 16:14:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37989,6 +37989,119 @@ FFFD ; REPLACEMENT CHARACTER 18D00..18D1E ; TANGUT IDEOGRAPH-* 18D80 ; TANGUT COMPONENT-769 18D81 ; TANGUT COMPONENT-770 +18D82 ; TANGUT COMPONENT-771 +18D83 ; TANGUT COMPONENT-772 +18D84 ; TANGUT COMPONENT-773 +18D85 ; TANGUT COMPONENT-774 +18D86 ; TANGUT COMPONENT-775 +18D87 ; TANGUT COMPONENT-776 +18D88 ; TANGUT COMPONENT-777 +18D89 ; TANGUT COMPONENT-778 +18D8A ; TANGUT COMPONENT-779 +18D8B ; TANGUT COMPONENT-780 +18D8C ; TANGUT COMPONENT-781 +18D8D ; TANGUT COMPONENT-782 +18D8E ; TANGUT COMPONENT-783 +18D8F ; TANGUT COMPONENT-784 +18D90 ; TANGUT COMPONENT-785 +18D91 ; TANGUT COMPONENT-786 +18D92 ; TANGUT COMPONENT-787 +18D93 ; TANGUT COMPONENT-788 +18D94 ; TANGUT COMPONENT-789 +18D95 ; TANGUT COMPONENT-790 +18D96 ; TANGUT COMPONENT-791 +18D97 ; TANGUT COMPONENT-792 +18D98 ; TANGUT COMPONENT-793 +18D99 ; TANGUT COMPONENT-794 +18D9A ; TANGUT COMPONENT-795 +18D9B ; TANGUT COMPONENT-796 +18D9C ; TANGUT COMPONENT-797 +18D9D ; TANGUT COMPONENT-798 +18D9E ; TANGUT COMPONENT-799 +18D9F ; TANGUT COMPONENT-800 +18DA0 ; TANGUT COMPONENT-801 +18DA1 ; TANGUT COMPONENT-802 +18DA2 ; TANGUT COMPONENT-803 +18DA3 ; TANGUT COMPONENT-804 +18DA4 ; TANGUT COMPONENT-805 +18DA5 ; TANGUT COMPONENT-806 +18DA6 ; TANGUT COMPONENT-807 +18DA7 ; TANGUT COMPONENT-808 +18DA8 ; TANGUT COMPONENT-809 +18DA9 ; TANGUT COMPONENT-810 +18DAA ; TANGUT COMPONENT-811 +18DAB ; TANGUT COMPONENT-812 +18DAC ; TANGUT COMPONENT-813 +18DAD ; TANGUT COMPONENT-814 +18DAE ; TANGUT COMPONENT-815 +18DAF ; TANGUT COMPONENT-816 +18DB0 ; TANGUT COMPONENT-817 +18DB1 ; TANGUT COMPONENT-818 +18DB2 ; TANGUT COMPONENT-819 +18DB3 ; TANGUT COMPONENT-820 +18DB4 ; TANGUT COMPONENT-821 +18DB5 ; TANGUT COMPONENT-822 +18DB6 ; TANGUT COMPONENT-823 +18DB7 ; TANGUT COMPONENT-824 +18DB8 ; TANGUT COMPONENT-825 +18DB9 ; TANGUT COMPONENT-826 +18DBA ; TANGUT COMPONENT-827 +18DBB ; TANGUT COMPONENT-828 +18DBC ; TANGUT COMPONENT-829 +18DBD ; TANGUT COMPONENT-830 +18DBE ; TANGUT COMPONENT-831 +18DBF ; TANGUT COMPONENT-832 +18DC0 ; TANGUT COMPONENT-833 +18DC1 ; TANGUT COMPONENT-834 +18DC2 ; TANGUT COMPONENT-835 +18DC3 ; TANGUT COMPONENT-836 +18DC4 ; TANGUT COMPONENT-837 +18DC5 ; TANGUT COMPONENT-838 +18DC6 ; TANGUT COMPONENT-839 +18DC7 ; TANGUT COMPONENT-840 +18DC8 ; TANGUT COMPONENT-841 +18DC9 ; TANGUT COMPONENT-842 +18DCA ; TANGUT COMPONENT-843 +18DCB ; TANGUT COMPONENT-844 +18DCC ; TANGUT COMPONENT-845 +18DCD ; TANGUT COMPONENT-846 +18DCE ; TANGUT COMPONENT-847 +18DCF ; TANGUT COMPONENT-848 +18DD0 ; TANGUT COMPONENT-849 +18DD1 ; TANGUT COMPONENT-850 +18DD2 ; TANGUT COMPONENT-851 +18DD3 ; TANGUT COMPONENT-852 +18DD4 ; TANGUT COMPONENT-853 +18DD5 ; TANGUT COMPONENT-854 +18DD6 ; TANGUT COMPONENT-855 +18DD7 ; TANGUT COMPONENT-856 +18DD8 ; TANGUT COMPONENT-857 +18DD9 ; TANGUT COMPONENT-858 +18DDA ; TANGUT COMPONENT-859 +18DDB ; TANGUT COMPONENT-860 +18DDC ; TANGUT COMPONENT-861 +18DDD ; TANGUT COMPONENT-862 +18DDE ; TANGUT COMPONENT-863 +18DDF ; TANGUT COMPONENT-864 +18DE0 ; TANGUT COMPONENT-865 +18DE1 ; TANGUT COMPONENT-866 +18DE2 ; TANGUT COMPONENT-867 +18DE3 ; TANGUT COMPONENT-868 +18DE4 ; TANGUT COMPONENT-869 +18DE5 ; TANGUT COMPONENT-870 +18DE6 ; TANGUT COMPONENT-871 +18DE7 ; TANGUT COMPONENT-872 +18DE8 ; TANGUT COMPONENT-873 +18DE9 ; TANGUT COMPONENT-874 +18DEA ; TANGUT COMPONENT-875 +18DEB ; TANGUT COMPONENT-876 +18DEC ; TANGUT COMPONENT-877 +18DED ; TANGUT COMPONENT-878 +18DEE ; TANGUT COMPONENT-879 +18DEF ; TANGUT COMPONENT-880 +18DF0 ; TANGUT COMPONENT-881 +18DF1 ; TANGUT COMPONENT-882 +18DF2 ; TANGUT COMPONENT-883 1AFF0 ; KATAKANA LETTER MINNAN TONE-2 1AFF1 ; KATAKANA LETTER MINNAN TONE-3 1AFF2 ; KATAKANA LETTER MINNAN TONE-4 @@ -45633,6 +45746,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155294 +# Total code points: 155407 # EOF From 8e9858386bb883bce89d914aeefcb123731a5e6b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Nov 2024 19:08:25 +0100 Subject: [PATCH 33/38] 10 more legacy computing symbols (#581) * UnicodeData.txt from L2/23-252 * LB=AL * Scripts.txt * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 7 +++++-- .../data/ucd/dev/DerivedCoreProperties.txt | 7 +++++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++++- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++++- unicodetools/data/ucd/dev/Scripts.txt | 7 +++++-- unicodetools/data/ucd/dev/UnicodeData.txt | 10 ++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 5 ++++- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 11 +++++++---- .../ucd/dev/extracted/DerivedCombiningClass.txt | 7 +++++-- .../ucd/dev/extracted/DerivedEastAsianWidth.txt | 7 +++++-- .../ucd/dev/extracted/DerivedGeneralCategory.txt | 16 ++++++++++------ .../data/ucd/dev/extracted/DerivedLineBreak.txt | 11 +++++++---- .../data/ucd/dev/extracted/DerivedName.txt | 14 ++++++++++++-- 13 files changed, 83 insertions(+), 29 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index bd27220a9..3f8881bd2 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-15, 16:13:40 GMT +# Date: 2024-11-15, 16:53:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2095,7 +2095,10 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 187F8..187FF ; 17.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF 18D09..18D1E ; 17.0 # [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 +1CCFA..1CCFC ; 17.0 # [3] SNAKE SYMBOL..NOSE SYMBOL +1CEBA..1CEBF ; 17.0 # [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1FBFA ; 17.0 # ALARM BELL SYMBOL -# Total code points: 409 +# Total code points: 419 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 599f5faa7..4962e586b 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-15, 16:14:04 GMT +# Date: 2024-11-15, 16:54:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -12760,7 +12760,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1BC9F ; Grapheme_Base # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1CC00..1CCEF ; Grapheme_Base # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; Grapheme_Base # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CCFA..1CCFC ; Grapheme_Base # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; Grapheme_Base # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; Grapheme_Base # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF50..1CFC3 ; Grapheme_Base # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Grapheme_Base # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -12935,6 +12937,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1FB00..1FB92 ; Grapheme_Base # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; Grapheme_Base # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBF0..1FBF9 ; Grapheme_Base # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +1FBFA ; Grapheme_Base # So ALARM BELL SYMBOL 20000..2A6DF ; Grapheme_Base # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Grapheme_Base # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -12945,7 +12948,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 153101 +# Total code points: 153111 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a28270e6f..d45456764 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 16:14:10 GMT +# Date: 2024-11-15, 16:54:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2410,7 +2410,9 @@ FFFD ; A # So REPLACEMENT CHARACTER 1BCA0..1BCA3 ; N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1CC00..1CCEF ; N # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; N # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CCFA..1CCFC ; N # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; N # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; N # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2683,6 +2685,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1FB00..1FB92 ; N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; N # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBF0..1FBF9 ; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +1FBFA ; N # So ALARM BELL SYMBOL 20000..2A6DF ; W # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A6E0..2A6FF ; W # Cn [32] .. 2A700..2B739 ; W # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 493a74d9d..602884f88 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-15, 16:14:11 GMT +# Date: 2024-11-15, 16:54:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3321,7 +3321,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1BCA0..1BCA3 ; CM # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1CC00..1CCEF ; AL # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; NU # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CCFA..1CCFC ; AL # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; AL # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; AL # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF00..1CF2D ; CM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; CM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -3667,6 +3669,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBF0..1FBF9 ; NU # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +1FBFA ; AL # So ALARM BELL SYMBOL 1FC00..1FFFD ; ID # Cn [1022] .. 20000..2A6DF ; ID # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A6E0..2A6FF ; ID # Cn [32] .. diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index e9c090a04..2fecdb546 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-15, 16:14:42 GMT +# Date: 2024-11-15, 16:55:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -523,7 +523,9 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1CC00..1CCEF ; Common # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; Common # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CCFA..1CCFC ; Common # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; Common # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; Common # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF50..1CFC3 ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -629,10 +631,11 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1FB00..1FB92 ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; Common # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBF0..1FBF9 ; Common # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +1FBFA ; Common # So ALARM BELL SYMBOL E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9054 +# Total code points: 9064 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 289045350..176d80912 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -33008,6 +33008,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1CCF7;OUTLINED DIGIT SEVEN;Nd;0;EN; 0037;7;7;7;N;;;;; 1CCF8;OUTLINED DIGIT EIGHT;Nd;0;EN; 0038;8;8;8;N;;;;; 1CCF9;OUTLINED DIGIT NINE;Nd;0;EN; 0039;9;9;9;N;;;;; +1CCFA;SNAKE SYMBOL;So;0;ON;;;;;N;;;;; +1CCFB;FLYING SAUCER SYMBOL;So;0;ON;;;;;N;;;;; +1CCFC;NOSE SYMBOL;So;0;ON;;;;;N;;;;; 1CD00;BLOCK OCTANT-3;So;0;ON;;;;;N;;;;; 1CD01;BLOCK OCTANT-23;So;0;ON;;;;;N;;;;; 1CD02;BLOCK OCTANT-123;So;0;ON;;;;;N;;;;; @@ -33444,6 +33447,12 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1CEB1;KEYHOLE;So;0;ON;;;;;N;;;;; 1CEB2;OLD PERSONAL COMPUTER WITH MONITOR IN PORTRAIT ORIENTATION;So;0;ON;;;;;N;;;;; 1CEB3;BLACK RIGHT TRIANGLE CARET;So;0;ON;;;;;N;;;;; +1CEBA;FRAGILE SYMBOL;So;0;ON;;;;;N;;;;; +1CEBB;OFFICE BUILDING SYMBOL;So;0;ON;;;;;N;;;;; +1CEBC;TREE SYMBOL;So;0;ON;;;;;N;;;;; +1CEBD;APPLE SYMBOL;So;0;ON;;;;;N;;;;; +1CEBE;CHERRY SYMBOL;So;0;ON;;;;;N;;;;; +1CEBF;STRAWBERRY SYMBOL;So;0;ON;;;;;N;;;;; 1CF00;ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF01;ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF02;ZNAMENNY COMBINING MARK TSATA ON LEFT;Mn;0;NSM;;;;;N;;;;; @@ -39594,6 +39603,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1FBF7;SEGMENTED DIGIT SEVEN;Nd;0;EN; 0037;7;7;7;N;;;;; 1FBF8;SEGMENTED DIGIT EIGHT;Nd;0;EN; 0038;8;8;8;N;;;;; 1FBF9;SEGMENTED DIGIT NINE;Nd;0;EN; 0039;9;9;9;N;;;;; +1FBFA;ALARM BELL SYMBOL;So;0;ON;;;;;N;;;;; 20000;;Lo;0;L;;;;;N;;;;; 2A6DF;;Lo;0;L;;;;;N;;;;; 2A700;;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 5d0f5a803..74935bc34 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-15, 16:14:45 GMT +# Date: 2024-11-15, 16:55:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2254,7 +2254,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1BCA0..1BCA3 ; R # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1CC00..1CCEF ; R # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; R # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CCFA..1CCFC ; R # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; R # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; R # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF00..1CF2D ; U # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF2E..1CF2F ; U # Cn [2] .. 1CF30..1CF46 ; U # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG @@ -2503,6 +2505,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1FB00..1FB92 ; R # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; R # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBF0..1FBF9 ; R # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +1FBFA ; R # So ALARM BELL SYMBOL 20000..2A6DF ; U # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A6E0..2A6FF ; U # Cn [32] .. 2A700..2B739 ; U # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index a8e5d8077..955d42660 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-15, 16:14:01 GMT +# Date: 2024-11-15, 16:54:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1227,8 +1227,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815011 code points not listed here. -# Total code points: 1095476 +# The above property value applies to 815001 code points not listed here. +# Total code points: 1095466 # ================================================ @@ -1959,7 +1959,9 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 11FE1..11FF1 ; ON # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA 16FE2 ; ON # Po OLD CHINESE HOOK MARK 1CC00..1CCD5 ; ON # So [214] UP-POINTING GO-KART..LOWER RIGHT QUADRANT STANDING KNIGHT +1CCFA..1CCFC ; ON # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; ON # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; ON # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON 1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; ON # So GREEK MUSICAL LEIMMA @@ -2013,8 +2015,9 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FAF0..1FAF8 ; ON # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE +1FBFA ; ON # So ALARM BELL SYMBOL -# Total code points: 6786 +# Total code points: 6796 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index c7460e1f2..1bc1aa08a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-15, 16:14:03 GMT +# Date: 2024-11-15, 16:54:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1883,7 +1883,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1BCA0..1BCA3 ; 0 # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1CC00..1CCEF ; 0 # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; 0 # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CCFA..1CCFC ; 0 # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; 0 # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; 0 # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF00..1CF2D ; 0 # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; 0 # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; 0 # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2067,6 +2069,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; 0 # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; 0 # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBF0..1FBF9 ; 0 # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +1FBFA ; 0 # So ALARM BELL SYMBOL 20000..2A6DF ; 0 # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; 0 # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; 0 # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -2082,7 +2085,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821172 code points not listed here. +# The above property value applies to 821162 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index f278e89de..e7cedf4a4 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 16:14:06 GMT +# Date: 2024-11-15, 16:54:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1908,7 +1908,9 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1BCA0..1BCA3 ; N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1CC00..1CCEF ; N # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; N # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CCFA..1CCFC ; N # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; N # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; N # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2124,10 +2126,11 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1FB00..1FB92 ; N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; N # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBF0..1FBF9 ; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +1FBFA ; N # So ALARM BELL SYMBOL E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760690 code points not listed here. +# The above property value applies to 760680 code points not listed here. # Total code points: 792275 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index c3d65f8ab..a14726cda 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-15, 16:14:06 GMT +# Date: 2024-11-15, 16:54:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -597,8 +597,9 @@ FFFE..FFFF ; Cn # [2] .. 1BC89..1BC8F ; Cn # [7] .. 1BC9A..1BC9B ; Cn # [2] .. 1BCA4..1CBFF ; Cn # [3932] .. -1CCFA..1CCFF ; Cn # [6] .. -1CEB4..1CEFF ; Cn # [76] .. +1CCFD..1CCFF ; Cn # [3] .. +1CEB4..1CEB9 ; Cn # [6] .. +1CEC0..1CEFF ; Cn # [64] .. 1CF2E..1CF2F ; Cn # [2] .. 1CF47..1CF4F ; Cn # [9] .. 1CFC4..1CFFF ; Cn # [60] .. @@ -732,7 +733,7 @@ FFFE..FFFF ; Cn # [2] .. 1FAEA..1FAEF ; Cn # [6] .. 1FAF9..1FAFF ; Cn # [7] .. 1FB93 ; Cn # -1FBFA..1FFFF ; Cn # [1030] .. +1FBFB..1FFFF ; Cn # [1029] .. 2A6E0..2A6FF ; Cn # [32] .. 2B73A..2B73F ; Cn # [6] .. 2B81E..2B81F ; Cn # [2] .. @@ -748,7 +749,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819124 +# Total code points: 819114 # ================================================ @@ -4257,7 +4258,9 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16B45 ; So # PAHAWH HMONG SIGN CIM TSOV ROG 1BC9C ; So # DUPLOYAN SIGN O WITH CROSS 1CC00..1CCEF ; So # [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z +1CCFA..1CCFC ; So # [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; So # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; So # [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF50..1CFC3 ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; So # [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -4314,8 +4317,9 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FAF0..1FAF8 ; So # [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE +1FBFA ; So # ALARM BELL SYMBOL -# Total code points: 7410 +# Total code points: 7420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 49703d6fa..1bc76d4d3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-15, 16:14:08 GMT +# Date: 2024-11-15, 16:54:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757244 code points not listed here. -# Total code points: 894712 +# The above property value applies to 757234 code points not listed here. +# Total code points: 894702 # ================================================ @@ -1469,7 +1469,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1BC90..1BC99 ; AL # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1BC9C ; AL # So DUPLOYAN SIGN O WITH CROSS 1CC00..1CCEF ; AL # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z +1CCFA..1CCFC ; AL # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; AL # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CEBA..1CEBF ; AL # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1626,8 +1628,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE +1FBFA ; AL # So ALARM BELL SYMBOL -# Total code points: 26876 +# Total code points: 26886 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 0fac2108c..ca9beb9ea 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-15, 16:14:08 GMT +# Date: 2024-11-15, 16:54:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -38813,6 +38813,9 @@ FFFD ; REPLACEMENT CHARACTER 1CCF7 ; OUTLINED DIGIT SEVEN 1CCF8 ; OUTLINED DIGIT EIGHT 1CCF9 ; OUTLINED DIGIT NINE +1CCFA ; SNAKE SYMBOL +1CCFB ; FLYING SAUCER SYMBOL +1CCFC ; NOSE SYMBOL 1CD00 ; BLOCK OCTANT-3 1CD01 ; BLOCK OCTANT-23 1CD02 ; BLOCK OCTANT-123 @@ -39249,6 +39252,12 @@ FFFD ; REPLACEMENT CHARACTER 1CEB1 ; KEYHOLE 1CEB2 ; OLD PERSONAL COMPUTER WITH MONITOR IN PORTRAIT ORIENTATION 1CEB3 ; BLACK RIGHT TRIANGLE CARET +1CEBA ; FRAGILE SYMBOL +1CEBB ; OFFICE BUILDING SYMBOL +1CEBC ; TREE SYMBOL +1CEBD ; APPLE SYMBOL +1CEBE ; CHERRY SYMBOL +1CEBF ; STRAWBERRY SYMBOL 1CF00 ; ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT 1CF01 ; ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT 1CF02 ; ZNAMENNY COMBINING MARK TSATA ON LEFT @@ -45399,6 +45408,7 @@ FFFD ; REPLACEMENT CHARACTER 1FBF7 ; SEGMENTED DIGIT SEVEN 1FBF8 ; SEGMENTED DIGIT EIGHT 1FBF9 ; SEGMENTED DIGIT NINE +1FBFA ; ALARM BELL SYMBOL 20000..2A6DF ; CJK UNIFIED IDEOGRAPH-* 2A700..2B739 ; CJK UNIFIED IDEOGRAPH-* 2B740..2B81D ; CJK UNIFIED IDEOGRAPH-* @@ -45746,6 +45756,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155407 +# Total code points: 155417 # EOF From d2160f51f6fbd455b5b54efe16b5f0c6ef475447 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 16 Nov 2024 01:50:34 +0100 Subject: [PATCH 34/38] 21 historical asteroid symbols (#590) * UnicodeData.txt from L2/23-207 * LB is a mess around here, but this should probably be AL. * Scripts.txt * New block * Regenerate UCD * GenerateEnums * Miscellaneous symbols supplement to vo=U * Regenerate UCD * comment * Regenerate UCD --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 7 +++--- .../data/ucd/dev/DerivedCoreProperties.txt | 9 +++---- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 ++--- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++-- .../data/ucd/dev/PropertyValueAliases.txt | 3 ++- unicodetools/data/ucd/dev/Scripts.txt | 9 +++---- unicodetools/data/ucd/dev/UnicodeData.txt | 21 ++++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 9 ++++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 13 +++++----- .../dev/extracted/DerivedCombiningClass.txt | 9 +++---- .../dev/extracted/DerivedEastAsianWidth.txt | 9 +++---- .../dev/extracted/DerivedGeneralCategory.txt | 14 +++++------ .../ucd/dev/extracted/DerivedLineBreak.txt | 15 +++++------ .../data/ucd/dev/extracted/DerivedName.txt | 25 +++++++++++++++++-- .../org/unicode/props/UcdPropertyValues.java | 1 + .../org/unicode/text/UCD/MakeUnicodeFiles.txt | 1 + .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + 18 files changed, 101 insertions(+), 57 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index ff1048e3e..bfd6a6e04 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -324,6 +324,7 @@ FFF0..FFFF; Specials 1BC00..1BC9F; Duployan 1BCA0..1BCAF; Shorthand Format Controls 1CC00..1CEBF; Symbols for Legacy Computing Supplement +1CEC0..1CEFF; Miscellaneous Symbols Supplement 1CF00..1CFCF; Znamenny Musical Notation 1D000..1D0FF; Byzantine Musical Symbols 1D100..1D1FF; Musical Symbols diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 3f8881bd2..6aee37058 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-15, 16:53:58 GMT +# Date: 2024-11-15, 18:26:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2096,9 +2096,10 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 18D09..18D1E ; 17.0 # [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1CCFA..1CCFC ; 17.0 # [3] SNAKE SYMBOL..NOSE SYMBOL -1CEBA..1CEBF ; 17.0 # [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEBA..1CED0 ; 17.0 # [23] FRAGILE SYMBOL..LEUKOTHEA +1F777..1F77A ; 17.0 # [4] VESTA FORM TWO..PARTHENOPE FORM TWO 1FBFA ; 17.0 # ALARM BELL SYMBOL -# Total code points: 419 +# Total code points: 440 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 4962e586b..02f5b3d3a 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-15, 16:54:24 GMT +# Date: 2024-11-15, 18:26:53 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -12762,7 +12762,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1CCF0..1CCF9 ; Grapheme_Base # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1CCFA..1CCFC ; Grapheme_Base # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; Grapheme_Base # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET -1CEBA..1CEBF ; Grapheme_Base # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEBA..1CED0 ; Grapheme_Base # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CF50..1CFC3 ; Grapheme_Base # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Grapheme_Base # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -12915,8 +12915,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F400..1F6D7 ; Grapheme_Base # So [728] RAT..ELEVATOR 1F6DC..1F6EC ; Grapheme_Base # So [17] WIRELESS..AIRPLANE ARRIVING 1F6F0..1F6FC ; Grapheme_Base # So [13] SATELLITE..ROLLER SKATE -1F700..1F776 ; Grapheme_Base # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F7D9 ; Grapheme_Base # So [95] HAUMEA..NINE POINTED WHITE STAR +1F700..1F7D9 ; Grapheme_Base # So [218] ALCHEMICAL SYMBOL FOR QUINTESSENCE..NINE POINTED WHITE STAR 1F7E0..1F7EB ; Grapheme_Base # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; Grapheme_Base # So HEAVY EQUALS SIGN 1F800..1F80B ; Grapheme_Base # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD @@ -12948,7 +12947,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 153111 +# Total code points: 153132 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index d45456764..212616cb8 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 16:54:31 GMT +# Date: 2024-11-15, 18:26:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2413,6 +2413,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1CCFA..1CCFC ; N # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; N # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CEBF ; N # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEC0..1CED0 ; N # So [17] HEBE..LEUKOTHEA 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2656,8 +2657,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1F6EB..1F6EC ; W # So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING 1F6F0..1F6F3 ; N # So [4] SATELLITE..PASSENGER SHIP 1F6F4..1F6FC ; W # So [9] SCOOTER..ROLLER SKATE -1F700..1F776 ; N # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F77F ; N # So [5] HAUMEA..ORCUS +1F700..1F77F ; N # So [128] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ORCUS 1F780..1F7D9 ; N # So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR 1F7E0..1F7EB ; W # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; W # So HEAVY EQUALS SIGN diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 602884f88..c8b79a161 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-15, 16:54:32 GMT +# Date: 2024-11-15, 18:27:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3324,6 +3324,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1CCFA..1CCFC ; AL # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; AL # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CEBF ; AL # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEC0..1CED0 ; AL # So [17] HEBE..LEUKOTHEA 1CF00..1CF2D ; CM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; CM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -3607,7 +3608,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1F6FD..1F6FF ; ID # Cn [3] .. 1F700..1F773 ; AL # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE 1F774..1F776 ; ID # So [3] LOT OF FORTUNE..LUNAR ECLIPSE -1F777..1F77A ; ID # Cn [4] .. +1F777..1F77A ; AL # So [4] VESTA FORM TWO..PARTHENOPE FORM TWO 1F77B..1F77F ; ID # So [5] HAUMEA..ORCUS 1F780..1F7D4 ; AL # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR 1F7D5..1F7D9 ; ID # So [5] CIRCLED TRIANGLE..NINE POINTED WHITE STAR diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 8e291eb46..8f082b5ee 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-15, 16:14:26 GMT +# Date: 2024-11-15, 18:27:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -363,6 +363,7 @@ blk; Misc_Math_Symbols_A ; Miscellaneous_Mathematical_Symbols_A blk; Misc_Math_Symbols_B ; Miscellaneous_Mathematical_Symbols_B blk; Misc_Pictographs ; Miscellaneous_Symbols_And_Pictographs blk; Misc_Symbols ; Miscellaneous_Symbols +blk; Misc_Symbols_Sup ; Miscellaneous_Symbols_Supplement blk; Misc_Technical ; Miscellaneous_Technical blk; Modi ; Modi blk; Modifier_Letters ; Spacing_Modifier_Letters diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 2fecdb546..6aab3fe08 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-15, 16:55:03 GMT +# Date: 2024-11-15, 18:27:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -525,7 +525,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1CCF0..1CCF9 ; Common # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1CCFA..1CCFC ; Common # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; Common # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET -1CEBA..1CEBF ; Common # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEBA..1CED0 ; Common # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CF50..1CFC3 ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -609,8 +609,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F400..1F6D7 ; Common # So [728] RAT..ELEVATOR 1F6DC..1F6EC ; Common # So [17] WIRELESS..AIRPLANE ARRIVING 1F6F0..1F6FC ; Common # So [13] SATELLITE..ROLLER SKATE -1F700..1F776 ; Common # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F7D9 ; Common # So [95] HAUMEA..NINE POINTED WHITE STAR +1F700..1F7D9 ; Common # So [218] ALCHEMICAL SYMBOL FOR QUINTESSENCE..NINE POINTED WHITE STAR 1F7E0..1F7EB ; Common # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; Common # So HEAVY EQUALS SIGN 1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD @@ -635,7 +634,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9064 +# Total code points: 9085 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 176d80912..2eeab0d76 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -33453,6 +33453,23 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1CEBD;APPLE SYMBOL;So;0;ON;;;;;N;;;;; 1CEBE;CHERRY SYMBOL;So;0;ON;;;;;N;;;;; 1CEBF;STRAWBERRY SYMBOL;So;0;ON;;;;;N;;;;; +1CEC0;HEBE;So;0;ON;;;;;N;;;;; +1CEC1;IRIS;So;0;ON;;;;;N;;;;; +1CEC2;FLORA;So;0;ON;;;;;N;;;;; +1CEC3;METIS;So;0;ON;;;;;N;;;;; +1CEC4;PARTHENOPE;So;0;ON;;;;;N;;;;; +1CEC5;VICTORIA;So;0;ON;;;;;N;;;;; +1CEC6;EGERIA;So;0;ON;;;;;N;;;;; +1CEC7;IRENE;So;0;ON;;;;;N;;;;; +1CEC8;EUNOMIA;So;0;ON;;;;;N;;;;; +1CEC9;PSYCHE;So;0;ON;;;;;N;;;;; +1CECA;THETIS;So;0;ON;;;;;N;;;;; +1CECB;MELPOMENE;So;0;ON;;;;;N;;;;; +1CECC;FORTUNA;So;0;ON;;;;;N;;;;; +1CECD;ASTRONOMICAL SYMBOL FOR ASTEROID PROSERPINA;So;0;ON;;;;;N;;;;; +1CECE;BELLONA;So;0;ON;;;;;N;;;;; +1CECF;AMPHITRITE;So;0;ON;;;;;N;;;;; +1CED0;LEUKOTHEA;So;0;ON;;;;;N;;;;; 1CF00;ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF01;ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF02;ZNAMENNY COMBINING MARK TSATA ON LEFT;Mn;0;NSM;;;;;N;;;;; @@ -38616,6 +38633,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F774;LOT OF FORTUNE;So;0;ON;;;;;N;;;;; 1F775;OCCULTATION;So;0;ON;;;;;N;;;;; 1F776;LUNAR ECLIPSE;So;0;ON;;;;;N;;;;; +1F777;VESTA FORM TWO;So;0;ON;;;;;N;;;;; +1F778;ASTRAEA FORM TWO;So;0;ON;;;;;N;;;;; +1F779;HYGIEA FORM TWO;So;0;ON;;;;;N;;;;; +1F77A;PARTHENOPE FORM TWO;So;0;ON;;;;;N;;;;; 1F77B;HAUMEA;So;0;ON;;;;;N;;;;; 1F77C;MAKEMAKE;So;0;ON;;;;;N;;;;; 1F77D;GONGGONG;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 74935bc34..dd6fbc1c1 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-15, 16:55:06 GMT +# Date: 2024-11-15, 18:27:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -51,6 +51,7 @@ # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF # Musical Symbols: U+1CF00..U+1CFCF +# Miscellaneous Symbols Supplement: U+1CEC0..U+1CEFF # Musical Symbols: U+1D000..U+1D1FF # Mayan Numerals: U+1D2E0..U+1D2FF # Symbols & Rods: U+1D300..U+1D37F @@ -2257,6 +2258,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1CCFA..1CCFC ; R # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; R # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CEBF ; R # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEC0..1CED0 ; U # So [17] HEBE..LEUKOTHEA +1CED1..1CEFF ; U # Cn [47] .. 1CF00..1CF2D ; U # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF2E..1CF2F ; U # Cn [2] .. 1CF30..1CF46 ; U # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG @@ -2469,9 +2472,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1F6ED..1F6EF ; U # Cn [3] .. 1F6F0..1F6FC ; U # So [13] SATELLITE..ROLLER SKATE 1F6FD..1F6FF ; U # Cn [3] .. -1F700..1F776 ; U # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F777..1F77A ; U # Cn [4] .. -1F77B..1F77F ; U # So [5] HAUMEA..ORCUS +1F700..1F77F ; U # So [128] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ORCUS 1F780..1F7D9 ; U # So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR 1F7DA..1F7DF ; U # Cn [6] .. 1F7E0..1F7EB ; U # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 955d42660..503947ace 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-15, 16:54:22 GMT +# Date: 2024-11-15, 18:26:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1227,8 +1227,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815001 code points not listed here. -# Total code points: 1095466 +# The above property value applies to 814980 code points not listed here. +# Total code points: 1095445 # ================================================ @@ -1961,7 +1961,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1CC00..1CCD5 ; ON # So [214] UP-POINTING GO-KART..LOWER RIGHT QUADRANT STANDING KNIGHT 1CCFA..1CCFC ; ON # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; ON # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET -1CEBA..1CEBF ; ON # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEBA..1CED0 ; ON # So [23] FRAGILE SYMBOL..LEUKOTHEA 1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON 1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; ON # So GREEK MUSICAL LEIMMA @@ -1994,8 +1994,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F400..1F6D7 ; ON # So [728] RAT..ELEVATOR 1F6DC..1F6EC ; ON # So [17] WIRELESS..AIRPLANE ARRIVING 1F6F0..1F6FC ; ON # So [13] SATELLITE..ROLLER SKATE -1F700..1F776 ; ON # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F7D9 ; ON # So [95] HAUMEA..NINE POINTED WHITE STAR +1F700..1F7D9 ; ON # So [218] ALCHEMICAL SYMBOL FOR QUINTESSENCE..NINE POINTED WHITE STAR 1F7E0..1F7EB ; ON # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; ON # So HEAVY EQUALS SIGN 1F800..1F80B ; ON # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD @@ -2017,7 +2016,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; ON # So ALARM BELL SYMBOL -# Total code points: 6796 +# Total code points: 6817 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 1bc1aa08a..c9dc66d89 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-15, 16:54:23 GMT +# Date: 2024-11-15, 18:26:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1885,7 +1885,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1CCF0..1CCF9 ; 0 # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1CCFA..1CCFC ; 0 # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; 0 # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET -1CEBA..1CEBF ; 0 # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEBA..1CED0 ; 0 # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CF00..1CF2D ; 0 # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; 0 # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; 0 # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2047,8 +2047,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F400..1F6D7 ; 0 # So [728] RAT..ELEVATOR 1F6DC..1F6EC ; 0 # So [17] WIRELESS..AIRPLANE ARRIVING 1F6F0..1F6FC ; 0 # So [13] SATELLITE..ROLLER SKATE -1F700..1F776 ; 0 # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F7D9 ; 0 # So [95] HAUMEA..NINE POINTED WHITE STAR +1F700..1F7D9 ; 0 # So [218] ALCHEMICAL SYMBOL FOR QUINTESSENCE..NINE POINTED WHITE STAR 1F7E0..1F7EB ; 0 # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; 0 # So HEAVY EQUALS SIGN 1F800..1F80B ; 0 # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD @@ -2085,7 +2084,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821162 code points not listed here. +# The above property value applies to 821141 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index e7cedf4a4..32ec0f89c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 16:54:26 GMT +# Date: 2024-11-15, 18:26:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1910,7 +1910,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1CCF0..1CCF9 ; N # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1CCFA..1CCFC ; N # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; N # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET -1CEBA..1CEBF ; N # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEBA..1CED0 ; N # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2109,8 +2109,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F6D3..1F6D4 ; N # So [2] STUPA..PAGODA 1F6E0..1F6EA ; N # So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE 1F6F0..1F6F3 ; N # So [4] SATELLITE..PASSENGER SHIP -1F700..1F776 ; N # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F7D9 ; N # So [95] HAUMEA..NINE POINTED WHITE STAR +1F700..1F7D9 ; N # So [218] ALCHEMICAL SYMBOL FOR QUINTESSENCE..NINE POINTED WHITE STAR 1F800..1F80B ; N # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD 1F810..1F847 ; N # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW 1F850..1F859 ; N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW @@ -2130,7 +2129,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760680 code points not listed here. +# The above property value applies to 760659 code points not listed here. # Total code points: 792275 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index a14726cda..1493ad97a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-15, 16:54:26 GMT +# Date: 2024-11-15, 18:26:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -599,7 +599,7 @@ FFFE..FFFF ; Cn # [2] .. 1BCA4..1CBFF ; Cn # [3932] .. 1CCFD..1CCFF ; Cn # [3] .. 1CEB4..1CEB9 ; Cn # [6] .. -1CEC0..1CEFF ; Cn # [64] .. +1CED1..1CEFF ; Cn # [47] .. 1CF2E..1CF2F ; Cn # [2] .. 1CF47..1CF4F ; Cn # [9] .. 1CFC4..1CFFF ; Cn # [60] .. @@ -713,7 +713,6 @@ FFFE..FFFF ; Cn # [2] .. 1F6D8..1F6DB ; Cn # [4] .. 1F6ED..1F6EF ; Cn # [3] .. 1F6FD..1F6FF ; Cn # [3] .. -1F777..1F77A ; Cn # [4] .. 1F7DA..1F7DF ; Cn # [6] .. 1F7EC..1F7EF ; Cn # [4] .. 1F7F1..1F7FF ; Cn # [15] .. @@ -749,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819114 +# Total code points: 819093 # ================================================ @@ -4260,7 +4259,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1CC00..1CCEF ; So # [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCFA..1CCFC ; So # [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; So # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET -1CEBA..1CEBF ; So # [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEBA..1CED0 ; So # [23] FRAGILE SYMBOL..LEUKOTHEA 1CF50..1CFC3 ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; So # [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -4296,8 +4295,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F400..1F6D7 ; So # [728] RAT..ELEVATOR 1F6DC..1F6EC ; So # [17] WIRELESS..AIRPLANE ARRIVING 1F6F0..1F6FC ; So # [13] SATELLITE..ROLLER SKATE -1F700..1F776 ; So # [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F7D9 ; So # [95] HAUMEA..NINE POINTED WHITE STAR +1F700..1F7D9 ; So # [218] ALCHEMICAL SYMBOL FOR QUINTESSENCE..NINE POINTED WHITE STAR 1F7E0..1F7EB ; So # [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; So # HEAVY EQUALS SIGN 1F800..1F80B ; So # [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD @@ -4319,7 +4317,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; So # ALARM BELL SYMBOL -# Total code points: 7420 +# Total code points: 7441 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 1bc76d4d3..51bf03908 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-15, 16:54:28 GMT +# Date: 2024-11-15, 18:26:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757234 code points not listed here. -# Total code points: 894702 +# The above property value applies to 757217 code points not listed here. +# Total code points: 894685 # ================================================ @@ -1471,7 +1471,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1CC00..1CCEF ; AL # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z 1CCFA..1CCFC ; AL # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; AL # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET -1CEBA..1CEBF ; AL # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL +1CEBA..1CED0 ; AL # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1616,6 +1616,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F650..1F675 ; AL # So [38] NORTH WEST POINTING LEAF..SWASH AMPERSAND ORNAMENT 1F67C..1F67F ; AL # So [4] VERY HEAVY SOLIDUS..REVERSE CHECKER BOARD 1F700..1F773 ; AL # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE +1F777..1F77A ; AL # So [4] VESTA FORM TWO..PARTHENOPE FORM TWO 1F780..1F7D4 ; AL # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR 1F800..1F80B ; AL # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD 1F810..1F847 ; AL # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW @@ -1630,7 +1631,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; AL # So ALARM BELL SYMBOL -# Total code points: 26886 +# Total code points: 26907 # ================================================ @@ -1870,8 +1871,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# The above property value applies to 61865 code points not listed here. -# Total code points: 172569 +# The above property value applies to 61861 code points not listed here. +# Total code points: 172565 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index ca9beb9ea..44ca333f1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-15, 16:54:28 GMT +# Date: 2024-11-15, 18:26:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -39258,6 +39258,23 @@ FFFD ; REPLACEMENT CHARACTER 1CEBD ; APPLE SYMBOL 1CEBE ; CHERRY SYMBOL 1CEBF ; STRAWBERRY SYMBOL +1CEC0 ; HEBE +1CEC1 ; IRIS +1CEC2 ; FLORA +1CEC3 ; METIS +1CEC4 ; PARTHENOPE +1CEC5 ; VICTORIA +1CEC6 ; EGERIA +1CEC7 ; IRENE +1CEC8 ; EUNOMIA +1CEC9 ; PSYCHE +1CECA ; THETIS +1CECB ; MELPOMENE +1CECC ; FORTUNA +1CECD ; ASTRONOMICAL SYMBOL FOR ASTEROID PROSERPINA +1CECE ; BELLONA +1CECF ; AMPHITRITE +1CED0 ; LEUKOTHEA 1CF00 ; ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT 1CF01 ; ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT 1CF02 ; ZNAMENNY COMBINING MARK TSATA ON LEFT @@ -44421,6 +44438,10 @@ FFFD ; REPLACEMENT CHARACTER 1F774 ; LOT OF FORTUNE 1F775 ; OCCULTATION 1F776 ; LUNAR ECLIPSE +1F777 ; VESTA FORM TWO +1F778 ; ASTRAEA FORM TWO +1F779 ; HYGIEA FORM TWO +1F77A ; PARTHENOPE FORM TWO 1F77B ; HAUMEA 1F77C ; MAKEMAKE 1F77D ; GONGGONG @@ -45756,6 +45777,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155417 +# Total code points: 155438 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 4ec9928b4..687558562 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -388,6 +388,7 @@ public enum Block_Values implements Named { Miscellaneous_Mathematical_Symbols_B("Misc_Math_Symbols_B"), Miscellaneous_Symbols_And_Pictographs("Misc_Pictographs"), Miscellaneous_Symbols("Misc_Symbols"), + Miscellaneous_Symbols_Supplement("Misc_Symbols_Sup"), Miscellaneous_Technical("Misc_Technical"), Modi("Modi"), Spacing_Modifier_Letters("Modifier_Letters"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index 9ba202a24..8e13a3f01 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -878,6 +878,7 @@ Format: kenFile skipValue=Rotated # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF # Musical Symbols: U+1CF00..U+1CFCF +# Miscellaneous Symbols Supplement: U+1CEC0..U+1CEFF # Musical Symbols: U+1D000..U+1D1FF # Mayan Numerals: U+1D2E0..U+1D2FF # Symbols & Rods: U+1D300..U+1D37F diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 8d596b081..97e71dbe0 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -207,6 +207,7 @@ Miao ; Miao Misc_Math_Symbols_A ; Miscellaneous_Mathematical_Symbols_A Misc_Math_Symbols_B ; Miscellaneous_Mathematical_Symbols_B Misc_Symbols ; Miscellaneous_Symbols +Misc_Symbols_Sup ; Miscellaneous_Symbols_Supplement Misc_Arrows ; Miscellaneous_Symbols_And_Arrows Misc_Pictographs ; Miscellaneous_Symbols_And_Pictographs Misc_Technical ; Miscellaneous_Technical From b9dd70216b5cd80a340b65cfd28a167c33c5f8e4 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 16 Nov 2024 02:35:32 +0100 Subject: [PATCH 35/38] 16 geomantic figures (#591) * UnicodeData.txt from L2/23-218 * LB=AL like the hexagrams? * Scripts.txt * New block * Regenerate UCD * GenerateEnums * comment * Miscellaneous symbols supplement to vo=U * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- .../data/ucd/dev/DerivedCoreProperties.txt | 5 +++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- .../data/ucd/dev/PropertyValueAliases.txt | 2 +- unicodetools/data/ucd/dev/Scripts.txt | 5 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 16 +++++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 6 ++++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 9 +++++---- .../dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 20 +++++++++++++++++-- 14 files changed, 74 insertions(+), 29 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 6aee37058..be4d70f2c 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-15, 18:26:29 GMT +# Date: 2024-11-16, 00:52:56 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2097,9 +2097,10 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 18D80..18DF2 ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1CCFA..1CCFC ; 17.0 # [3] SNAKE SYMBOL..NOSE SYMBOL 1CEBA..1CED0 ; 17.0 # [23] FRAGILE SYMBOL..LEUKOTHEA +1CEE0..1CEEF ; 17.0 # [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1F777..1F77A ; 17.0 # [4] VESTA FORM TWO..PARTHENOPE FORM TWO 1FBFA ; 17.0 # ALARM BELL SYMBOL -# Total code points: 440 +# Total code points: 456 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 02f5b3d3a..b8a412035 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-15, 18:26:53 GMT +# Date: 2024-11-16, 00:53:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -12763,6 +12763,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1CCFA..1CCFC ; Grapheme_Base # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; Grapheme_Base # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; Grapheme_Base # So [23] FRAGILE SYMBOL..LEUKOTHEA +1CEE0..1CEEF ; Grapheme_Base # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1CF50..1CFC3 ; Grapheme_Base # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Grapheme_Base # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -12947,7 +12948,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 153132 +# Total code points: 153148 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 212616cb8..e7e9a8adc 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 18:26:59 GMT +# Date: 2024-11-16, 00:53:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2414,6 +2414,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1CD00..1CEB3 ; N # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CEBF ; N # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CEC0..1CED0 ; N # So [17] HEBE..LEUKOTHEA +1CEE0..1CEEF ; N # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index c8b79a161..27118b713 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-15, 18:27:00 GMT +# Date: 2024-11-16, 00:53:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3325,6 +3325,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1CD00..1CEB3 ; AL # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CEBF ; AL # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CEC0..1CED0 ; AL # So [17] HEBE..LEUKOTHEA +1CEE0..1CEEF ; AL # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1CF00..1CF2D ; CM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; CM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 8f082b5ee..6c4a70472 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-15, 18:27:15 GMT +# Date: 2024-11-16, 00:53:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 6aab3fe08..7d9ceea69 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-15, 18:27:31 GMT +# Date: 2024-11-16, 00:53:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -526,6 +526,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1CCFA..1CCFC ; Common # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; Common # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; Common # So [23] FRAGILE SYMBOL..LEUKOTHEA +1CEE0..1CEEF ; Common # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1CF50..1CFC3 ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -634,7 +635,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9085 +# Total code points: 9101 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 2eeab0d76..ae33abf44 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -33470,6 +33470,22 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1CECE;BELLONA;So;0;ON;;;;;N;;;;; 1CECF;AMPHITRITE;So;0;ON;;;;;N;;;;; 1CED0;LEUKOTHEA;So;0;ON;;;;;N;;;;; +1CEE0;GEOMANTIC FIGURE POPULUS;So;0;ON;;;;;N;;;;; +1CEE1;GEOMANTIC FIGURE TRISTITIA;So;0;ON;;;;;N;;;;; +1CEE2;GEOMANTIC FIGURE ALBUS;So;0;ON;;;;;N;;;;; +1CEE3;GEOMANTIC FIGURE FORTUNA MAJOR;So;0;ON;;;;;N;;;;; +1CEE4;GEOMANTIC FIGURE RUBEUS;So;0;ON;;;;;N;;;;; +1CEE5;GEOMANTIC FIGURE ACQUISITIO;So;0;ON;;;;;N;;;;; +1CEE6;GEOMANTIC FIGURE CONJUNCTIO;So;0;ON;;;;;N;;;;; +1CEE7;GEOMANTIC FIGURE CAPUT DRACONIS;So;0;ON;;;;;N;;;;; +1CEE8;GEOMANTIC FIGURE LAETITIA;So;0;ON;;;;;N;;;;; +1CEE9;GEOMANTIC FIGURE CARCER;So;0;ON;;;;;N;;;;; +1CEEA;GEOMANTIC FIGURE AMISSIO;So;0;ON;;;;;N;;;;; +1CEEB;GEOMANTIC FIGURE PUELLA;So;0;ON;;;;;N;;;;; +1CEEC;GEOMANTIC FIGURE FORTUNA MINOR;So;0;ON;;;;;N;;;;; +1CEED;GEOMANTIC FIGURE PUER;So;0;ON;;;;;N;;;;; +1CEEE;GEOMANTIC FIGURE CAUDA DRACONIS;So;0;ON;;;;;N;;;;; +1CEEF;GEOMANTIC FIGURE VIA;So;0;ON;;;;;N;;;;; 1CF00;ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF01;ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF02;ZNAMENNY COMBINING MARK TSATA ON LEFT;Mn;0;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index dd6fbc1c1..dbe1f9d12 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-15, 18:27:34 GMT +# Date: 2024-11-16, 00:54:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2259,7 +2259,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1CD00..1CEB3 ; R # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CEBF ; R # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CEC0..1CED0 ; U # So [17] HEBE..LEUKOTHEA -1CED1..1CEFF ; U # Cn [47] .. +1CED1..1CEDF ; U # Cn [15] .. +1CEE0..1CEEF ; U # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0..1CEFF ; U # Cn [16] .. 1CF00..1CF2D ; U # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF2E..1CF2F ; U # Cn [2] .. 1CF30..1CF46 ; U # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 503947ace..1b61723a7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-15, 18:26:50 GMT +# Date: 2024-11-16, 00:53:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1227,8 +1227,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 814980 code points not listed here. -# Total code points: 1095445 +# The above property value applies to 814964 code points not listed here. +# Total code points: 1095429 # ================================================ @@ -1962,6 +1962,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1CCFA..1CCFC ; ON # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; ON # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; ON # So [23] FRAGILE SYMBOL..LEUKOTHEA +1CEE0..1CEEF ; ON # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON 1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; ON # So GREEK MUSICAL LEIMMA @@ -2016,7 +2017,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; ON # So ALARM BELL SYMBOL -# Total code points: 6817 +# Total code points: 6833 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index c9dc66d89..8eece77ce 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-15, 18:26:52 GMT +# Date: 2024-11-16, 00:53:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1886,6 +1886,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1CCFA..1CCFC ; 0 # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; 0 # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; 0 # So [23] FRAGILE SYMBOL..LEUKOTHEA +1CEE0..1CEEF ; 0 # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1CF00..1CF2D ; 0 # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; 0 # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; 0 # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2084,7 +2085,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821141 code points not listed here. +# The above property value applies to 821125 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 32ec0f89c..17eaf7b85 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 18:26:55 GMT +# Date: 2024-11-16, 00:53:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1911,6 +1911,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1CCFA..1CCFC ; N # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; N # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; N # So [23] FRAGILE SYMBOL..LEUKOTHEA +1CEE0..1CEEF ; N # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2129,7 +2130,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760659 code points not listed here. +# The above property value applies to 760643 code points not listed here. # Total code points: 792275 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 1493ad97a..929366593 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-15, 18:26:55 GMT +# Date: 2024-11-16, 00:53:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -599,7 +599,8 @@ FFFE..FFFF ; Cn # [2] .. 1BCA4..1CBFF ; Cn # [3932] .. 1CCFD..1CCFF ; Cn # [3] .. 1CEB4..1CEB9 ; Cn # [6] .. -1CED1..1CEFF ; Cn # [47] .. +1CED1..1CEDF ; Cn # [15] .. +1CEF0..1CEFF ; Cn # [16] .. 1CF2E..1CF2F ; Cn # [2] .. 1CF47..1CF4F ; Cn # [9] .. 1CFC4..1CFFF ; Cn # [60] .. @@ -748,7 +749,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819093 +# Total code points: 819077 # ================================================ @@ -4260,6 +4261,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1CCFA..1CCFC ; So # [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; So # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; So # [23] FRAGILE SYMBOL..LEUKOTHEA +1CEE0..1CEEF ; So # [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1CF50..1CFC3 ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; So # [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -4317,7 +4319,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; So # ALARM BELL SYMBOL -# Total code points: 7441 +# Total code points: 7457 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 51bf03908..ae8e56914 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-15, 18:26:57 GMT +# Date: 2024-11-16, 00:53:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757217 code points not listed here. -# Total code points: 894685 +# The above property value applies to 757201 code points not listed here. +# Total code points: 894669 # ================================================ @@ -1472,6 +1472,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1CCFA..1CCFC ; AL # So [3] SNAKE SYMBOL..NOSE SYMBOL 1CD00..1CEB3 ; AL # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; AL # So [23] FRAGILE SYMBOL..LEUKOTHEA +1CEE0..1CEEF ; AL # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1631,7 +1632,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; AL # So ALARM BELL SYMBOL -# Total code points: 26907 +# Total code points: 26923 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 44ca333f1..e8d04fd19 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-15, 18:26:57 GMT +# Date: 2024-11-16, 00:53:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -39275,6 +39275,22 @@ FFFD ; REPLACEMENT CHARACTER 1CECE ; BELLONA 1CECF ; AMPHITRITE 1CED0 ; LEUKOTHEA +1CEE0 ; GEOMANTIC FIGURE POPULUS +1CEE1 ; GEOMANTIC FIGURE TRISTITIA +1CEE2 ; GEOMANTIC FIGURE ALBUS +1CEE3 ; GEOMANTIC FIGURE FORTUNA MAJOR +1CEE4 ; GEOMANTIC FIGURE RUBEUS +1CEE5 ; GEOMANTIC FIGURE ACQUISITIO +1CEE6 ; GEOMANTIC FIGURE CONJUNCTIO +1CEE7 ; GEOMANTIC FIGURE CAPUT DRACONIS +1CEE8 ; GEOMANTIC FIGURE LAETITIA +1CEE9 ; GEOMANTIC FIGURE CARCER +1CEEA ; GEOMANTIC FIGURE AMISSIO +1CEEB ; GEOMANTIC FIGURE PUELLA +1CEEC ; GEOMANTIC FIGURE FORTUNA MINOR +1CEED ; GEOMANTIC FIGURE PUER +1CEEE ; GEOMANTIC FIGURE CAUDA DRACONIS +1CEEF ; GEOMANTIC FIGURE VIA 1CF00 ; ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT 1CF01 ; ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT 1CF02 ; ZNAMENNY COMBINING MARK TSATA ON LEFT @@ -45777,6 +45793,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155438 +# Total code points: 155454 # EOF From 1aaefbf71f82685505980c45b71334ed475372dd Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 16 Nov 2024 02:50:48 +0100 Subject: [PATCH 36/38] Ten Chemical Symbols (#586) * UnicodeData from L2/23-193R2 * LB=AL * Scripts.txt * Regenerate UCD * UTC-178 Correct the name of the provisionally assigned U+1F8D6 LONG RIGHTWARDS ARROW WITH THROUGH X to LONG RIGHTWARDS ARROW THROUGH X. (Ref. L2/24-008) * Regenerate UCD * UTC-177-C47 * Regenerate UCD * 2B96 to 1CEF0 * New block * Regenerate UCD * GenerateEnums --- unicodetools/data/ucd/dev/DerivedAge.txt | 7 ++++--- .../data/ucd/dev/DerivedCoreProperties.txt | 10 +++++++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +++- unicodetools/data/ucd/dev/LineBreak.txt | 4 +++- unicodetools/data/ucd/dev/PropList.txt | 2 +- unicodetools/data/ucd/dev/PropertyValueAliases.txt | 2 +- unicodetools/data/ucd/dev/Scripts.txt | 6 ++++-- unicodetools/data/ucd/dev/UnicodeData.txt | 10 ++++++++++ unicodetools/data/ucd/dev/VerticalOrientation.txt | 6 ++++-- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 10 ++++++---- .../ucd/dev/extracted/DerivedCombiningClass.txt | 6 ++++-- .../ucd/dev/extracted/DerivedEastAsianWidth.txt | 6 ++++-- .../ucd/dev/extracted/DerivedGeneralCategory.txt | 13 ++++++++----- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 10 ++++++---- .../data/ucd/dev/extracted/DerivedName.txt | 14 ++++++++++++-- 15 files changed, 77 insertions(+), 33 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index be4d70f2c..da42857db 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-16, 00:52:56 GMT +# Date: 2024-11-16, 01:38:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2097,10 +2097,11 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 18D80..18DF2 ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1CCFA..1CCFC ; 17.0 # [3] SNAKE SYMBOL..NOSE SYMBOL 1CEBA..1CED0 ; 17.0 # [23] FRAGILE SYMBOL..LEUKOTHEA -1CEE0..1CEEF ; 17.0 # [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEE0..1CEF0 ; 17.0 # [17] GEOMANTIC FIGURE POPULUS..MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1F777..1F77A ; 17.0 # [4] VESTA FORM TWO..PARTHENOPE FORM TWO +1F8D0..1F8D8 ; 17.0 # [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1FBFA ; 17.0 # ALARM BELL SYMBOL -# Total code points: 456 +# Total code points: 466 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index b8a412035..363e63f38 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-16, 00:53:20 GMT +# Date: 2024-11-16, 01:39:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -178,6 +178,7 @@ FF5E ; Math # Sm FULLWIDTH TILDE FFE2 ; Math # Sm FULLWIDTH NOT SIGN FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 10D8E..10D8F ; Math # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN +1CEF0 ; Math # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1D400..1D454 ; Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -253,8 +254,9 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1EEA5..1EEA9 ; Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Math # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F8D0..1F8D8 ; Math # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE -# Total code points: 2312 +# Total code points: 2322 # ================================================ @@ -12764,6 +12766,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1CD00..1CEB3 ; Grapheme_Base # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; Grapheme_Base # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CEE0..1CEEF ; Grapheme_Base # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0 ; Grapheme_Base # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1CF50..1CFC3 ; Grapheme_Base # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Grapheme_Base # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -12926,6 +12929,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F890..1F8AD ; Grapheme_Base # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; Grapheme_Base # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; Grapheme_Base # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; Grapheme_Base # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1FA53 ; Grapheme_Base # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; Grapheme_Base # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Grapheme_Base # So [13] BALLET SHOES..CRUTCH @@ -12948,7 +12952,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 153148 +# Total code points: 153158 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index e7e9a8adc..5928d481b 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-16, 00:53:26 GMT +# Date: 2024-11-16, 01:39:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2415,6 +2415,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1CEBA..1CEBF ; N # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CEC0..1CED0 ; N # So [17] HEBE..LEUKOTHEA 1CEE0..1CEEF ; N # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0 ; N # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2669,6 +2670,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; N # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; N # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; N # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F90C..1F93A ; W # So [47] PINCHED FINGERS..FENCER 1F93B ; N # So MODERN PENTATHLON diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 27118b713..562d7c557 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-16, 00:53:27 GMT +# Date: 2024-11-16, 01:39:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3326,6 +3326,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1CEBA..1CEBF ; AL # So [6] FRAGILE SYMBOL..STRAWBERRY SYMBOL 1CEC0..1CED0 ; AL # So [17] HEBE..LEUKOTHEA 1CEE0..1CEEF ; AL # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0 ; AL # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1CF00..1CF2D ; CM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; CM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -3625,6 +3626,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; AL # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; AL # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F90C ; EB # So PINCHED FINGERS 1F90D..1F90E ; ID # So [2] WHITE HEART..BROWN HEART diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 9e45465d3..6b10ed304 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-15, 16:14:24 GMT +# Date: 2024-11-16, 01:39:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 6c4a70472..31fadf9ed 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-16, 00:53:43 GMT +# Date: 2024-11-16, 01:39:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 7d9ceea69..dd3f21d49 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-16, 00:53:59 GMT +# Date: 2024-11-16, 01:40:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -527,6 +527,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1CD00..1CEB3 ; Common # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; Common # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CEE0..1CEEF ; Common # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0 ; Common # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1CF50..1CFC3 ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -620,6 +621,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; Common # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; Common # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; Common # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1FA53 ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Common # So [13] BALLET SHOES..CRUTCH @@ -635,7 +637,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9101 +# Total code points: 9111 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index ae33abf44..d4b4463da 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -33486,6 +33486,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1CEED;GEOMANTIC FIGURE PUER;So;0;ON;;;;;N;;;;; 1CEEE;GEOMANTIC FIGURE CAUDA DRACONIS;So;0;ON;;;;;N;;;;; 1CEEF;GEOMANTIC FIGURE VIA;So;0;ON;;;;;N;;;;; +1CEF0;MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR;Sm;0;ON;;;;;N;;;;; 1CF00;ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF01;ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF02;ZNAMENNY COMBINING MARK TSATA ON LEFT;Mn;0;NSM;;;;;N;;;;; @@ -38923,6 +38924,15 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F8BB;SOUTH WEST ARROW FROM BAR;So;0;ON;;;;;N;;;;; 1F8C0;LEFTWARDS ARROW FROM DOWNWARDS ARROW;So;0;ON;;;;;N;;;;; 1F8C1;RIGHTWARDS ARROW FROM DOWNWARDS ARROW;So;0;ON;;;;;N;;;;; +1F8D0;LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW;Sm;0;ON;;;;;N;;;;; +1F8D1;LONG RIGHTWARDS HARPOON OVER LONG LEFTWARDS HARPOON;Sm;0;ON;;;;;N;;;;; +1F8D2;LONG RIGHTWARDS HARPOON ABOVE SHORT LEFTWARDS HARPOON;Sm;0;ON;;;;;N;;;;; +1F8D3;SHORT RIGHTWARDS HARPOON ABOVE LONG LEFTWARDS HARPOON;Sm;0;ON;;;;;N;;;;; +1F8D4;LONG LEFTWARDS HARPOON ABOVE SHORT RIGHTWARDS HARPOON;Sm;0;ON;;;;;N;;;;; +1F8D5;SHORT LEFTWARDS HARPOON ABOVE LONG RIGHTWARDS HARPOON;Sm;0;ON;;;;;N;;;;; +1F8D6;LONG RIGHTWARDS ARROW THROUGH X;Sm;0;ON;;;;;N;;;;; +1F8D7;LONG RIGHTWARDS ARROW WITH DOUBLE SLASH;Sm;0;ON;;;;;N;;;;; +1F8D8;LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE;Sm;0;ON;;;;;N;;;;; 1F900;CIRCLED CROSS FORMEE WITH FOUR DOTS;So;0;ON;;;;;N;;;;; 1F901;CIRCLED CROSS FORMEE WITH TWO DOTS;So;0;ON;;;;;N;;;;; 1F902;CIRCLED CROSS FORMEE;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index dbe1f9d12..35e060550 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-16, 00:54:02 GMT +# Date: 2024-11-16, 01:40:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2261,7 +2261,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1CEC0..1CED0 ; U # So [17] HEBE..LEUKOTHEA 1CED1..1CEDF ; U # Cn [15] .. 1CEE0..1CEEF ; U # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA -1CEF0..1CEFF ; U # Cn [16] .. +1CEF0 ; U # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR +1CEF1..1CEFF ; U # Cn [15] .. 1CF00..1CF2D ; U # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF2E..1CF2F ; U # Cn [2] .. 1CF30..1CF46 ; U # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG @@ -2488,6 +2489,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1F890..1F8AD ; R # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; R # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; R # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; R # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1F9FF ; U # So [256] CIRCLED CROSS FORMEE WITH FOUR DOTS..NAZAR AMULET 1FA00..1FA53 ; U # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FA54..1FA5F ; U # Cn [12] .. diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 1b61723a7..d0982869f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-16, 00:53:17 GMT +# Date: 2024-11-16, 01:39:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1227,8 +1227,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 814964 code points not listed here. -# Total code points: 1095429 +# The above property value applies to 814954 code points not listed here. +# Total code points: 1095419 # ================================================ @@ -1963,6 +1963,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1CD00..1CEB3 ; ON # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; ON # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CEE0..1CEEF ; ON # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0 ; ON # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON 1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; ON # So GREEK MUSICAL LEIMMA @@ -2005,6 +2006,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; ON # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; ON # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; ON # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH @@ -2017,7 +2019,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; ON # So ALARM BELL SYMBOL -# Total code points: 6833 +# Total code points: 6843 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 8eece77ce..3ce9cfc7f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-16, 00:53:19 GMT +# Date: 2024-11-16, 01:39:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1887,6 +1887,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1CD00..1CEB3 ; 0 # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; 0 # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CEE0..1CEEF ; 0 # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0 ; 0 # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1CF00..1CF2D ; 0 # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; 0 # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; 0 # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2058,6 +2059,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F890..1F8AD ; 0 # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; 0 # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; 0 # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; 0 # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1FA53 ; 0 # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; 0 # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; 0 # So [13] BALLET SHOES..CRUTCH @@ -2085,7 +2087,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821125 code points not listed here. +# The above property value applies to 821115 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 17eaf7b85..9ab83f5ff 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-16, 00:53:22 GMT +# Date: 2024-11-16, 01:39:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1912,6 +1912,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1CD00..1CEB3 ; N # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; N # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CEE0..1CEEF ; N # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0 ; N # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2118,6 +2119,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; N # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; N # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; N # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F93B ; N # So MODERN PENTATHLON 1F946 ; N # So RIFLE @@ -2130,7 +2132,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760643 code points not listed here. +# The above property value applies to 760633 code points not listed here. # Total code points: 792275 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 929366593..c734fcdd8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-16, 00:53:22 GMT +# Date: 2024-11-16, 01:39:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -600,7 +600,7 @@ FFFE..FFFF ; Cn # [2] .. 1CCFD..1CCFF ; Cn # [3] .. 1CEB4..1CEB9 ; Cn # [6] .. 1CED1..1CEDF ; Cn # [15] .. -1CEF0..1CEFF ; Cn # [16] .. +1CEF1..1CEFF ; Cn # [15] .. 1CF2E..1CF2F ; Cn # [2] .. 1CF47..1CF4F ; Cn # [9] .. 1CFC4..1CFFF ; Cn # [60] .. @@ -723,7 +723,8 @@ FFFE..FFFF ; Cn # [2] .. 1F888..1F88F ; Cn # [8] .. 1F8AE..1F8AF ; Cn # [2] .. 1F8BC..1F8BF ; Cn # [4] .. -1F8C2..1F8FF ; Cn # [62] .. +1F8C2..1F8CF ; Cn # [14] .. +1F8D9..1F8FF ; Cn # [39] .. 1FA54..1FA5F ; Cn # [12] .. 1FA6E..1FA6F ; Cn # [2] .. 1FA7D..1FA7F ; Cn # [3] .. @@ -749,7 +750,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819077 +# Total code points: 819067 # ================================================ @@ -4042,6 +4043,7 @@ FF5E ; Sm # FULLWIDTH TILDE FFE2 ; Sm # FULLWIDTH NOT SIGN FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 10D8E..10D8F ; Sm # [2] GARAY PLUS SIGN..GARAY MINUS SIGN +1CEF0 ; Sm # MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1D6C1 ; Sm # MATHEMATICAL BOLD NABLA 1D6DB ; Sm # MATHEMATICAL BOLD PARTIAL DIFFERENTIAL 1D6FB ; Sm # MATHEMATICAL ITALIC NABLA @@ -4053,8 +4055,9 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 1D7A9 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA 1D7C3 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1EEF0..1EEF1 ; Sm # [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F8D0..1F8D8 ; Sm # [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE -# Total code points: 950 +# Total code points: 960 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index ae8e56914..dbc0c1e38 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-16, 00:53:24 GMT +# Date: 2024-11-16, 01:39:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757201 code points not listed here. -# Total code points: 894669 +# The above property value applies to 757191 code points not listed here. +# Total code points: 894659 # ================================================ @@ -1473,6 +1473,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1CD00..1CEB3 ; AL # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CEBA..1CED0 ; AL # So [23] FRAGILE SYMBOL..LEUKOTHEA 1CEE0..1CEEF ; AL # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA +1CEF0 ; AL # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1626,13 +1627,14 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; AL # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0..1F8D8 ; AL # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; AL # So ALARM BELL SYMBOL -# Total code points: 26923 +# Total code points: 26933 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index e8d04fd19..535c985b7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-16, 00:53:24 GMT +# Date: 2024-11-16, 01:39:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -39291,6 +39291,7 @@ FFFD ; REPLACEMENT CHARACTER 1CEED ; GEOMANTIC FIGURE PUER 1CEEE ; GEOMANTIC FIGURE CAUDA DRACONIS 1CEEF ; GEOMANTIC FIGURE VIA +1CEF0 ; MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR 1CF00 ; ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT 1CF01 ; ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT 1CF02 ; ZNAMENNY COMBINING MARK TSATA ON LEFT @@ -44728,6 +44729,15 @@ FFFD ; REPLACEMENT CHARACTER 1F8BB ; SOUTH WEST ARROW FROM BAR 1F8C0 ; LEFTWARDS ARROW FROM DOWNWARDS ARROW 1F8C1 ; RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8D0 ; LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW +1F8D1 ; LONG RIGHTWARDS HARPOON OVER LONG LEFTWARDS HARPOON +1F8D2 ; LONG RIGHTWARDS HARPOON ABOVE SHORT LEFTWARDS HARPOON +1F8D3 ; SHORT RIGHTWARDS HARPOON ABOVE LONG LEFTWARDS HARPOON +1F8D4 ; LONG LEFTWARDS HARPOON ABOVE SHORT RIGHTWARDS HARPOON +1F8D5 ; SHORT LEFTWARDS HARPOON ABOVE LONG RIGHTWARDS HARPOON +1F8D6 ; LONG RIGHTWARDS ARROW THROUGH X +1F8D7 ; LONG RIGHTWARDS ARROW WITH DOUBLE SLASH +1F8D8 ; LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900 ; CIRCLED CROSS FORMEE WITH FOUR DOTS 1F901 ; CIRCLED CROSS FORMEE WITH TWO DOTS 1F902 ; CIRCLED CROSS FORMEE @@ -45793,6 +45803,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155454 +# Total code points: 155464 # EOF From c0e13ddcccd44b22f6802820d5ab3185f5c75a4e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 16 Nov 2024 03:03:44 +0100 Subject: [PATCH 37/38] Tai Yo (#755) * UnicodeData.txt lines from L2/22-289R * LineBreak.txt lines from L2/22-289R * script * ShortBlockNames.txt * Blocks.txt * These signs look alphabetic * Missing semicolons * Regenerate UCD * Fix bad codepoint for PHO * Regenerate UCD * Mind the gap * GenerateEnums * UTC-176-C38 Change the name of provisionally assigned character U+1E6FE TAI YO SYMBOL MEUANG to TAI YO SYMBOL MUEANG. * Regenerate UCD --- unicodetools/data/ucd/dev/Blocks.txt | 1 + unicodetools/data/ucd/dev/DerivedAge.txt | 7 +- .../data/ucd/dev/DerivedCoreProperties.txt | 87 ++++++++++++++++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 13 ++- unicodetools/data/ucd/dev/LineBreak.txt | 13 ++- .../data/ucd/dev/NormalizationTest.txt | 12 ++- unicodetools/data/ucd/dev/PropList.txt | 8 +- .../data/ucd/dev/PropertyValueAliases.txt | 4 +- unicodetools/data/ucd/dev/Scripts.txt | 18 +++- unicodetools/data/ucd/dev/UnicodeData.txt | 55 ++++++++++++ .../data/ucd/dev/VerticalOrientation.txt | 13 ++- .../dev/auxiliary/GraphemeBreakProperty.txt | 8 +- .../dev/auxiliary/SentenceBreakProperty.txt | 17 +++- .../ucd/dev/auxiliary/WordBreakProperty.txt | 17 +++- .../ucd/dev/extracted/DerivedBidiClass.txt | 19 +++- .../dev/extracted/DerivedCombiningClass.txt | 19 +++- .../dev/extracted/DerivedEastAsianWidth.txt | 15 +++- .../dev/extracted/DerivedGeneralCategory.txt | 26 ++++-- .../ucd/dev/extracted/DerivedJoiningType.txt | 8 +- .../ucd/dev/extracted/DerivedLineBreak.txt | 21 +++-- .../data/ucd/dev/extracted/DerivedName.txt | 59 ++++++++++++- .../org/unicode/props/UcdPropertyValues.java | 2 + .../org/unicode/text/UCD/ShortBlockNames.txt | 1 + 23 files changed, 390 insertions(+), 53 deletions(-) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index bfd6a6e04..11304adfd 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -343,6 +343,7 @@ FFF0..FFFF; Specials 1E2C0..1E2FF; Wancho 1E4D0..1E4FF; Nag Mundari 1E5D0..1E5FF; Ol Onal +1E6C0..1E6FF; Tai Yo 1E7E0..1E7FF; Ethiopic Extended-B 1E800..1E8DF; Mende Kikakui 1E900..1E95F; Adlam diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index da42857db..d5b60d56b 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-16, 01:38:58 GMT +# Date: 2024-11-16, 01:52:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2098,10 +2098,13 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 1CCFA..1CCFC ; 17.0 # [3] SNAKE SYMBOL..NOSE SYMBOL 1CEBA..1CED0 ; 17.0 # [23] FRAGILE SYMBOL..LEUKOTHEA 1CEE0..1CEF0 ; 17.0 # [17] GEOMANTIC FIGURE POPULUS..MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR +1E6C0..1E6DE ; 17.0 # [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6F5 ; 17.0 # [22] TAI YO LETTER AA..TAI YO SIGN OM +1E6FE..1E6FF ; 17.0 # [2] TAI YO SYMBOL MUEANG..TAI YO XAM LAI 1F777..1F77A ; 17.0 # [4] VESTA FORM TWO..PARTHENOPE FORM TWO 1F8D0..1F8D8 ; 17.0 # [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1FBFA ; 17.0 # ALARM BELL SYMBOL -# Total code points: 466 +# Total code points: 521 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 363e63f38..9bd8bb8e3 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-16, 01:39:23 GMT +# Date: 2024-11-16, 01:52:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1406,6 +1406,17 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1E4EB ; Alphabetic # Lm NAG MUNDARI SIGN OJOD 1E5D0..1E5ED ; Alphabetic # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG 1E5F0 ; Alphabetic # Lo OL ONAL SIGN HODDOND +1E6C0..1E6DE ; Alphabetic # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; Alphabetic # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E3 ; Alphabetic # Mn TAI YO SIGN UE +1E6E4..1E6E5 ; Alphabetic # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E6 ; Alphabetic # Mn TAI YO SIGN AU +1E6E7..1E6ED ; Alphabetic # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6EE..1E6EF ; Alphabetic # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F0..1E6F4 ; Alphabetic # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6F5 ; Alphabetic # Mn TAI YO SIGN OM +1E6FE ; Alphabetic # Lo TAI YO SYMBOL MUEANG +1E6FF ; Alphabetic # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; Alphabetic # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1460,7 +1471,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 143083 +# Total code points: 143138 # ================================================ @@ -3530,6 +3541,11 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 1E4EB ; Case_Ignorable # Lm NAG MUNDARI SIGN OJOD 1E4EC..1E4EF ; Case_Ignorable # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; Case_Ignorable # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; Case_Ignorable # Mn TAI YO SIGN UE +1E6E6 ; Case_Ignorable # Mn TAI YO SIGN AU +1E6EE..1E6EF ; Case_Ignorable # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; Case_Ignorable # Mn TAI YO SIGN OM +1E6FF ; Case_Ignorable # Lm TAI YO XAM LAI 1E8D0..1E8D6 ; Case_Ignorable # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Case_Ignorable # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1E94B ; Case_Ignorable # Lm ADLAM NASALIZATION MARK @@ -3538,7 +3554,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2792 +# Total code points: 2798 # ================================================ @@ -6971,6 +6987,13 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1E4EB ; ID_Start # Lm NAG MUNDARI SIGN OJOD 1E5D0..1E5ED ; ID_Start # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG 1E5F0 ; ID_Start # Lo OL ONAL SIGN HODDOND +1E6C0..1E6DE ; ID_Start # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; ID_Start # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; ID_Start # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; ID_Start # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; ID_Start # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; ID_Start # Lo TAI YO SYMBOL MUEANG +1E6FF ; ID_Start # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; ID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; ID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -7021,7 +7044,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141582 +# Total code points: 141632 # ================================================ @@ -8395,6 +8418,17 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1E5EE..1E5EF ; ID_Continue # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E5F0 ; ID_Continue # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; ID_Continue # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE +1E6C0..1E6DE ; ID_Continue # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; ID_Continue # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E3 ; ID_Continue # Mn TAI YO SIGN UE +1E6E4..1E6E5 ; ID_Continue # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E6 ; ID_Continue # Mn TAI YO SIGN AU +1E6E7..1E6ED ; ID_Continue # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6EE..1E6EF ; ID_Continue # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F0..1E6F4 ; ID_Continue # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6F5 ; ID_Continue # Mn TAI YO SIGN OM +1E6FE ; ID_Continue # Lo TAI YO SYMBOL MUEANG +1E6FF ; ID_Continue # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; ID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -8450,7 +8484,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144915 +# Total code points: 144970 # ================================================ @@ -9188,6 +9222,13 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1E4EB ; XID_Start # Lm NAG MUNDARI SIGN OJOD 1E5D0..1E5ED ; XID_Start # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG 1E5F0 ; XID_Start # Lo OL ONAL SIGN HODDOND +1E6C0..1E6DE ; XID_Start # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; XID_Start # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; XID_Start # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; XID_Start # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; XID_Start # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; XID_Start # Lo TAI YO SYMBOL MUEANG +1E6FF ; XID_Start # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; XID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; XID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -9238,7 +9279,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141559 +# Total code points: 141609 # ================================================ @@ -10613,6 +10654,17 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1E5EE..1E5EF ; XID_Continue # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E5F0 ; XID_Continue # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; XID_Continue # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE +1E6C0..1E6DE ; XID_Continue # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; XID_Continue # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E3 ; XID_Continue # Mn TAI YO SIGN UE +1E6E4..1E6E5 ; XID_Continue # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E6 ; XID_Continue # Mn TAI YO SIGN AU +1E6E7..1E6ED ; XID_Continue # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6EE..1E6EF ; XID_Continue # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F0..1E6F4 ; XID_Continue # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6F5 ; XID_Continue # Mn TAI YO SIGN OM +1E6FE ; XID_Continue # Lo TAI YO SYMBOL MUEANG +1E6FF ; XID_Continue # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; XID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -10668,7 +10720,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144896 +# Total code points: 144951 # ================================================ @@ -11141,12 +11193,16 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 1E2EC..1E2EF ; Grapheme_Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; Grapheme_Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; Grapheme_Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; Grapheme_Extend # Mn TAI YO SIGN UE +1E6E6 ; Grapheme_Extend # Mn TAI YO SIGN AU +1E6EE..1E6EF ; Grapheme_Extend # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; Grapheme_Extend # Mn TAI YO SIGN OM 1E8D0..1E8D6 ; Grapheme_Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Grapheme_Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2231 +# Total code points: 2236 # ================================================ @@ -12849,6 +12905,13 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1E5F0 ; Grapheme_Base # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; Grapheme_Base # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E5FF ; Grapheme_Base # Po OL ONAL ABBREVIATION SIGN +1E6C0..1E6DE ; Grapheme_Base # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; Grapheme_Base # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; Grapheme_Base # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; Grapheme_Base # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; Grapheme_Base # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; Grapheme_Base # Lo TAI YO SYMBOL MUEANG +1E6FF ; Grapheme_Base # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; Grapheme_Base # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -12952,7 +13015,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 153158 +# Total code points: 153208 # ================================================ @@ -13499,12 +13562,16 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA 1E2EC..1E2EF ; InCB; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; InCB; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; InCB; Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; InCB; Extend # Mn TAI YO SIGN UE +1E6E6 ; InCB; Extend # Mn TAI YO SIGN AU +1E6EE..1E6EF ; InCB; Extend # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; InCB; Extend # Mn TAI YO SIGN OM 1E8D0..1E8D6 ; InCB; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; InCB; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1F3FB..1F3FF ; InCB; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2230 +# Total code points: 2235 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 5928d481b..61dd9fc45 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-16, 01:39:30 GMT +# Date: 2024-11-16, 01:52:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2526,6 +2526,17 @@ FFFD ; A # So REPLACEMENT CHARACTER 1E5F0 ; N # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; N # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E5FF ; N # Po OL ONAL ABBREVIATION SIGN +1E6C0..1E6DE ; N # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; N # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E3 ; N # Mn TAI YO SIGN UE +1E6E4..1E6E5 ; N # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E6 ; N # Mn TAI YO SIGN AU +1E6E7..1E6ED ; N # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6EE..1E6EF ; N # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F0..1E6F4 ; N # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6F5 ; N # Mn TAI YO SIGN OM +1E6FE ; N # Lo TAI YO SYMBOL MUEANG +1E6FF ; N # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; N # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; N # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; N # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 562d7c557..cd919df5b 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-16, 01:39:31 GMT +# Date: 2024-11-16, 01:52:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3437,6 +3437,17 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1E5F0 ; AL # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; NU # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E5FF ; AL # Po OL ONAL ABBREVIATION SIGN +1E6C0..1E6DE ; AL # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; AL # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E3 ; CM # Mn TAI YO SIGN UE +1E6E4..1E6E5 ; AL # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E6 ; CM # Mn TAI YO SIGN AU +1E6E7..1E6ED ; AL # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6EE..1E6EF ; CM # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F0..1E6F4 ; AL # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6F5 ; CM # Mn TAI YO SIGN OM +1E6FE ; AL # Lo TAI YO SYMBOL MUEANG +1E6FF ; AL # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; AL # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; AL # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; AL # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index d509d5415..5bed33153 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-17.0.0.txt -# Date: 2024-11-14, 19:48:38 GMT +# Date: 2024-11-16, 01:52:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -19079,6 +19079,16 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 1E5EE 0315 0300 05AE 0062;0061 05AE 1E5EE 0300 0315 0062;0061 05AE 1E5EE 0300 0315 0062;0061 05AE 1E5EE 0300 0315 0062;0061 05AE 1E5EE 0300 0315 0062; # (a◌𞗮◌̕◌̀◌֮b; a◌֮◌𞗮◌̀◌̕b; a◌֮◌𞗮◌̀◌̕b; a◌֮◌𞗮◌̀◌̕b; a◌֮◌𞗮◌̀◌̕b; ) LATIN SMALL LETTER A, OL ONAL SIGN MU, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 059A 0316 1DFA 1E5EF 0062;0061 1DFA 0316 1E5EF 059A 0062;0061 1DFA 0316 1E5EF 059A 0062;0061 1DFA 0316 1E5EF 059A 0062;0061 1DFA 0316 1E5EF 059A 0062; # (a◌֚◌̖◌᷺◌𞗯b; a◌᷺◌̖◌𞗯◌֚b; a◌᷺◌̖◌𞗯◌֚b; a◌᷺◌̖◌𞗯◌֚b; a◌᷺◌̖◌𞗯◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, OL ONAL SIGN IKIR, LATIN SMALL LETTER B 0061 1E5EF 059A 0316 1DFA 0062;0061 1DFA 1E5EF 0316 059A 0062;0061 1DFA 1E5EF 0316 059A 0062;0061 1DFA 1E5EF 0316 059A 0062;0061 1DFA 1E5EF 0316 059A 0062; # (a◌𞗯◌֚◌̖◌᷺b; a◌᷺◌𞗯◌̖◌֚b; a◌᷺◌𞗯◌̖◌֚b; a◌᷺◌𞗯◌̖◌֚b; a◌᷺◌𞗯◌̖◌֚b; ) LATIN SMALL LETTER A, OL ONAL SIGN IKIR, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 1E6E3 0062;00E0 05AE 1E6E3 0315 0062;0061 05AE 0300 1E6E3 0315 0062;00E0 05AE 1E6E3 0315 0062;0061 05AE 0300 1E6E3 0315 0062; # (a◌̕◌̀◌֮◌𞛣b; à◌֮◌𞛣◌̕b; a◌֮◌̀◌𞛣◌̕b; à◌֮◌𞛣◌̕b; a◌֮◌̀◌𞛣◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, TAI YO SIGN UE, LATIN SMALL LETTER B +0061 1E6E3 0315 0300 05AE 0062;0061 05AE 1E6E3 0300 0315 0062;0061 05AE 1E6E3 0300 0315 0062;0061 05AE 1E6E3 0300 0315 0062;0061 05AE 1E6E3 0300 0315 0062; # (a◌𞛣◌̕◌̀◌֮b; a◌֮◌𞛣◌̀◌̕b; a◌֮◌𞛣◌̀◌̕b; a◌֮◌𞛣◌̀◌̕b; a◌֮◌𞛣◌̀◌̕b; ) LATIN SMALL LETTER A, TAI YO SIGN UE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1E6E6 0062;00E0 05AE 1E6E6 0315 0062;0061 05AE 0300 1E6E6 0315 0062;00E0 05AE 1E6E6 0315 0062;0061 05AE 0300 1E6E6 0315 0062; # (a◌̕◌̀◌֮◌𞛦b; à◌֮◌𞛦◌̕b; a◌֮◌̀◌𞛦◌̕b; à◌֮◌𞛦◌̕b; a◌֮◌̀◌𞛦◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, TAI YO SIGN AU, LATIN SMALL LETTER B +0061 1E6E6 0315 0300 05AE 0062;0061 05AE 1E6E6 0300 0315 0062;0061 05AE 1E6E6 0300 0315 0062;0061 05AE 1E6E6 0300 0315 0062;0061 05AE 1E6E6 0300 0315 0062; # (a◌𞛦◌̕◌̀◌֮b; a◌֮◌𞛦◌̀◌̕b; a◌֮◌𞛦◌̀◌̕b; a◌֮◌𞛦◌̀◌̕b; a◌֮◌𞛦◌̀◌̕b; ) LATIN SMALL LETTER A, TAI YO SIGN AU, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1E6EE 0062;00E0 05AE 1E6EE 0315 0062;0061 05AE 0300 1E6EE 0315 0062;00E0 05AE 1E6EE 0315 0062;0061 05AE 0300 1E6EE 0315 0062; # (a◌̕◌̀◌֮◌𞛮b; à◌֮◌𞛮◌̕b; a◌֮◌̀◌𞛮◌̕b; à◌֮◌𞛮◌̕b; a◌֮◌̀◌𞛮◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, TAI YO SIGN AY, LATIN SMALL LETTER B +0061 1E6EE 0315 0300 05AE 0062;0061 05AE 1E6EE 0300 0315 0062;0061 05AE 1E6EE 0300 0315 0062;0061 05AE 1E6EE 0300 0315 0062;0061 05AE 1E6EE 0300 0315 0062; # (a◌𞛮◌̕◌̀◌֮b; a◌֮◌𞛮◌̀◌̕b; a◌֮◌𞛮◌̀◌̕b; a◌֮◌𞛮◌̀◌̕b; a◌֮◌𞛮◌̀◌̕b; ) LATIN SMALL LETTER A, TAI YO SIGN AY, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1E6EF 0062;00E0 05AE 1E6EF 0315 0062;0061 05AE 0300 1E6EF 0315 0062;00E0 05AE 1E6EF 0315 0062;0061 05AE 0300 1E6EF 0315 0062; # (a◌̕◌̀◌֮◌𞛯b; à◌֮◌𞛯◌̕b; a◌֮◌̀◌𞛯◌̕b; à◌֮◌𞛯◌̕b; a◌֮◌̀◌𞛯◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, TAI YO SIGN ANG, LATIN SMALL LETTER B +0061 1E6EF 0315 0300 05AE 0062;0061 05AE 1E6EF 0300 0315 0062;0061 05AE 1E6EF 0300 0315 0062;0061 05AE 1E6EF 0300 0315 0062;0061 05AE 1E6EF 0300 0315 0062; # (a◌𞛯◌̕◌̀◌֮b; a◌֮◌𞛯◌̀◌̕b; a◌֮◌𞛯◌̀◌̕b; a◌֮◌𞛯◌̀◌̕b; a◌֮◌𞛯◌̀◌̕b; ) LATIN SMALL LETTER A, TAI YO SIGN ANG, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1E6F5 0062;00E0 05AE 1E6F5 0315 0062;0061 05AE 0300 1E6F5 0315 0062;00E0 05AE 1E6F5 0315 0062;0061 05AE 0300 1E6F5 0315 0062; # (a◌̕◌̀◌֮◌𞛵b; à◌֮◌𞛵◌̕b; a◌֮◌̀◌𞛵◌̕b; à◌֮◌𞛵◌̕b; a◌֮◌̀◌𞛵◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, TAI YO SIGN OM, LATIN SMALL LETTER B +0061 1E6F5 0315 0300 05AE 0062;0061 05AE 1E6F5 0300 0315 0062;0061 05AE 1E6F5 0300 0315 0062;0061 05AE 1E6F5 0300 0315 0062;0061 05AE 1E6F5 0300 0315 0062; # (a◌𞛵◌̕◌̀◌֮b; a◌֮◌𞛵◌̀◌̕b; a◌֮◌𞛵◌̀◌̕b; a◌֮◌𞛵◌̀◌̕b; a◌֮◌𞛵◌̀◌̕b; ) LATIN SMALL LETTER A, TAI YO SIGN OM, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 059A 0316 1DFA 1E8D0 0062;0061 1DFA 0316 1E8D0 059A 0062;0061 1DFA 0316 1E8D0 059A 0062;0061 1DFA 0316 1E8D0 059A 0062;0061 1DFA 0316 1E8D0 059A 0062; # (a◌֚◌̖◌᷺◌𞣐b; a◌᷺◌̖◌𞣐◌֚b; a◌᷺◌̖◌𞣐◌֚b; a◌᷺◌̖◌𞣐◌֚b; a◌᷺◌̖◌𞣐◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, MENDE KIKAKUI COMBINING NUMBER TEENS, LATIN SMALL LETTER B 0061 1E8D0 059A 0316 1DFA 0062;0061 1DFA 1E8D0 0316 059A 0062;0061 1DFA 1E8D0 0316 059A 0062;0061 1DFA 1E8D0 0316 059A 0062;0061 1DFA 1E8D0 0316 059A 0062; # (a◌𞣐◌֚◌̖◌᷺b; a◌᷺◌𞣐◌̖◌֚b; a◌᷺◌𞣐◌̖◌֚b; a◌᷺◌𞣐◌̖◌֚b; a◌᷺◌𞣐◌̖◌֚b; ) LATIN SMALL LETTER A, MENDE KIKAKUI COMBINING NUMBER TEENS, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 1E8D1 0062;0061 1DFA 0316 1E8D1 059A 0062;0061 1DFA 0316 1E8D1 059A 0062;0061 1DFA 0316 1E8D1 059A 0062;0061 1DFA 0316 1E8D1 059A 0062; # (a◌֚◌̖◌᷺◌𞣑b; a◌᷺◌̖◌𞣑◌֚b; a◌᷺◌̖◌𞣑◌֚b; a◌᷺◌̖◌𞣑◌֚b; a◌᷺◌̖◌𞣑◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, MENDE KIKAKUI COMBINING NUMBER TENS, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 6b10ed304..7adc434ff 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-16, 01:39:45 GMT +# Date: 2024-11-16, 01:52:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -860,12 +860,16 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA 1E08F ; Other_Alphabetic # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E6E3 ; Other_Alphabetic # Mn TAI YO SIGN UE +1E6E6 ; Other_Alphabetic # Mn TAI YO SIGN AU +1E6EE..1E6EF ; Other_Alphabetic # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; Other_Alphabetic # Mn TAI YO SIGN OM 1E947 ; Other_Alphabetic # Mn ADLAM HAMZA 1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1506 +# Total code points: 1511 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 31fadf9ed..76a8cdd58 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-11-16, 01:39:48 GMT +# Date: 2024-11-16, 01:52:56 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -462,6 +462,7 @@ blk; Tai_Le ; Tai_Le blk; Tai_Tham ; Tai_Tham blk; Tai_Viet ; Tai_Viet blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols +blk; Tai_Yo ; Tai_Yo blk; Takri ; Takri blk; Tamil ; Tamil blk; Tamil_Sup ; Tamil_Supplement @@ -1467,6 +1468,7 @@ sc ; Talu ; New_Tai_Lue sc ; Taml ; Tamil sc ; Tang ; Tangut sc ; Tavt ; Tai_Viet +sc ; Tayo ; Tai_Yo sc ; Telu ; Telugu sc ; Tfng ; Tifinagh sc ; Tglg ; Tagalog diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index dd3f21d49..6e67b28b1 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-16, 01:40:06 GMT +# Date: 2024-11-16, 01:53:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3160,6 +3160,22 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ +1E6C0..1E6DE ; Tai_Yo # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; Tai_Yo # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E3 ; Tai_Yo # Mn TAI YO SIGN UE +1E6E4..1E6E5 ; Tai_Yo # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E6 ; Tai_Yo # Mn TAI YO SIGN AU +1E6E7..1E6ED ; Tai_Yo # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6EE..1E6EF ; Tai_Yo # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F0..1E6F4 ; Tai_Yo # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6F5 ; Tai_Yo # Mn TAI YO SIGN OM +1E6FE ; Tai_Yo # Lo TAI YO SYMBOL MUEANG +1E6FF ; Tai_Yo # Lm TAI YO XAM LAI + +# Total code points: 55 + +# ================================================ + 11DB0..11DD8 ; Tolong_Siki # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH 11DD9 ; Tolong_Siki # Lm TOLONG SIKI SIGN SELA 11DDA..11DDB ; Tolong_Siki # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index d4b4463da..7bd41dd52 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -36426,6 +36426,61 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1E5F9;OL ONAL DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 1E5FA;OL ONAL DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 1E5FF;OL ONAL ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; +1E6C0;TAI YO LETTER LOW KO;Lo;0;L;;;;;N;;;;; +1E6C1;TAI YO LETTER HIGH KO;Lo;0;L;;;;;N;;;;; +1E6C2;TAI YO LETTER LOW KHO;Lo;0;L;;;;;N;;;;; +1E6C3;TAI YO LETTER HIGH KHO;Lo;0;L;;;;;N;;;;; +1E6C4;TAI YO LETTER GO;Lo;0;L;;;;;N;;;;; +1E6C5;TAI YO LETTER NGO;Lo;0;L;;;;;N;;;;; +1E6C6;TAI YO LETTER CO;Lo;0;L;;;;;N;;;;; +1E6C7;TAI YO LETTER LOW XO;Lo;0;L;;;;;N;;;;; +1E6C8;TAI YO LETTER HIGH XO;Lo;0;L;;;;;N;;;;; +1E6C9;TAI YO LETTER LOW NYO;Lo;0;L;;;;;N;;;;; +1E6CA;TAI YO LETTER HIGH NYO;Lo;0;L;;;;;N;;;;; +1E6CB;TAI YO LETTER DO;Lo;0;L;;;;;N;;;;; +1E6CC;TAI YO LETTER LOW TO;Lo;0;L;;;;;N;;;;; +1E6CD;TAI YO LETTER HIGH TO;Lo;0;L;;;;;N;;;;; +1E6CE;TAI YO LETTER THO;Lo;0;L;;;;;N;;;;; +1E6CF;TAI YO LETTER NO;Lo;0;L;;;;;N;;;;; +1E6D0;TAI YO LETTER BO;Lo;0;L;;;;;N;;;;; +1E6D1;TAI YO LETTER LOW PO;Lo;0;L;;;;;N;;;;; +1E6D2;TAI YO LETTER HIGH PO;Lo;0;L;;;;;N;;;;; +1E6D3;TAI YO LETTER PHO;Lo;0;L;;;;;N;;;;; +1E6D4;TAI YO LETTER LOW FO;Lo;0;L;;;;;N;;;;; +1E6D5;TAI YO LETTER HIGH FO;Lo;0;L;;;;;N;;;;; +1E6D6;TAI YO LETTER MO;Lo;0;L;;;;;N;;;;; +1E6D7;TAI YO LETTER YO;Lo;0;L;;;;;N;;;;; +1E6D8;TAI YO LETTER LO;Lo;0;L;;;;;N;;;;; +1E6D9;TAI YO LETTER VO;Lo;0;L;;;;;N;;;;; +1E6DA;TAI YO LETTER LOW HO;Lo;0;L;;;;;N;;;;; +1E6DB;TAI YO LETTER HIGH HO;Lo;0;L;;;;;N;;;;; +1E6DC;TAI YO LETTER QO;Lo;0;L;;;;;N;;;;; +1E6DD;TAI YO LETTER LOW KVO;Lo;0;L;;;;;N;;;;; +1E6DE;TAI YO LETTER HIGH KVO;Lo;0;L;;;;;N;;;;; +1E6E0;TAI YO LETTER AA;Lo;0;L;;;;;N;;;;; +1E6E1;TAI YO LETTER I;Lo;0;L;;;;;N;;;;; +1E6E2;TAI YO LETTER UE;Lo;0;L;;;;;N;;;;; +1E6E3;TAI YO SIGN UE;Mn;230;NSM;;;;;N;;;;; +1E6E4;TAI YO LETTER U;Lo;0;L;;;;;N;;;;; +1E6E5;TAI YO LETTER AE;Lo;0;L;;;;;N;;;;; +1E6E6;TAI YO SIGN AU;Mn;230;NSM;;;;;N;;;;; +1E6E7;TAI YO LETTER O;Lo;0;L;;;;;N;;;;; +1E6E8;TAI YO LETTER E;Lo;0;L;;;;;N;;;;; +1E6E9;TAI YO LETTER IA;Lo;0;L;;;;;N;;;;; +1E6EA;TAI YO LETTER UEA;Lo;0;L;;;;;N;;;;; +1E6EB;TAI YO LETTER UA;Lo;0;L;;;;;N;;;;; +1E6EC;TAI YO LETTER OO;Lo;0;L;;;;;N;;;;; +1E6ED;TAI YO LETTER AUE;Lo;0;L;;;;;N;;;;; +1E6EE;TAI YO SIGN AY;Mn;230;NSM;;;;;N;;;;; +1E6EF;TAI YO SIGN ANG;Mn;230;NSM;;;;;N;;;;; +1E6F0;TAI YO LETTER AN;Lo;0;L;;;;;N;;;;; +1E6F1;TAI YO LETTER AM;Lo;0;L;;;;;N;;;;; +1E6F2;TAI YO LETTER AK;Lo;0;L;;;;;N;;;;; +1E6F3;TAI YO LETTER AT;Lo;0;L;;;;;N;;;;; +1E6F4;TAI YO LETTER AP;Lo;0;L;;;;;N;;;;; +1E6F5;TAI YO SIGN OM;Mn;230;NSM;;;;;N;;;;; +1E6FE;TAI YO SYMBOL MUEANG;Lo;0;L;;;;;N;;;;; +1E6FF;TAI YO XAM LAI;Lm;0;L;;;;;N;;;;; 1E7E0;ETHIOPIC SYLLABLE HHYA;Lo;0;L;;;;;N;;;;; 1E7E1;ETHIOPIC SYLLABLE HHYU;Lo;0;L;;;;;N;;;;; 1E7E2;ETHIOPIC SYLLABLE HHYI;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 35e060550..b87026465 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-16, 01:40:09 GMT +# Date: 2024-11-16, 01:53:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2383,6 +2383,17 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1E5F0 ; R # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; R # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E5FF ; R # Po OL ONAL ABBREVIATION SIGN +1E6C0..1E6DE ; R # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; R # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E3 ; R # Mn TAI YO SIGN UE +1E6E4..1E6E5 ; R # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E6 ; R # Mn TAI YO SIGN AU +1E6E7..1E6ED ; R # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6EE..1E6EF ; R # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F0..1E6F4 ; R # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6F5 ; R # Mn TAI YO SIGN OM +1E6FE ; R # Lo TAI YO SYMBOL MUEANG +1E6FF ; R # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; R # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; R # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; R # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 48351bad9..47a250bb6 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-17.0.0.txt -# Date: 2024-11-14, 19:48:31 GMT +# Date: 2024-11-16, 01:52:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -495,13 +495,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; Extend # Mn TAI YO SIGN UE +1E6E6 ; Extend # Mn TAI YO SIGN AU +1E6EE..1E6EF ; Extend # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; Extend # Mn TAI YO SIGN OM 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2236 +# Total code points: 2241 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 7521c2c87..47fa1117f 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-15, 16:14:43 GMT +# Date: 2024-11-16, 01:53:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -590,12 +590,16 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; Extend # Mn TAI YO SIGN UE +1E6E6 ; Extend # Mn TAI YO SIGN AU +1E6EE..1E6EF ; Extend # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; Extend # Mn TAI YO SIGN OM 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2642 +# Total code points: 2647 # ================================================ @@ -2562,6 +2566,13 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1E4EB ; OLetter # Lm NAG MUNDARI SIGN OJOD 1E5D0..1E5ED ; OLetter # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG 1E5F0 ; OLetter # Lo OL ONAL SIGN HODDOND +1E6C0..1E6DE ; OLetter # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; OLetter # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; OLetter # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; OLetter # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; OLetter # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; OLetter # Lo TAI YO SYMBOL MUEANG +1E6FF ; OLetter # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; OLetter # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; OLetter # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; OLetter # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -2611,7 +2622,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137167 +# Total code points: 137217 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 668852b57..995258336 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-11-14, 22:51:52 GMT +# Date: 2024-11-16, 01:53:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -626,13 +626,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; Extend # Mn TAI YO SIGN UE +1E6E6 ; Extend # Mn TAI YO SIGN AU +1E6EE..1E6EF ; Extend # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; Extend # Mn TAI YO SIGN OM 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2646 +# Total code points: 2651 # ================================================ @@ -1328,6 +1332,13 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1E4EB ; ALetter # Lm NAG MUNDARI SIGN OJOD 1E5D0..1E5ED ; ALetter # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG 1E5F0 ; ALetter # Lo OL ONAL SIGN HODDOND +1E6C0..1E6DE ; ALetter # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; ALetter # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; ALetter # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; ALetter # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; ALetter # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; ALetter # Lo TAI YO SYMBOL MUEANG +1E6FF ; ALetter # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; ALetter # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; ALetter # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1372,7 +1383,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33954 +# Total code points: 34004 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index d0982869f..3fe8eeed6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-16, 01:39:21 GMT +# Date: 2024-11-16, 01:52:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1204,6 +1204,13 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1E5F0 ; L # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; L # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E5FF ; L # Po OL ONAL ABBREVIATION SIGN +1E6C0..1E6DE ; L # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; L # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; L # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; L # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; L # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; L # Lo TAI YO SYMBOL MUEANG +1E6FF ; L # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; L # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; L # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; L # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1227,8 +1234,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 814954 code points not listed here. -# Total code points: 1095419 +# The above property value applies to 814899 code points not listed here. +# Total code points: 1095414 # ================================================ @@ -2432,11 +2439,15 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E2EC..1E2EF ; NSM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; NSM # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; NSM # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; NSM # Mn TAI YO SIGN UE +1E6E6 ; NSM # Mn TAI YO SIGN AU +1E6EE..1E6EF ; NSM # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; NSM # Mn TAI YO SIGN OM 1E8D0..1E8D6 ; NSM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2066 +# Total code points: 2071 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3ce9cfc7f..e38926f9e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-16, 01:39:23 GMT +# Date: 2024-11-16, 01:52:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1979,6 +1979,13 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1E5F0 ; 0 # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; 0 # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E5FF ; 0 # Po OL ONAL ABBREVIATION SIGN +1E6C0..1E6DE ; 0 # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; 0 # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; 0 # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; 0 # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; 0 # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; 0 # Lo TAI YO SYMBOL MUEANG +1E6FF ; 0 # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; 0 # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; 0 # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; 0 # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -2087,8 +2094,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821115 code points not listed here. -# Total code points: 1113148 +# The above property value applies to 821060 code points not listed here. +# Total code points: 1113143 # ================================================ @@ -2834,9 +2841,13 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E2EC..1E2EF ; 230 # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EF ; 230 # Mn NAG MUNDARI SIGN SUTUH 1E5EE ; 230 # Mn OL ONAL SIGN MU +1E6E3 ; 230 # Mn TAI YO SIGN UE +1E6E6 ; 230 # Mn TAI YO SIGN AU +1E6EE..1E6EF ; 230 # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; 230 # Mn TAI YO SIGN OM 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 541 +# Total code points: 546 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 9ab83f5ff..74e1ca4b1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-16, 01:39:25 GMT +# Date: 2024-11-16, 01:52:33 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2021,6 +2021,17 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1E5F0 ; N # Lo OL ONAL SIGN HODDOND 1E5F1..1E5FA ; N # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E5FF ; N # Po OL ONAL ABBREVIATION SIGN +1E6C0..1E6DE ; N # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; N # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E3 ; N # Mn TAI YO SIGN UE +1E6E4..1E6E5 ; N # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E6 ; N # Mn TAI YO SIGN AU +1E6E7..1E6ED ; N # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6EE..1E6EF ; N # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F0..1E6F4 ; N # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6F5 ; N # Mn TAI YO SIGN OM +1E6FE ; N # Lo TAI YO SYMBOL MUEANG +1E6FF ; N # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; N # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; N # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; N # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -2132,7 +2143,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760633 code points not listed here. +# The above property value applies to 760578 code points not listed here. # Total code points: 792275 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index c734fcdd8..90fd78b88 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-16, 01:39:26 GMT +# Date: 2024-11-16, 01:52:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -653,7 +653,10 @@ FFFE..FFFF ; Cn # [2] .. 1E300..1E4CF ; Cn # [464] .. 1E4FA..1E5CF ; Cn # [214] .. 1E5FB..1E5FE ; Cn # [4] .. -1E600..1E7DF ; Cn # [480] .. +1E600..1E6BF ; Cn # [192] .. +1E6DF ; Cn # +1E6F6..1E6FD ; Cn # [8] .. +1E700..1E7DF ; Cn # [224] .. 1E7E7 ; Cn # 1E7EC ; Cn # 1E7EF ; Cn # @@ -750,7 +753,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819067 +# Total code points: 819012 # ================================================ @@ -2183,9 +2186,10 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E030..1E06D ; Lm # [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E137..1E13D ; Lm # [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E4EB ; Lm # NAG MUNDARI SIGN OJOD +1E6FF ; Lm # TAI YO XAM LAI 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 409 +# Total code points: 410 # ================================================ @@ -2679,6 +2683,12 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1E4D0..1E4EA ; Lo # [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL 1E5D0..1E5ED ; Lo # [30] OL ONAL LETTER O..OL ONAL LETTER EG 1E5F0 ; Lo # OL ONAL SIGN HODDOND +1E6C0..1E6DE ; Lo # [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; Lo # [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; Lo # [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; Lo # [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; Lo # [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; Lo # TAI YO SYMBOL MUEANG 1E7E0..1E7E6 ; Lo # [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; Lo # [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; Lo # [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -2727,7 +2737,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136729 +# Total code points: 136778 # ================================================ @@ -3093,11 +3103,15 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E2EC..1E2EF ; Mn # [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; Mn # [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; Mn # [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; Mn # TAI YO SIGN UE +1E6E6 ; Mn # TAI YO SIGN AU +1E6EE..1E6EF ; Mn # [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; Mn # TAI YO SIGN OM 1E8D0..1E8D6 ; Mn # [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2058 +# Total code points: 2063 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index d505d6ad3..6cc91b403 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-11-14, 19:48:28 GMT +# Date: 2024-11-16, 01:52:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -582,6 +582,10 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 1E2EC..1E2EF ; T # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; T # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; T # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; T # Mn TAI YO SIGN UE +1E6E6 ; T # Mn TAI YO SIGN AU +1E6EE..1E6EF ; T # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; T # Mn TAI YO SIGN OM 1E8D0..1E8D6 ; T # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; T # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1E94B ; T # Lm ADLAM NASALIZATION MARK @@ -589,6 +593,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2223 +# Total code points: 2228 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index dbc0c1e38..97fdea86a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-16, 01:39:27 GMT +# Date: 2024-11-16, 01:52:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757191 code points not listed here. -# Total code points: 894659 +# The above property value applies to 757136 code points not listed here. +# Total code points: 894604 # ================================================ @@ -1550,6 +1550,13 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1E5D0..1E5ED ; AL # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG 1E5F0 ; AL # Lo OL ONAL SIGN HODDOND 1E5FF ; AL # Po OL ONAL ABBREVIATION SIGN +1E6C0..1E6DE ; AL # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO +1E6E0..1E6E2 ; AL # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE +1E6E4..1E6E5 ; AL # Lo [2] TAI YO LETTER U..TAI YO LETTER AE +1E6E7..1E6ED ; AL # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE +1E6F0..1E6F4 ; AL # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP +1E6FE ; AL # Lo TAI YO SYMBOL MUEANG +1E6FF ; AL # Lm TAI YO XAM LAI 1E7E0..1E7E6 ; AL # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; AL # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; AL # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1634,7 +1641,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; AL # So ALARM BELL SYMBOL -# Total code points: 26933 +# Total code points: 26983 # ================================================ @@ -2412,13 +2419,17 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 1E2EC..1E2EF ; CM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; CM # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E5EE..1E5EF ; CM # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E6E3 ; CM # Mn TAI YO SIGN UE +1E6E6 ; CM # Mn TAI YO SIGN AU +1E6EE..1E6EF ; CM # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG +1E6F5 ; CM # Mn TAI YO SIGN OM 1E8D0..1E8D6 ; CM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; CM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2510 +# Total code points: 2515 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 535c985b7..7d23d17ed 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-16, 01:39:28 GMT +# Date: 2024-11-16, 01:52:35 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -42231,6 +42231,61 @@ FFFD ; REPLACEMENT CHARACTER 1E5F9 ; OL ONAL DIGIT EIGHT 1E5FA ; OL ONAL DIGIT NINE 1E5FF ; OL ONAL ABBREVIATION SIGN +1E6C0 ; TAI YO LETTER LOW KO +1E6C1 ; TAI YO LETTER HIGH KO +1E6C2 ; TAI YO LETTER LOW KHO +1E6C3 ; TAI YO LETTER HIGH KHO +1E6C4 ; TAI YO LETTER GO +1E6C5 ; TAI YO LETTER NGO +1E6C6 ; TAI YO LETTER CO +1E6C7 ; TAI YO LETTER LOW XO +1E6C8 ; TAI YO LETTER HIGH XO +1E6C9 ; TAI YO LETTER LOW NYO +1E6CA ; TAI YO LETTER HIGH NYO +1E6CB ; TAI YO LETTER DO +1E6CC ; TAI YO LETTER LOW TO +1E6CD ; TAI YO LETTER HIGH TO +1E6CE ; TAI YO LETTER THO +1E6CF ; TAI YO LETTER NO +1E6D0 ; TAI YO LETTER BO +1E6D1 ; TAI YO LETTER LOW PO +1E6D2 ; TAI YO LETTER HIGH PO +1E6D3 ; TAI YO LETTER PHO +1E6D4 ; TAI YO LETTER LOW FO +1E6D5 ; TAI YO LETTER HIGH FO +1E6D6 ; TAI YO LETTER MO +1E6D7 ; TAI YO LETTER YO +1E6D8 ; TAI YO LETTER LO +1E6D9 ; TAI YO LETTER VO +1E6DA ; TAI YO LETTER LOW HO +1E6DB ; TAI YO LETTER HIGH HO +1E6DC ; TAI YO LETTER QO +1E6DD ; TAI YO LETTER LOW KVO +1E6DE ; TAI YO LETTER HIGH KVO +1E6E0 ; TAI YO LETTER AA +1E6E1 ; TAI YO LETTER I +1E6E2 ; TAI YO LETTER UE +1E6E3 ; TAI YO SIGN UE +1E6E4 ; TAI YO LETTER U +1E6E5 ; TAI YO LETTER AE +1E6E6 ; TAI YO SIGN AU +1E6E7 ; TAI YO LETTER O +1E6E8 ; TAI YO LETTER E +1E6E9 ; TAI YO LETTER IA +1E6EA ; TAI YO LETTER UEA +1E6EB ; TAI YO LETTER UA +1E6EC ; TAI YO LETTER OO +1E6ED ; TAI YO LETTER AUE +1E6EE ; TAI YO SIGN AY +1E6EF ; TAI YO SIGN ANG +1E6F0 ; TAI YO LETTER AN +1E6F1 ; TAI YO LETTER AM +1E6F2 ; TAI YO LETTER AK +1E6F3 ; TAI YO LETTER AT +1E6F4 ; TAI YO LETTER AP +1E6F5 ; TAI YO SIGN OM +1E6FE ; TAI YO SYMBOL MUEANG +1E6FF ; TAI YO XAM LAI 1E7E0 ; ETHIOPIC SYLLABLE HHYA 1E7E1 ; ETHIOPIC SYLLABLE HHYU 1E7E2 ; ETHIOPIC SYLLABLE HHYI @@ -45803,6 +45858,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155464 +# Total code points: 155519 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 687558562..396aa2979 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -487,6 +487,7 @@ public enum Block_Values implements Named { Tai_Tham("Tai_Tham"), Tai_Viet("Tai_Viet"), Tai_Xuan_Jing_Symbols("Tai_Xuan_Jing"), + Tai_Yo("Tai_Yo"), Takri("Takri"), Tamil("Tamil"), Tamil_Supplement("Tamil_Sup"), @@ -1997,6 +1998,7 @@ public enum Script_Values implements Named { Tamil("Taml"), Tangut("Tang"), Tai_Viet("Tavt"), + Tai_Yo("Tayo"), Telugu("Telu"), Tifinagh("Tfng"), Tagalog("Tglg"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 97e71dbe0..75df5748a 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -307,6 +307,7 @@ Tai_Le ; Tai_Le Tai_Tham ; Tai_Tham Tai_Viet ; Tai_Viet Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols +Tai_Yo ; Tai_Yo Tangsa ; Tangsa Takri ; Takri Tamil ; Tamil From cf818ba1493a59fd270d80e1b3a0dd7fdd5e3919 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 16 Nov 2024 03:23:17 +0100 Subject: [PATCH 38/38] Shatranj symbols (#665) * UnicodeData.txt lines from L2/24-020 * lb=AL like its chess neighbours. * Common * Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 6 +++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 ++-- unicodetools/data/ucd/dev/LineBreak.txt | 6 +++--- unicodetools/data/ucd/dev/Scripts.txt | 6 +++--- unicodetools/data/ucd/dev/UnicodeData.txt | 4 ++++ unicodetools/data/ucd/dev/VerticalOrientation.txt | 6 +++--- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 10 +++++----- .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 6 +++--- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 6 +++--- .../data/ucd/dev/extracted/DerivedGeneralCategory.txt | 10 +++++----- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 10 +++++----- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 8 ++++++-- 13 files changed, 48 insertions(+), 39 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index d5b60d56b..4c96be735 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-16, 01:52:06 GMT +# Date: 2024-11-16, 02:08:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2103,8 +2103,9 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 1E6FE..1E6FF ; 17.0 # [2] TAI YO SYMBOL MUEANG..TAI YO XAM LAI 1F777..1F77A ; 17.0 # [4] VESTA FORM TWO..PARTHENOPE FORM TWO 1F8D0..1F8D8 ; 17.0 # [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE +1FA54..1FA57 ; 17.0 # [4] WHITE CHESS FERZ..BLACK CHESS ALFIL 1FBFA ; 17.0 # ALARM BELL SYMBOL -# Total code points: 521 +# Total code points: 525 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 9bd8bb8e3..6411377c3 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-16, 01:52:31 GMT +# Date: 2024-11-16, 02:09:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -12993,7 +12993,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F8B0..1F8BB ; Grapheme_Base # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; Grapheme_Base # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F8D0..1F8D8 ; Grapheme_Base # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE -1F900..1FA53 ; Grapheme_Base # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1F900..1FA57 ; Grapheme_Base # So [344] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS ALFIL 1FA60..1FA6D ; Grapheme_Base # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Grapheme_Base # So [13] BALLET SHOES..CRUTCH 1FA80..1FA89 ; Grapheme_Base # So [10] YO-YO..HARP @@ -13015,7 +13015,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 153208 +# Total code points: 153212 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 61dd9fc45..8a71f1df7 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-16, 01:52:38 GMT +# Date: 2024-11-16, 02:09:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2688,7 +2688,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1F93C..1F945 ; W # So [10] WRESTLERS..GOAL NET 1F946 ; N # So RIFLE 1F947..1F9FF ; W # So [185] FIRST PLACE MEDAL..NAZAR AMULET -1FA00..1FA53 ; N # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP +1FA00..1FA57 ; N # So [88] NEUTRAL CHESS KING..BLACK CHESS ALFIL 1FA60..1FA6D ; N # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; W # So [13] BALLET SHOES..CRUTCH 1FA80..1FA89 ; W # So [10] YO-YO..HARP diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index cd919df5b..7b12fca2e 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-16, 01:52:39 GMT +# Date: 2024-11-16, 02:09:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3663,8 +3663,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1F9D0 ; ID # So FACE WITH MONOCLE 1F9D1..1F9DD ; EB # So [13] ADULT..ELF 1F9DE..1F9FF ; ID # So [34] GENIE..NAZAR AMULET -1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP -1FA54..1FA5F ; ID # Cn [12] .. +1FA00..1FA57 ; AL # So [88] NEUTRAL CHESS KING..BLACK CHESS ALFIL +1FA58..1FA5F ; ID # Cn [8] .. 1FA60..1FA6D ; ID # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA6E..1FA6F ; ID # Cn [2] .. 1FA70..1FA7C ; ID # So [13] BALLET SHOES..CRUTCH diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 6e67b28b1..ecc6efd67 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-16, 01:53:14 GMT +# Date: 2024-11-16, 02:09:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -622,7 +622,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F8B0..1F8BB ; Common # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; Common # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F8D0..1F8D8 ; Common # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE -1F900..1FA53 ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1F900..1FA57 ; Common # So [344] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS ALFIL 1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Common # So [13] BALLET SHOES..CRUTCH 1FA80..1FA89 ; Common # So [10] YO-YO..HARP @@ -637,7 +637,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9111 +# Total code points: 9115 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 7bd41dd52..65587dea4 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -39328,6 +39328,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1FA51;BLACK CHESS KNIGHT-QUEEN;So;0;ON;;;;;N;;;;; 1FA52;BLACK CHESS KNIGHT-ROOK;So;0;ON;;;;;N;;;;; 1FA53;BLACK CHESS KNIGHT-BISHOP;So;0;ON;;;;;N;;;;; +1FA54;WHITE CHESS FERZ;So;0;ON;;;;;N;;;;; +1FA55;WHITE CHESS ALFIL;So;0;ON;;;;;N;;;;; +1FA56;BLACK CHESS FERZ;So;0;ON;;;;;N;;;;; +1FA57;BLACK CHESS ALFIL;So;0;ON;;;;;N;;;;; 1FA60;XIANGQI RED GENERAL;So;0;ON;;;;;N;;;;; 1FA61;XIANGQI RED MANDARIN;So;0;ON;;;;;N;;;;; 1FA62;XIANGQI RED ELEPHANT;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index b87026465..4b8ad73b5 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-16, 01:53:17 GMT +# Date: 2024-11-16, 02:10:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2502,8 +2502,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1F8C0..1F8C1 ; R # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F8D0..1F8D8 ; R # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1F9FF ; U # So [256] CIRCLED CROSS FORMEE WITH FOUR DOTS..NAZAR AMULET -1FA00..1FA53 ; U # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP -1FA54..1FA5F ; U # Cn [12] .. +1FA00..1FA57 ; U # So [88] NEUTRAL CHESS KING..BLACK CHESS ALFIL +1FA58..1FA5F ; U # Cn [8] .. 1FA60..1FA6D ; U # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA6E..1FA6F ; U # Cn [2] .. 1FA70..1FA7C ; U # So [13] BALLET SHOES..CRUTCH diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 3fe8eeed6..d77c16711 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-16, 01:52:29 GMT +# Date: 2024-11-16, 02:09:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1234,8 +1234,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 814899 code points not listed here. -# Total code points: 1095414 +# The above property value applies to 814895 code points not listed here. +# Total code points: 1095410 # ================================================ @@ -2014,7 +2014,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F8B0..1F8BB ; ON # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; ON # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F8D0..1F8D8 ; ON # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE -1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1F900..1FA57 ; ON # So [344] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS ALFIL 1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH 1FA80..1FA89 ; ON # So [10] YO-YO..HARP @@ -2026,7 +2026,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; ON # So ALARM BELL SYMBOL -# Total code points: 6843 +# Total code points: 6847 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index e38926f9e..1a14c41b6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-16, 01:52:31 GMT +# Date: 2024-11-16, 02:09:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2067,7 +2067,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F8B0..1F8BB ; 0 # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; 0 # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F8D0..1F8D8 ; 0 # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE -1F900..1FA53 ; 0 # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1F900..1FA57 ; 0 # So [344] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS ALFIL 1FA60..1FA6D ; 0 # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; 0 # So [13] BALLET SHOES..CRUTCH 1FA80..1FA89 ; 0 # So [10] YO-YO..HARP @@ -2094,7 +2094,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821060 code points not listed here. +# The above property value applies to 821056 code points not listed here. # Total code points: 1113143 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 74e1ca4b1..bdb021cd2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-16, 01:52:33 GMT +# Date: 2024-11-16, 02:09:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2134,7 +2134,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F93B ; N # So MODERN PENTATHLON 1F946 ; N # So RIFLE -1FA00..1FA53 ; N # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP +1FA00..1FA57 ; N # So [88] NEUTRAL CHESS KING..BLACK CHESS ALFIL 1FA60..1FA6D ; N # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FB00..1FB92 ; N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; N # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE @@ -2143,7 +2143,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760578 code points not listed here. +# The above property value applies to 760574 code points not listed here. # Total code points: 792275 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 90fd78b88..6ed91b50f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-16, 01:52:34 GMT +# Date: 2024-11-16, 02:09:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -728,7 +728,7 @@ FFFE..FFFF ; Cn # [2] .. 1F8BC..1F8BF ; Cn # [4] .. 1F8C2..1F8CF ; Cn # [14] .. 1F8D9..1F8FF ; Cn # [39] .. -1FA54..1FA5F ; Cn # [12] .. +1FA58..1FA5F ; Cn # [8] .. 1FA6E..1FA6F ; Cn # [2] .. 1FA7D..1FA7F ; Cn # [3] .. 1FA8A..1FA8E ; Cn # [5] .. @@ -753,7 +753,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819012 +# Total code points: 819008 # ================================================ @@ -4324,7 +4324,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F890..1F8AD ; So # [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8BB ; So # [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR 1F8C0..1F8C1 ; So # [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW -1F900..1FA53 ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1F900..1FA57 ; So # [344] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS ALFIL 1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; So # [13] BALLET SHOES..CRUTCH 1FA80..1FA89 ; So # [10] YO-YO..HARP @@ -4336,7 +4336,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; So # ALARM BELL SYMBOL -# Total code points: 7457 +# Total code points: 7461 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 97fdea86a..c59578005 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-16, 01:52:35 GMT +# Date: 2024-11-16, 02:09:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1636,12 +1636,12 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F8D0..1F8D8 ; AL # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT -1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP +1FA00..1FA57 ; AL # So [88] NEUTRAL CHESS KING..BLACK CHESS ALFIL 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; AL # So ALARM BELL SYMBOL -# Total code points: 26983 +# Total code points: 26987 # ================================================ @@ -1881,8 +1881,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# The above property value applies to 61861 code points not listed here. -# Total code points: 172565 +# The above property value applies to 61857 code points not listed here. +# Total code points: 172561 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 7d23d17ed..5875dcddf 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-16, 01:52:35 GMT +# Date: 2024-11-16, 02:09:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -45133,6 +45133,10 @@ FFFD ; REPLACEMENT CHARACTER 1FA51 ; BLACK CHESS KNIGHT-QUEEN 1FA52 ; BLACK CHESS KNIGHT-ROOK 1FA53 ; BLACK CHESS KNIGHT-BISHOP +1FA54 ; WHITE CHESS FERZ +1FA55 ; WHITE CHESS ALFIL +1FA56 ; BLACK CHESS FERZ +1FA57 ; BLACK CHESS ALFIL 1FA60 ; XIANGQI RED GENERAL 1FA61 ; XIANGQI RED MANDARIN 1FA62 ; XIANGQI RED ELEPHANT @@ -45858,6 +45862,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155519 +# Total code points: 155523 # EOF