From da62528ee762a5ec45304204c0566e64cf491991 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:31:06 +0200 Subject: [PATCH 01/17] UnicodeData.txt and hardcoded ranges from L2/23-246 --- unicodetools/data/ucd/dev/UnicodeData.txt | 4 +++- .../src/main/java/org/unicode/text/UCD/UCD.java | 10 ++++++++++ .../src/main/java/org/unicode/text/UCD/UCD_Types.java | 6 ++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 7faa6be2c..2987129d4 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,5 @@ +18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; +18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -26008,7 +26010,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; -187F7;;Lo;0;L;;;;;N;;;;; +187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; 18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;; 18802;TANGUT COMPONENT-003;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index 1d572baa5..97e18d15d 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1308,6 +1308,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // Unicode 12 added TANGUT IDEOGRAPH-187F2..TANGUT IDEOGRAPH-187F7. return TANGUT_BASE; } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. + if (ch <= 0x187FF && rCompositeVersion >= 0x100000) { + // Unicode [..] added TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF. + return TANGUT_BASE; + } } if (rCompositeVersion >= 0xd0000) { @@ -1320,6 +1325,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { if (ch <= 0x18D08) { return TANGUT_SUP_BASE; // 18D00..18D08 Tangut Ideograph Supplement } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. + if (ch <= 0x18D1C && rCompositeVersion >= 0x100000) { + // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. + return TANGUT_BASE; + } } // 20000..2A6DF; CJK Unified Ideographs Extension B diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index 972753c37..25b38407b 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -27,10 +27,16 @@ public interface UCD_Types { // Unicode 12: // 17000;;Lo;0;L;;;;;N;;;;; // 187F7;;Lo;0;L;;;;;N;;;;; + // Unicode [..]: + // 17000;;Lo;0;L;;;;;N;;;;; + // 187FF;;Lo;0;L;;;;;N;;;;; public static final int TANGUT_SUP_BASE = 0x18D00; // Unicode 13: // 18D00;;Lo;0;L;;;;;N;;;;; // 18D08;;Lo;0;L;;;;;N;;;;; + // Unicode [..]: + // 18D00;;Lo;0;L;;;;;N;;;;; + // 18D1C;;Lo;0;L;;;;;N;;;;; public static final int // 4E00;;Lo;0;L;;;;;N;;;;; From db777acf3a6bdffde6402f67aa7984d9cf4e3d3e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:35:02 +0200 Subject: [PATCH 02/17] LineBreak.txt --- unicodetools/data/ucd/dev/LineBreak.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index d93ba5183..aa3f1f3cf 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-10-16, 14:22:28 GMT +# Date: 2023-10-19, 22:32:42 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3216,10 +3216,11 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FE3 ; NS # Lm OLD CHINESE ITERATION MARK 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +17000..187FF ; ID # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; AL # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 From bf8b3bbcf1b6e2d994c08ba34a06c61dcd7da06e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:40:08 +0200 Subject: [PATCH 03/17] new block --- unicodetools/data/ucd/dev/Blocks.txt | 1 + .../src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 431120ef2..8ad0904b7 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -304,6 +304,7 @@ FFF0..FFFF; Specials 18800..18AFF; Tangut Components 18B00..18CFF; Khitan Small Script 18D00..18D7F; Tangut Supplement +18D80..18DFF; Tangut Components Supplement 1AFF0..1AFFF; Kana Extended-B 1B000..1B0FF; Kana Supplement 1B100..1B12F; Kana Extended-A diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 8a43fe8b7..24b504f3f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -302,6 +302,7 @@ Tamil ; Tamil Tamil_Sup ; Tamil_Supplement Tangut ; Tangut Tangut_Components ; Tangut_Components +Tangut_Components_Sup ; Tangut_Components_Supplement Tangut_Sup ; Tangut_Supplement Telugu ; Telugu Thaana ; Thaana From 7505d1bfffed85eb7eb433e84b45dd712c78e9f1 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:42:09 +0200 Subject: [PATCH 04/17] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 7 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 50 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 7 +-- unicodetools/data/ucd/dev/PropList.txt | 9 ++-- .../data/ucd/dev/PropertyValueAliases.txt | 3 +- unicodetools/data/ucd/dev/Scripts.txt | 9 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 8 ++- .../data/ucd/dev/VerticalOrientation.txt | 10 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 10 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 +- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++-- .../dev/extracted/DerivedCombiningClass.txt | 10 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 14 +++--- .../dev/extracted/DerivedGeneralCategory.txt | 16 +++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 14 +++--- .../data/ucd/dev/extracted/DerivedName.txt | 10 ++-- 16 files changed, 101 insertions(+), 91 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 386d713b9..21ce575f1 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-10-17, 12:28:26 GMT +# Date: 2023-10-19, 22:40:51 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2019,7 +2019,10 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT 116D0..116E3 ; 16.0 # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO 11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +187F8..187FF ; 16.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF +18D09..18D1C ; 16.0 # [20] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; 16.0 # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 125 +# Total code points: 155 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index c00c3c976..ed92d48fe 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-10-17, 12:28:59 GMT +# Date: 2023-10-19, 22:41:18 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1288,9 +1288,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FE0..16FE1 ; Alphabetic # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Alphabetic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; Alphabetic # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Alphabetic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1407,7 +1407,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138481 +# Total code points: 138511 # ================================================ @@ -6764,9 +6764,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; ID_Start # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -6872,7 +6872,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137059 +# Total code points: 137089 # ================================================ @@ -8093,9 +8093,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FE3 ; ID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; ID_Continue # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8236,7 +8236,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140232 +# Total code points: 140262 # ================================================ @@ -8886,9 +8886,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; XID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; XID_Start # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; XID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8994,7 +8994,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137036 +# Total code points: 137066 # ================================================ @@ -10216,9 +10216,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FE3 ; XID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; XID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; XID_Continue # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; XID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -10359,7 +10359,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140213 +# Total code points: 140243 # ================================================ @@ -12355,9 +12355,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Grapheme_Base # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Grapheme_Base # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; Grapheme_Base # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Grapheme_Base # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Grapheme_Base # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -12555,7 +12555,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 147736 +# Total code points: 147766 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index b28cc2558..02d75b346 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-10-17, 12:29:07 GMT +# Date: 2023-10-19, 22:41:27 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2311,10 +2311,11 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +17000..187FF ; W # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; N # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 8c1f3934d..de0a1705d 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2023-10-13, 11:33:44 GMT +# Date: 2023-10-19, 22:41:38 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -849,9 +849,8 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER -17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; Ideographic # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -863,7 +862,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106476 +# Total code points: 106504 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 17ffec935..898cd4513 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-16.0.0.txt -# Date: 2023-10-17, 12:29:15 GMT +# Date: 2023-10-19, 22:41:41 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -456,6 +456,7 @@ blk; Tamil_Sup ; Tamil_Supplement blk; Tangsa ; Tangsa blk; Tangut ; Tangut blk; Tangut_Components ; Tangut_Components +blk; Tangut_Components_Sup ; Tangut_Components_Supplement blk; Tangut_Sup ; Tangut_Supplement blk; Telugu ; Telugu blk; Thaana ; Thaana diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index a4ef9c2bd..23d0eb54d 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-10-17, 12:29:34 GMT +# Date: 2023-10-19, 22:42:02 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2749,11 +2749,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ 16FE0 ; Tangut # Lm TANGUT ITERATION MARK -17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF ; Tangut # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18D00..18D08 ; Tangut # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 +18D00..18D1C ; Tangut # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C -# Total code points: 6914 +# Total code points: 6942 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 2987129d4..97680c6d3 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,5 +1,3 @@ -18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; -18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -26010,6 +26008,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; +187F7;;Lo;0;L;;;;;N;;;;; +187F8;;Lo;0;L;;;;;N;;;;; 187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; 18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;; @@ -27251,6 +27251,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; 18D08;;Lo;0;L;;;;;N;;;;; +18D09;;Lo;0;L;;;;;N;;;;; +18D1C;;Lo;0;L;;;;;N;;;;; +18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; +18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 1AFF0;KATAKANA LETTER MINNAN TONE-2;Lm;0;L;;;;;N;;;;; 1AFF1;KATAKANA LETTER MINNAN TONE-3;Lm;0;L;;;;;N;;;;; 1AFF2;KATAKANA LETTER MINNAN TONE-4;Lm;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 15556dc1e..8e4f9c6b5 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-17, 12:29:38 GMT +# Date: 2023-10-19, 22:42:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2143,13 +2143,13 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FE5..16FEF ; U # Cn [11] .. 16FF0..16FF1 ; U # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FFF ; U # Cn [14] .. -17000..187F7 ; U # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -187F8..187FF ; U # Cn [8] .. +17000..187FF ; U # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CD6..18CFF ; U # Cn [42] .. -18D00..18D08 ; U # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 -18D09..18D7F ; U # Cn [119] .. +18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D1D..18D7F ; U # Cn [99] .. +18D80..18D81 ; R # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 54c8be3db..21c9c3349 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-10-17, 12:29:35 GMT +# Date: 2023-10-19, 22:42:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2468,9 +2468,9 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; OLetter # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; OLetter # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; OLetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; OLetter # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2541,7 +2541,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132748 +# Total code points: 132778 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index f7b1418ea..b90a78ec6 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-10-17, 12:29:39 GMT +# Date: 2023-10-19, 22:42:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1226,6 +1226,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ALetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ALetter # Lm OLD CHINESE ITERATION MARK +18D80..18D81 ; ALetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1BC00..1BC6A ; ALetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; ALetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; ALetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -1316,7 +1317,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 29582 +# Total code points: 29584 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index f075a6038..3bce5b4ae 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-10-17, 12:28:55 GMT +# Date: 2023-10-19, 22:41:14 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1076,9 +1076,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FE0..16FE1 ; L # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; L # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; L # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; L # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; L # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1187,7 +1187,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820342 code points not listed here. +# The above property value applies to 820312 code points not listed here. # Total code points: 1096267 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 639e4123c..0a83310f0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-10-17, 12:28:58 GMT +# Date: 2023-10-19, 22:41:17 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1798,9 +1798,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE2 ; 0 # Po OLD CHINESE HOOK MARK 16FE3 ; 0 # Lm OLD CHINESE ITERATION MARK 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER -17000..187F7 ; 0 # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; 0 # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; 0 # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; 0 # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; 0 # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; 0 # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2012,7 +2012,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826641 code points not listed here. +# The above property value applies to 826611 code points not listed here. # Total code points: 1113189 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 99c7aabe0..06469dd73 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-17, 12:29:01 GMT +# Date: 2023-10-19, 22:41:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1828,6 +1828,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16F51..16F87 ; N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +18D80..18D81 ; N # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2049,8 +2050,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 766159 code points not listed here. -# Total code points: 792618 +# The above property value applies to 766129 code points not listed here. +# Total code points: 792590 # ================================================ @@ -2451,9 +2452,8 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; W # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2522,7 +2522,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182417 +# Total code points: 182445 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 67eed4460..14cc76585 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-10-17, 12:29:02 GMT +# Date: 2023-10-19, 22:41:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -563,9 +563,9 @@ FFFE..FFFF ; Cn # [2] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. 16FF2..16FFF ; Cn # [14] .. -187F8..187FF ; Cn # [8] .. 18CD6..18CFF ; Cn # [42] .. -18D09..1AFEF ; Cn # [8935] .. +18D1D..18D7F ; Cn # [99] .. +18D82..1AFEF ; Cn # [8814] .. 1AFF4 ; Cn # 1AFFC ; Cn # 1AFFF ; Cn # @@ -728,7 +728,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824593 +# Total code points: 824563 # ================================================ @@ -2597,9 +2597,9 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16B7D..16B8F ; Lo # [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION -17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Lo # [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; Lo # [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Lo # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO @@ -2664,7 +2664,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132324 +# Total code points: 132354 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index ad6415932..856403ed7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-10-17, 12:29:04 GMT +# Date: 2023-10-19, 22:41:23 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762600 code points not listed here. -# Total code points: 900068 +# The above property value applies to 762570 code points not listed here. +# Total code points: 900038 # ================================================ @@ -1735,9 +1735,9 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 11052..11065 ; ID # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND 11950..11959 ; ID # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL -17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 +18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; ID # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1F000..1F02B ; ID # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK @@ -1830,7 +1830,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61978 code points not listed here. -# Total code points: 172568 +# Total code points: 172598 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 2f3f00574..ded94745f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-10-17, 12:29:04 GMT +# Date: 2023-10-19, 22:41:23 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -36672,7 +36672,7 @@ FFFD ; REPLACEMENT CHARACTER 16FE4 ; KHITAN SMALL SCRIPT FILLER 16FF0 ; VIETNAMESE ALTERNATE READING MARK CA 16FF1 ; VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; TANGUT IDEOGRAPH-* +17000..187FF ; TANGUT IDEOGRAPH-* 18800 ; TANGUT COMPONENT-001 18801 ; TANGUT COMPONENT-002 18802 ; TANGUT COMPONENT-003 @@ -37442,7 +37442,9 @@ FFFD ; REPLACEMENT CHARACTER 18AFE ; TANGUT COMPONENT-767 18AFF ; TANGUT COMPONENT-768 18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* -18D00..18D08 ; TANGUT IDEOGRAPH-* +18D00..18D1C ; TANGUT IDEOGRAPH-* +18D80 ; TANGUT COMPONENT-769 +18D81 ; TANGUT COMPONENT-770 1AFF0 ; KATAKANA LETTER MINNAN TONE-2 1AFF1 ; KATAKANA LETTER MINNAN TONE-3 1AFF2 ; KATAKANA LETTER MINNAN TONE-4 @@ -44301,6 +44303,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 149938 +# Total code points: 149968 # EOF From edaf9ee28b62ca2ca9d532f4082a1f30e037e672 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:42:40 +0200 Subject: [PATCH 05/17] GenerateEnums --- .../src/main/java/org/unicode/props/UcdPropertyValues.java | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 3aec7309b..dd962ecc8 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -481,6 +481,7 @@ public enum Block_Values implements Named { Tangsa("Tangsa"), Tangut("Tangut"), Tangut_Components("Tangut_Components"), + Tangut_Components_Supplement("Tangut_Components_Sup"), Tangut_Supplement("Tangut_Sup"), Telugu("Telugu"), Thaana("Thaana"), From f5bbefbdd3d9447550d5c5f9ca1135cf88a2f9fb Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:14:24 +0200 Subject: [PATCH 06/17] EAW=W --- unicodetools/data/ucd/dev/EastAsianWidth.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 02d75b346..7e3cad67d 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -2315,7 +2315,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C -18D80..18D81 ; N # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 From 3e0688e52d3b1f05fa6ff36334f4bf583f54f00b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:16:10 +0200 Subject: [PATCH 07/17] Regenerate UCD --- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 06469dd73..232ee71ce 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-19, 22:41:21 GMT +# Date: 2023-10-19, 23:15:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1828,7 +1828,6 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16F51..16F87 ; N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -18D80..18D81 ; N # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2051,7 +2050,7 @@ E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG # The above property value applies to 766129 code points not listed here. -# Total code points: 792590 +# Total code points: 792588 # ================================================ @@ -2454,6 +2453,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2522,7 +2522,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182445 +# Total code points: 182447 # ================================================ From 90ade82bd7e84eddded302434087510507e93de2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:21:45 +0200 Subject: [PATCH 08/17] SUP --- unicodetools/src/main/java/org/unicode/text/UCD/UCD.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index 97e18d15d..c540e00ff 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1328,7 +1328,7 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. if (ch <= 0x18D1C && rCompositeVersion >= 0x100000) { // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. - return TANGUT_BASE; + return TANGUT_SUP_BASE; } } From 950ca7b47a377fc38dc6da417a96e4dc94922378 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:32:10 +0200 Subject: [PATCH 09/17] script --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 23d0eb54d..6df6d823a 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +18D80..18D81;Tangut # Scripts-16.0.0.txt # Date: 2023-10-19, 22:42:02 GMT # © 2023 Unicode®, Inc. From f5eebb0ae635f481c9011a4c637ba72ff5bc70a8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:33:54 +0200 Subject: [PATCH 10/17] Regenerate UCD --- unicodetools/data/ucd/dev/Scripts.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 6df6d823a..990fb154c 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,6 +1,5 @@ -18D80..18D81;Tangut # Scripts-16.0.0.txt -# Date: 2023-10-19, 22:42:02 GMT +# Date: 2023-10-19, 23:33:48 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2752,8 +2751,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 16FE0 ; Tangut # Lm TANGUT ITERATION MARK 17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 18D00..18D1C ; Tangut # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Tangut # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 6942 +# Total code points: 6944 # ================================================ From c4d0be95bb44b1e7070b192b27aead3c457762ee Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 02:09:16 +0200 Subject: [PATCH 11/17] Do not break UnicodeData ranges --- .../src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index 759361106..f8db9c647 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -662,6 +662,7 @@ private static void generateUnicodeData(String filename) throws IOException { final BagFormatter bf = new BagFormatter(); bf.setHexValue(false) .setMergeRanges(true) + .setRangeBreakSource(null) .setNoSpacesBeforeSemicolon() .setMinSpacesAfterSemicolon(0) .setUnicodeDataStyleRanges(true) From f175b45c58bd7f611a71c750eca516f2aaf256d2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 02:11:12 +0200 Subject: [PATCH 12/17] Regenerate UCD --- unicodetools/data/ucd/dev/UnicodeData.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 97680c6d3..b412e551c 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -26008,8 +26008,6 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; -187F7;;Lo;0;L;;;;;N;;;;; -187F8;;Lo;0;L;;;;;N;;;;; 187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; 18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;; @@ -27250,8 +27248,6 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD4;KHITAN SMALL SCRIPT CHARACTER-18CD4;Lo;0;L;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; -18D08;;Lo;0;L;;;;;N;;;;; -18D09;;Lo;0;L;;;;;N;;;;; 18D1C;;Lo;0;L;;;;;N;;;;; 18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; 18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; From a9870b1bd3b74eeeb016d0e56a5fede2b441f4be Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 02:19:16 +0200 Subject: [PATCH 13/17] sources --- unicodetools/data/ucd/dev/TangutSources.txt | 58 +++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index b49891c95..bef390d10 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -50,6 +50,8 @@ # [Grammar of the Tangut Language]. Moscow, 1968. # UTN42 = Andrew West and Viacheslav Zaytsev, Tangut Character Additions and Glyph Corrections, # Unicode Technical Note #42. 2019-12-21. +# N5217 = Andrew West, Proposal to encode 2 Tangut components and 28 Tangut ideographs, +# WG2 N5217 = L2/23-149. 2023-10-02. # # For more information, see Section 18.10, Tangut, of the # core specification. @@ -12326,6 +12328,22 @@ U+187F6 kTGT_MergedSrc UTN42-010 U+187F6 kRSTUnicode 79.14 U+187F7 kTGT_MergedSrc UTN42-011 U+187F7 kRSTUnicode 79.19 +U+187F8 kTGT_MergedSrc N5217-01 +U+187F8 kRSTUnicode 206.19 +U+187F9 kTGT_MergedSrc N5217-02 +U+187F9 kRSTUnicode 267.9 +U+187FA kTGT_MergedSrc N5217-03 +U+187FA kRSTUnicode 75.19 +U+187FB kTGT_MergedSrc N5217-04 +U+187FB kRSTUnicode 490.12 +U+187FC kTGT_MergedSrc N5217-05 +U+187FC kRSTUnicode 114.9 +U+187FD kTGT_MergedSrc N5217-06 +U+187FD kRSTUnicode 217.15 +U+187FE kTGT_MergedSrc N5217-07 +U+187FE kRSTUnicode 519.13 +U+187FF kTGT_MergedSrc N5217-08 +U+187FF kRSTUnicode 736.18 U+18D00 kTGT_MergedSrc L2008-3489 U+18D00 kRSTUnicode 17.7 U+18D01 kTGT_MergedSrc L2008-1667 @@ -12344,5 +12362,45 @@ U+18D07 kTGT_MergedSrc L2008-1106 U+18D07 kRSTUnicode 485.12 U+18D08 kTGT_MergedSrc L2008-4456 U+18D08 kRSTUnicode 674.14 +U+18D09 kTGT_MergedSrc N5217-09 +U+18D09 kRSTUnicode 590.14 +U+18D0A kTGT_MergedSrc N5217-10 +U+18D0A kRSTUnicode 267.18 +U+18D0B kTGT_MergedSrc N5217-11 +U+18D0B kRSTUnicode 267.10 +U+18D0C kTGT_MergedSrc N5217-12 +U+18D0C kRSTUnicode 655.13 +U+18D0D kTGT_MergedSrc N5217-13 +U+18D0D kRSTUnicode 456.14 +U+18D0E kTGT_MergedSrc N5217-14 +U+18D0E kRSTUnicode 273.9 +U+18D0F kTGT_MergedSrc N5217-15 +U+18D0F kRSTUnicode 278.11 +U+18D10 kTGT_MergedSrc N5217-16 +U+18D10 kRSTUnicode 106.13 +U+18D11 kTGT_MergedSrc N5217-17 +U+18D11 kRSTUnicode 75.8 +U+18D12 kTGT_MergedSrc N5217-18 +U+18D12 kRSTUnicode 17.7 +U+18D13 kTGT_MergedSrc N5217-19 +U+18D13 kRSTUnicode 106.15 +U+18D14 kTGT_MergedSrc N5217-20 +U+18D14 kRSTUnicode 167.15 +U+18D15 kTGT_MergedSrc N5217-21 +U+18D15 kRSTUnicode 462.11 +U+18D16 kTGT_MergedSrc N5217-22 +U+18D16 kRSTUnicode 579.17 +U+18D17 kTGT_MergedSrc N5217-23 +U+18D17 kRSTUnicode 210.13 +U+18D18 kTGT_MergedSrc N5217-24 +U+18D18 kRSTUnicode 278.13 +U+18D19 kTGT_MergedSrc N5217-25 +U+18D19 kRSTUnicode 141.19 +U+18D1A kTGT_MergedSrc N5217-26 +U+18D1A kRSTUnicode 75.13 +U+18D1B kTGT_MergedSrc N5217-27 +U+18D1B kRSTUnicode 36.7 +U+18D1C kTGT_MergedSrc N5217-28 +U+18D1C kRSTUnicode 141.9 # EOF From bc7a556379ffc866c39ba84781832b5aebc9696f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 29 Dec 2023 11:44:30 +0100 Subject: [PATCH 14/17] Tangut Components Supplement to vo=U --- unicodetools/data/ucd/dev/VerticalOrientation.txt | 2 +- .../main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 8e4f9c6b5..e775ed632 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -2149,7 +2149,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 18CD6..18CFF ; U # Cn [42] .. 18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C 18D1D..18D7F ; U # Cn [99] .. -18D80..18D81 ; R # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DFF ; U # Meow 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index db8ebd7b8..5e99eac4f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -880,6 +880,7 @@ Format: kenFile skipValue=Rotated # Anatolian Hieroglyphs: U+14400..U+1467F # Ideographic Symbols & Tangut: U+16FE0..U+18AFF # Khitan Small Script & Tangut Sup: U+18B00..U+18D7F +# Tangut Components Supplement: U+18D80..U+18DFF # Kana Extended-B: U+1AFF0..U+1AFFF # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF From e59bd667167f674ebf4e85ec040fa2e306990f8d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 29 Dec 2023 11:47:29 +0100 Subject: [PATCH 15/17] Regenerate UCD --- unicodetools/data/ucd/dev/VerticalOrientation.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index e775ed632..af96c481e 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-19, 22:42:07 GMT +# Date: 2023-12-29, 10:46:04 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -46,6 +46,7 @@ # Anatolian Hieroglyphs: U+14400..U+1467F # Ideographic Symbols & Tangut: U+16FE0..U+18AFF # Khitan Small Script & Tangut Sup: U+18B00..U+18D7F +# Tangut Components Supplement: U+18D80..U+18DFF # Kana Extended-B: U+1AFF0..U+1AFFF # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF @@ -2149,7 +2150,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 18CD6..18CFF ; U # Cn [42] .. 18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C 18D1D..18D7F ; U # Cn [99] .. -18D80..18DFF ; U # Meow +18D80..18D81 ; U # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D82..18DFF ; U # Cn [126] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 From 187deed2258cdd7e6a92ac4ae0c29dbbf61626e3 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 8 Jun 2024 03:19:31 +0200 Subject: [PATCH 16/17] Ideographic --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 09099edc0..8bd65d858 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +18D80..18D81;Ideographic # PropList-16.0.0.txt # Date: 2024-06-07, 20:56:15 GMT # © 2024 Unicode®, Inc. From b6f43f5bc50c39f2ff0e94469f58e49c85fc0c28 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 8 Jun 2024 03:42:41 +0200 Subject: [PATCH 17/17] Regenerate UCD --- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 8bd65d858..14fe87284 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,6 +1,5 @@ -18D80..18D81;Ideographic # PropList-16.0.0.txt -# Date: 2024-06-07, 20:56:15 GMT +# Date: 2024-06-08, 01:21:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -874,6 +873,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1C ; Ideographic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Ideographic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -885,7 +885,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106505 +# Total code points: 106507 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index dc4ee1d23..adf423bcd 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-06-07, 20:56:33 GMT +# Date: 2024-06-08, 01:21:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1263,7 +1263,6 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ALetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ALetter # Lm OLD CHINESE ITERATION MARK -18D80..18D81 ; ALetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1BC00..1BC6A ; ALetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; ALetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; ALetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -1356,7 +1355,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33793 +# Total code points: 33791 # ================================================