From da62528ee762a5ec45304204c0566e64cf491991 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:31:06 +0200 Subject: [PATCH 01/23] UnicodeData.txt and hardcoded ranges from L2/23-246 --- unicodetools/data/ucd/dev/UnicodeData.txt | 4 +++- .../src/main/java/org/unicode/text/UCD/UCD.java | 10 ++++++++++ .../src/main/java/org/unicode/text/UCD/UCD_Types.java | 6 ++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 7faa6be2c..2987129d4 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,5 @@ +18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; +18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -26008,7 +26010,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; -187F7;;Lo;0;L;;;;;N;;;;; +187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; 18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;; 18802;TANGUT COMPONENT-003;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index 1d572baa5..97e18d15d 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1308,6 +1308,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // Unicode 12 added TANGUT IDEOGRAPH-187F2..TANGUT IDEOGRAPH-187F7. return TANGUT_BASE; } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. + if (ch <= 0x187FF && rCompositeVersion >= 0x100000) { + // Unicode [..] added TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF. + return TANGUT_BASE; + } } if (rCompositeVersion >= 0xd0000) { @@ -1320,6 +1325,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { if (ch <= 0x18D08) { return TANGUT_SUP_BASE; // 18D00..18D08 Tangut Ideograph Supplement } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. + if (ch <= 0x18D1C && rCompositeVersion >= 0x100000) { + // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. + return TANGUT_BASE; + } } // 20000..2A6DF; CJK Unified Ideographs Extension B diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index 972753c37..25b38407b 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -27,10 +27,16 @@ public interface UCD_Types { // Unicode 12: // 17000;;Lo;0;L;;;;;N;;;;; // 187F7;;Lo;0;L;;;;;N;;;;; + // Unicode [..]: + // 17000;;Lo;0;L;;;;;N;;;;; + // 187FF;;Lo;0;L;;;;;N;;;;; public static final int TANGUT_SUP_BASE = 0x18D00; // Unicode 13: // 18D00;;Lo;0;L;;;;;N;;;;; // 18D08;;Lo;0;L;;;;;N;;;;; + // Unicode [..]: + // 18D00;;Lo;0;L;;;;;N;;;;; + // 18D1C;;Lo;0;L;;;;;N;;;;; public static final int // 4E00;;Lo;0;L;;;;;N;;;;; From db777acf3a6bdffde6402f67aa7984d9cf4e3d3e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:35:02 +0200 Subject: [PATCH 02/23] LineBreak.txt --- unicodetools/data/ucd/dev/LineBreak.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index d93ba5183..aa3f1f3cf 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-10-16, 14:22:28 GMT +# Date: 2023-10-19, 22:32:42 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3216,10 +3216,11 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FE3 ; NS # Lm OLD CHINESE ITERATION MARK 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +17000..187FF ; ID # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; AL # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 From bf8b3bbcf1b6e2d994c08ba34a06c61dcd7da06e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:40:08 +0200 Subject: [PATCH 03/23] new block --- unicodetools/data/ucd/dev/Blocks.txt | 1 + .../src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 431120ef2..8ad0904b7 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -304,6 +304,7 @@ FFF0..FFFF; Specials 18800..18AFF; Tangut Components 18B00..18CFF; Khitan Small Script 18D00..18D7F; Tangut Supplement +18D80..18DFF; Tangut Components Supplement 1AFF0..1AFFF; Kana Extended-B 1B000..1B0FF; Kana Supplement 1B100..1B12F; Kana Extended-A diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 8a43fe8b7..24b504f3f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -302,6 +302,7 @@ Tamil ; Tamil Tamil_Sup ; Tamil_Supplement Tangut ; Tangut Tangut_Components ; Tangut_Components +Tangut_Components_Sup ; Tangut_Components_Supplement Tangut_Sup ; Tangut_Supplement Telugu ; Telugu Thaana ; Thaana From 7505d1bfffed85eb7eb433e84b45dd712c78e9f1 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:42:09 +0200 Subject: [PATCH 04/23] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 7 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 50 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 7 +-- unicodetools/data/ucd/dev/PropList.txt | 9 ++-- .../data/ucd/dev/PropertyValueAliases.txt | 3 +- unicodetools/data/ucd/dev/Scripts.txt | 9 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 8 ++- .../data/ucd/dev/VerticalOrientation.txt | 10 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 10 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 +- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++-- .../dev/extracted/DerivedCombiningClass.txt | 10 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 14 +++--- .../dev/extracted/DerivedGeneralCategory.txt | 16 +++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 14 +++--- .../data/ucd/dev/extracted/DerivedName.txt | 10 ++-- 16 files changed, 101 insertions(+), 91 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 386d713b9..21ce575f1 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-10-17, 12:28:26 GMT +# Date: 2023-10-19, 22:40:51 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2019,7 +2019,10 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT 116D0..116E3 ; 16.0 # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO 11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +187F8..187FF ; 16.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF +18D09..18D1C ; 16.0 # [20] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; 16.0 # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 125 +# Total code points: 155 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index c00c3c976..ed92d48fe 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-10-17, 12:28:59 GMT +# Date: 2023-10-19, 22:41:18 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1288,9 +1288,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FE0..16FE1 ; Alphabetic # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Alphabetic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; Alphabetic # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Alphabetic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1407,7 +1407,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138481 +# Total code points: 138511 # ================================================ @@ -6764,9 +6764,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; ID_Start # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -6872,7 +6872,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137059 +# Total code points: 137089 # ================================================ @@ -8093,9 +8093,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FE3 ; ID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; ID_Continue # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8236,7 +8236,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140232 +# Total code points: 140262 # ================================================ @@ -8886,9 +8886,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; XID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; XID_Start # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; XID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8994,7 +8994,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137036 +# Total code points: 137066 # ================================================ @@ -10216,9 +10216,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FE3 ; XID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; XID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; XID_Continue # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; XID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -10359,7 +10359,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140213 +# Total code points: 140243 # ================================================ @@ -12355,9 +12355,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Grapheme_Base # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Grapheme_Base # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; Grapheme_Base # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Grapheme_Base # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Grapheme_Base # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -12555,7 +12555,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 147736 +# Total code points: 147766 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index b28cc2558..02d75b346 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-10-17, 12:29:07 GMT +# Date: 2023-10-19, 22:41:27 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2311,10 +2311,11 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +17000..187FF ; W # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; N # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 8c1f3934d..de0a1705d 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2023-10-13, 11:33:44 GMT +# Date: 2023-10-19, 22:41:38 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -849,9 +849,8 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER -17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; Ideographic # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -863,7 +862,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106476 +# Total code points: 106504 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 17ffec935..898cd4513 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-16.0.0.txt -# Date: 2023-10-17, 12:29:15 GMT +# Date: 2023-10-19, 22:41:41 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -456,6 +456,7 @@ blk; Tamil_Sup ; Tamil_Supplement blk; Tangsa ; Tangsa blk; Tangut ; Tangut blk; Tangut_Components ; Tangut_Components +blk; Tangut_Components_Sup ; Tangut_Components_Supplement blk; Tangut_Sup ; Tangut_Supplement blk; Telugu ; Telugu blk; Thaana ; Thaana diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index a4ef9c2bd..23d0eb54d 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-10-17, 12:29:34 GMT +# Date: 2023-10-19, 22:42:02 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2749,11 +2749,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ 16FE0 ; Tangut # Lm TANGUT ITERATION MARK -17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF ; Tangut # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18D00..18D08 ; Tangut # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 +18D00..18D1C ; Tangut # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C -# Total code points: 6914 +# Total code points: 6942 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 2987129d4..97680c6d3 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,5 +1,3 @@ -18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; -18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -26010,6 +26008,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; +187F7;;Lo;0;L;;;;;N;;;;; +187F8;;Lo;0;L;;;;;N;;;;; 187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; 18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;; @@ -27251,6 +27251,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; 18D08;;Lo;0;L;;;;;N;;;;; +18D09;;Lo;0;L;;;;;N;;;;; +18D1C;;Lo;0;L;;;;;N;;;;; +18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; +18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 1AFF0;KATAKANA LETTER MINNAN TONE-2;Lm;0;L;;;;;N;;;;; 1AFF1;KATAKANA LETTER MINNAN TONE-3;Lm;0;L;;;;;N;;;;; 1AFF2;KATAKANA LETTER MINNAN TONE-4;Lm;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 15556dc1e..8e4f9c6b5 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-17, 12:29:38 GMT +# Date: 2023-10-19, 22:42:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2143,13 +2143,13 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FE5..16FEF ; U # Cn [11] .. 16FF0..16FF1 ; U # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FFF ; U # Cn [14] .. -17000..187F7 ; U # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -187F8..187FF ; U # Cn [8] .. +17000..187FF ; U # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CD6..18CFF ; U # Cn [42] .. -18D00..18D08 ; U # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 -18D09..18D7F ; U # Cn [119] .. +18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D1D..18D7F ; U # Cn [99] .. +18D80..18D81 ; R # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 54c8be3db..21c9c3349 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-10-17, 12:29:35 GMT +# Date: 2023-10-19, 22:42:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2468,9 +2468,9 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK -17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; OLetter # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; OLetter # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; OLetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; OLetter # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2541,7 +2541,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132748 +# Total code points: 132778 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index f7b1418ea..b90a78ec6 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-10-17, 12:29:39 GMT +# Date: 2023-10-19, 22:42:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1226,6 +1226,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ALetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ALetter # Lm OLD CHINESE ITERATION MARK +18D80..18D81 ; ALetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1BC00..1BC6A ; ALetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; ALetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; ALetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -1316,7 +1317,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 29582 +# Total code points: 29584 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index f075a6038..3bce5b4ae 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-10-17, 12:28:55 GMT +# Date: 2023-10-19, 22:41:14 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1076,9 +1076,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FE0..16FE1 ; L # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; L # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; L # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; L # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; L # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1187,7 +1187,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820342 code points not listed here. +# The above property value applies to 820312 code points not listed here. # Total code points: 1096267 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 639e4123c..0a83310f0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-10-17, 12:28:58 GMT +# Date: 2023-10-19, 22:41:17 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1798,9 +1798,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE2 ; 0 # Po OLD CHINESE HOOK MARK 16FE3 ; 0 # Lm OLD CHINESE ITERATION MARK 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER -17000..187F7 ; 0 # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; 0 # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; 0 # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; 0 # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; 0 # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; 0 # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2012,7 +2012,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826641 code points not listed here. +# The above property value applies to 826611 code points not listed here. # Total code points: 1113189 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 99c7aabe0..06469dd73 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-17, 12:29:01 GMT +# Date: 2023-10-19, 22:41:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1828,6 +1828,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16F51..16F87 ; N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +18D80..18D81 ; N # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2049,8 +2050,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 766159 code points not listed here. -# Total code points: 792618 +# The above property value applies to 766129 code points not listed here. +# Total code points: 792590 # ================================================ @@ -2451,9 +2452,8 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; W # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2522,7 +2522,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182417 +# Total code points: 182445 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 67eed4460..14cc76585 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-10-17, 12:29:02 GMT +# Date: 2023-10-19, 22:41:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -563,9 +563,9 @@ FFFE..FFFF ; Cn # [2] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. 16FF2..16FFF ; Cn # [14] .. -187F8..187FF ; Cn # [8] .. 18CD6..18CFF ; Cn # [42] .. -18D09..1AFEF ; Cn # [8935] .. +18D1D..18D7F ; Cn # [99] .. +18D82..1AFEF ; Cn # [8814] .. 1AFF4 ; Cn # 1AFFC ; Cn # 1AFFF ; Cn # @@ -728,7 +728,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824593 +# Total code points: 824563 # ================================================ @@ -2597,9 +2597,9 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16B7D..16B8F ; Lo # [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION -17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Lo # [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D1C ; Lo # [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Lo # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO @@ -2664,7 +2664,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132324 +# Total code points: 132354 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index ad6415932..856403ed7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-10-17, 12:29:04 GMT +# Date: 2023-10-19, 22:41:23 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762600 code points not listed here. -# Total code points: 900068 +# The above property value applies to 762570 code points not listed here. +# Total code points: 900038 # ================================================ @@ -1735,9 +1735,9 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 11052..11065 ; ID # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND 11950..11959 ; ID # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL -17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 +18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; ID # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1F000..1F02B ; ID # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK @@ -1830,7 +1830,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61978 code points not listed here. -# Total code points: 172568 +# Total code points: 172598 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 2f3f00574..ded94745f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-10-17, 12:29:04 GMT +# Date: 2023-10-19, 22:41:23 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -36672,7 +36672,7 @@ FFFD ; REPLACEMENT CHARACTER 16FE4 ; KHITAN SMALL SCRIPT FILLER 16FF0 ; VIETNAMESE ALTERNATE READING MARK CA 16FF1 ; VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7 ; TANGUT IDEOGRAPH-* +17000..187FF ; TANGUT IDEOGRAPH-* 18800 ; TANGUT COMPONENT-001 18801 ; TANGUT COMPONENT-002 18802 ; TANGUT COMPONENT-003 @@ -37442,7 +37442,9 @@ FFFD ; REPLACEMENT CHARACTER 18AFE ; TANGUT COMPONENT-767 18AFF ; TANGUT COMPONENT-768 18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* -18D00..18D08 ; TANGUT IDEOGRAPH-* +18D00..18D1C ; TANGUT IDEOGRAPH-* +18D80 ; TANGUT COMPONENT-769 +18D81 ; TANGUT COMPONENT-770 1AFF0 ; KATAKANA LETTER MINNAN TONE-2 1AFF1 ; KATAKANA LETTER MINNAN TONE-3 1AFF2 ; KATAKANA LETTER MINNAN TONE-4 @@ -44301,6 +44303,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 149938 +# Total code points: 149968 # EOF From edaf9ee28b62ca2ca9d532f4082a1f30e037e672 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 00:42:40 +0200 Subject: [PATCH 05/23] GenerateEnums --- .../src/main/java/org/unicode/props/UcdPropertyValues.java | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 3aec7309b..dd962ecc8 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -481,6 +481,7 @@ public enum Block_Values implements Named { Tangsa("Tangsa"), Tangut("Tangut"), Tangut_Components("Tangut_Components"), + Tangut_Components_Supplement("Tangut_Components_Sup"), Tangut_Supplement("Tangut_Sup"), Telugu("Telugu"), Thaana("Thaana"), From f5bbefbdd3d9447550d5c5f9ca1135cf88a2f9fb Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:14:24 +0200 Subject: [PATCH 06/23] EAW=W --- unicodetools/data/ucd/dev/EastAsianWidth.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 02d75b346..7e3cad67d 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -2315,7 +2315,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C -18D80..18D81 ; N # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 From 3e0688e52d3b1f05fa6ff36334f4bf583f54f00b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:16:10 +0200 Subject: [PATCH 07/23] Regenerate UCD --- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 06469dd73..232ee71ce 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-19, 22:41:21 GMT +# Date: 2023-10-19, 23:15:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1828,7 +1828,6 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16F51..16F87 ; N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -18D80..18D81 ; N # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2051,7 +2050,7 @@ E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG # The above property value applies to 766129 code points not listed here. -# Total code points: 792590 +# Total code points: 792588 # ================================================ @@ -2454,6 +2453,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2522,7 +2522,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182445 +# Total code points: 182447 # ================================================ From 90ade82bd7e84eddded302434087510507e93de2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:21:45 +0200 Subject: [PATCH 08/23] SUP --- unicodetools/src/main/java/org/unicode/text/UCD/UCD.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index 97e18d15d..c540e00ff 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1328,7 +1328,7 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. if (ch <= 0x18D1C && rCompositeVersion >= 0x100000) { // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. - return TANGUT_BASE; + return TANGUT_SUP_BASE; } } From 950ca7b47a377fc38dc6da417a96e4dc94922378 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:32:10 +0200 Subject: [PATCH 09/23] script --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 23d0eb54d..6df6d823a 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +18D80..18D81;Tangut # Scripts-16.0.0.txt # Date: 2023-10-19, 22:42:02 GMT # © 2023 Unicode®, Inc. From f5eebb0ae635f481c9011a4c637ba72ff5bc70a8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 01:33:54 +0200 Subject: [PATCH 10/23] Regenerate UCD --- unicodetools/data/ucd/dev/Scripts.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 6df6d823a..990fb154c 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,6 +1,5 @@ -18D80..18D81;Tangut # Scripts-16.0.0.txt -# Date: 2023-10-19, 22:42:02 GMT +# Date: 2023-10-19, 23:33:48 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2752,8 +2751,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 16FE0 ; Tangut # Lm TANGUT ITERATION MARK 17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 18D00..18D1C ; Tangut # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Tangut # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 6942 +# Total code points: 6944 # ================================================ From c4d0be95bb44b1e7070b192b27aead3c457762ee Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 02:09:16 +0200 Subject: [PATCH 11/23] Do not break UnicodeData ranges --- .../src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index 759361106..f8db9c647 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -662,6 +662,7 @@ private static void generateUnicodeData(String filename) throws IOException { final BagFormatter bf = new BagFormatter(); bf.setHexValue(false) .setMergeRanges(true) + .setRangeBreakSource(null) .setNoSpacesBeforeSemicolon() .setMinSpacesAfterSemicolon(0) .setUnicodeDataStyleRanges(true) From f175b45c58bd7f611a71c750eca516f2aaf256d2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 02:11:12 +0200 Subject: [PATCH 12/23] Regenerate UCD --- unicodetools/data/ucd/dev/UnicodeData.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 97680c6d3..b412e551c 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -26008,8 +26008,6 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; -187F7;;Lo;0;L;;;;;N;;;;; -187F8;;Lo;0;L;;;;;N;;;;; 187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; 18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;; @@ -27250,8 +27248,6 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD4;KHITAN SMALL SCRIPT CHARACTER-18CD4;Lo;0;L;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; -18D08;;Lo;0;L;;;;;N;;;;; -18D09;;Lo;0;L;;;;;N;;;;; 18D1C;;Lo;0;L;;;;;N;;;;; 18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; 18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; From a9870b1bd3b74eeeb016d0e56a5fede2b441f4be Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 20 Oct 2023 02:19:16 +0200 Subject: [PATCH 13/23] sources --- unicodetools/data/ucd/dev/TangutSources.txt | 58 +++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index b49891c95..bef390d10 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -50,6 +50,8 @@ # [Grammar of the Tangut Language]. Moscow, 1968. # UTN42 = Andrew West and Viacheslav Zaytsev, Tangut Character Additions and Glyph Corrections, # Unicode Technical Note #42. 2019-12-21. +# N5217 = Andrew West, Proposal to encode 2 Tangut components and 28 Tangut ideographs, +# WG2 N5217 = L2/23-149. 2023-10-02. # # For more information, see Section 18.10, Tangut, of the # core specification. @@ -12326,6 +12328,22 @@ U+187F6 kTGT_MergedSrc UTN42-010 U+187F6 kRSTUnicode 79.14 U+187F7 kTGT_MergedSrc UTN42-011 U+187F7 kRSTUnicode 79.19 +U+187F8 kTGT_MergedSrc N5217-01 +U+187F8 kRSTUnicode 206.19 +U+187F9 kTGT_MergedSrc N5217-02 +U+187F9 kRSTUnicode 267.9 +U+187FA kTGT_MergedSrc N5217-03 +U+187FA kRSTUnicode 75.19 +U+187FB kTGT_MergedSrc N5217-04 +U+187FB kRSTUnicode 490.12 +U+187FC kTGT_MergedSrc N5217-05 +U+187FC kRSTUnicode 114.9 +U+187FD kTGT_MergedSrc N5217-06 +U+187FD kRSTUnicode 217.15 +U+187FE kTGT_MergedSrc N5217-07 +U+187FE kRSTUnicode 519.13 +U+187FF kTGT_MergedSrc N5217-08 +U+187FF kRSTUnicode 736.18 U+18D00 kTGT_MergedSrc L2008-3489 U+18D00 kRSTUnicode 17.7 U+18D01 kTGT_MergedSrc L2008-1667 @@ -12344,5 +12362,45 @@ U+18D07 kTGT_MergedSrc L2008-1106 U+18D07 kRSTUnicode 485.12 U+18D08 kTGT_MergedSrc L2008-4456 U+18D08 kRSTUnicode 674.14 +U+18D09 kTGT_MergedSrc N5217-09 +U+18D09 kRSTUnicode 590.14 +U+18D0A kTGT_MergedSrc N5217-10 +U+18D0A kRSTUnicode 267.18 +U+18D0B kTGT_MergedSrc N5217-11 +U+18D0B kRSTUnicode 267.10 +U+18D0C kTGT_MergedSrc N5217-12 +U+18D0C kRSTUnicode 655.13 +U+18D0D kTGT_MergedSrc N5217-13 +U+18D0D kRSTUnicode 456.14 +U+18D0E kTGT_MergedSrc N5217-14 +U+18D0E kRSTUnicode 273.9 +U+18D0F kTGT_MergedSrc N5217-15 +U+18D0F kRSTUnicode 278.11 +U+18D10 kTGT_MergedSrc N5217-16 +U+18D10 kRSTUnicode 106.13 +U+18D11 kTGT_MergedSrc N5217-17 +U+18D11 kRSTUnicode 75.8 +U+18D12 kTGT_MergedSrc N5217-18 +U+18D12 kRSTUnicode 17.7 +U+18D13 kTGT_MergedSrc N5217-19 +U+18D13 kRSTUnicode 106.15 +U+18D14 kTGT_MergedSrc N5217-20 +U+18D14 kRSTUnicode 167.15 +U+18D15 kTGT_MergedSrc N5217-21 +U+18D15 kRSTUnicode 462.11 +U+18D16 kTGT_MergedSrc N5217-22 +U+18D16 kRSTUnicode 579.17 +U+18D17 kTGT_MergedSrc N5217-23 +U+18D17 kRSTUnicode 210.13 +U+18D18 kTGT_MergedSrc N5217-24 +U+18D18 kRSTUnicode 278.13 +U+18D19 kTGT_MergedSrc N5217-25 +U+18D19 kRSTUnicode 141.19 +U+18D1A kTGT_MergedSrc N5217-26 +U+18D1A kRSTUnicode 75.13 +U+18D1B kTGT_MergedSrc N5217-27 +U+18D1B kRSTUnicode 36.7 +U+18D1C kTGT_MergedSrc N5217-28 +U+18D1C kRSTUnicode 141.9 # EOF From bc7a556379ffc866c39ba84781832b5aebc9696f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 29 Dec 2023 11:44:30 +0100 Subject: [PATCH 14/23] Tangut Components Supplement to vo=U --- unicodetools/data/ucd/dev/VerticalOrientation.txt | 2 +- .../main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 8e4f9c6b5..e775ed632 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -2149,7 +2149,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 18CD6..18CFF ; U # Cn [42] .. 18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C 18D1D..18D7F ; U # Cn [99] .. -18D80..18D81 ; R # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D80..18DFF ; U # Meow 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index db8ebd7b8..5e99eac4f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -880,6 +880,7 @@ Format: kenFile skipValue=Rotated # Anatolian Hieroglyphs: U+14400..U+1467F # Ideographic Symbols & Tangut: U+16FE0..U+18AFF # Khitan Small Script & Tangut Sup: U+18B00..U+18D7F +# Tangut Components Supplement: U+18D80..U+18DFF # Kana Extended-B: U+1AFF0..U+1AFFF # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF From e59bd667167f674ebf4e85ec040fa2e306990f8d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 29 Dec 2023 11:47:29 +0100 Subject: [PATCH 15/23] Regenerate UCD --- unicodetools/data/ucd/dev/VerticalOrientation.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index e775ed632..af96c481e 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-19, 22:42:07 GMT +# Date: 2023-12-29, 10:46:04 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -46,6 +46,7 @@ # Anatolian Hieroglyphs: U+14400..U+1467F # Ideographic Symbols & Tangut: U+16FE0..U+18AFF # Khitan Small Script & Tangut Sup: U+18B00..U+18D7F +# Tangut Components Supplement: U+18D80..U+18DFF # Kana Extended-B: U+1AFF0..U+1AFFF # Kana Extended-A & Small Kana Ext: U+1B100..U+1B16F # Nushu: U+1B170..U+1B2FF @@ -2149,7 +2150,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 18CD6..18CFF ; U # Cn [42] .. 18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C 18D1D..18D7F ; U # Cn [99] .. -18D80..18DFF ; U # Meow +18D80..18D81 ; U # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 +18D82..18DFF ; U # Cn [126] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF4 ; U # Cn 1AFF5..1AFFB ; U # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 From 187deed2258cdd7e6a92ac4ae0c29dbbf61626e3 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 8 Jun 2024 03:19:31 +0200 Subject: [PATCH 16/23] Ideographic --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 09099edc0..8bd65d858 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +18D80..18D81;Ideographic # PropList-16.0.0.txt # Date: 2024-06-07, 20:56:15 GMT # © 2024 Unicode®, Inc. From b6f43f5bc50c39f2ff0e94469f58e49c85fc0c28 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 8 Jun 2024 03:42:41 +0200 Subject: [PATCH 17/23] Regenerate UCD --- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 8bd65d858..14fe87284 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,6 +1,5 @@ -18D80..18D81;Ideographic # PropList-16.0.0.txt -# Date: 2024-06-07, 20:56:15 GMT +# Date: 2024-06-08, 01:21:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -874,6 +873,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1C ; Ideographic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18D80..18D81 ; Ideographic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -885,7 +885,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106505 +# Total code points: 106507 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index dc4ee1d23..adf423bcd 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-06-07, 20:56:33 GMT +# Date: 2024-06-08, 01:21:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1263,7 +1263,6 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ALetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ALetter # Lm OLD CHINESE ITERATION MARK -18D80..18D81 ; ALetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1BC00..1BC6A ; ALetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; ALetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; ALetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -1356,7 +1355,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33793 +# Total code points: 33791 # ================================================ From 1e0e8801194b635610a15b3b28e0f9aa1b3e32cd Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 18 Oct 2024 19:07:28 +0200 Subject: [PATCH 18/23] Bump the upper bound of the Tangut Supplement --- unicodetools/src/main/java/org/unicode/text/UCD/UCD.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index a3ae7da1d..c1dc65f44 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1331,6 +1331,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. return TANGUT_SUP_BASE; } + // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x110000. + if (ch <= 0x18D1E && rCompositeVersion >= 0x110000) { + // Unicode [..] added TANGUT IDEOGRAPH-18D1D..TANGUT IDEOGRAPH-18D1E. + return TANGUT_SUP_BASE; + } } // 20000..2A6DF; CJK Unified Ideographs Extension B From b2ccb54278e6f9c3b8743c7416de1c2da7cbb238 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 18 Oct 2024 19:35:32 +0200 Subject: [PATCH 19/23] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 26 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 ++--- unicodetools/data/ucd/dev/LineBreak.txt | 4 +-- unicodetools/data/ucd/dev/PropList.txt | 6 ++--- unicodetools/data/ucd/dev/Scripts.txt | 8 +++--- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- .../data/ucd/dev/VerticalOrientation.txt | 8 +++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++--- .../dev/extracted/DerivedCombiningClass.txt | 6 ++--- .../dev/extracted/DerivedEastAsianWidth.txt | 10 +++---- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +++---- .../data/ucd/dev/extracted/DerivedName.txt | 6 ++--- 15 files changed, 60 insertions(+), 59 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index d3da6bd7c..9642eb0a1 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-10-16, 17:24:13 GMT +# Date: 2024-10-18, 17:33:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2067,8 +2067,9 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Newly assigned in Unicode 17.0.0 (September, 2025) +18D1D..18D1E ; 17.0 # [2] TANGUT IDEOGRAPH-18D1D..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; 17.0 # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 2 +# Total code points: 4 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 41a8d0e23..be63b6676 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-10-16, 17:24:45 GMT +# Date: 2024-10-18, 17:34:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1321,7 +1321,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; Alphabetic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; Alphabetic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Alphabetic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -1441,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142789 +# Total code points: 142791 # ================================================ @@ -6853,7 +6853,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK 17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; ID_Start # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; ID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -6962,7 +6962,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141299 +# Total code points: 141301 # ================================================ @@ -8223,7 +8223,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; ID_Continue # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; ID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -8370,7 +8370,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144571 +# Total code points: 144573 # ================================================ @@ -9039,7 +9039,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK 17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; XID_Start # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; XID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; XID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -9148,7 +9148,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141276 +# Total code points: 141278 # ================================================ @@ -10410,7 +10410,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; XID_Continue # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; XID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; XID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -10557,7 +10557,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144552 +# Total code points: 144554 # ================================================ @@ -12608,7 +12608,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK 17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; Grapheme_Base # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; Grapheme_Base # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Grapheme_Base # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -12812,7 +12812,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152760 +# Total code points: 152762 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 1f0808f8e..882507d88 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-16.0.0.txt -# Date: 2024-06-07, 20:56:06 GMT +# EastAsianWidth-17.0.0.txt +# Date: 2024-10-18, 17:34:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2364,7 +2364,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; W # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1C ; W # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D00..18D1E ; W # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 63652ca42..9f5089aa6 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-10-16, 17:24:54 GMT +# Date: 2024-10-18, 17:34:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3274,7 +3274,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index b3a671f69..f19bcc137 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-10-16, 17:25:10 GMT +# Date: 2024-10-18, 17:34:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -872,7 +872,7 @@ F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COM FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; Ideographic # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; Ideographic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Ideographic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF @@ -885,7 +885,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106507 +# Total code points: 106509 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index d903a4b50..44508ae85 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ -# Scripts-16.0.0.txt -# Date: 2024-06-07, 20:56:31 GMT +# Scripts-17.0.0.txt +# Date: 2024-10-18, 17:35:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2757,10 +2757,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 16FE0 ; Tangut # Lm TANGUT ITERATION MARK 17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 -18D00..18D1C ; Tangut # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D00..18D1E ; Tangut # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Tangut # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 6944 +# Total code points: 6946 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 76df07294..02663f9a2 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -31522,7 +31522,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18CFF;KHITAN SMALL SCRIPT CHARACTER-18CFF;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; -18D1C;;Lo;0;L;;;;;N;;;;; +18D1E;;Lo;0;L;;;;;N;;;;; 18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; 18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 1AFF0;KATAKANA LETTER MINNAN TONE-2;Lm;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 0375c7671..365aba541 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-16.0.0.txt -# Date: 2024-06-07, 20:56:33 GMT +# VerticalOrientation-17.0.0.txt +# Date: 2024-10-18, 17:35:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2195,8 +2195,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CD6..18CFE ; U # Cn [41] .. 18CFF ; U # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1C ; U # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C -18D1D..18D7F ; U # Cn [99] .. +18D00..18D1E ; U # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E +18D1F..18D7F ; U # Cn [97] .. 18D80..18D81 ; U # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 18D82..18DFF ; U # Cn [126] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index be969da59..b2dbf58a3 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-10-16, 17:25:36 GMT +# Date: 2024-10-18, 17:35:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2511,7 +2511,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK 17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; OLetter # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; OLetter # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; OLetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2585,7 +2585,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136938 +# Total code points: 136940 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 7775cb484..d4f69462e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-10-16, 17:24:41 GMT +# Date: 2024-10-18, 17:34:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1104,7 +1104,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FE3 ; L # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; L # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; L # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; L # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -1214,7 +1214,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815321 code points not listed here. +# The above property value applies to 815319 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 66b1fbccd..3884406ef 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-10-16, 17:24:44 GMT +# Date: 2024-10-18, 17:34:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1840,7 +1840,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE3 ; 0 # Lm OLD CHINESE ITERATION MARK 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER 17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; 0 # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; 0 # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; 0 # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2060,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821551 code points not listed here. +# The above property value applies to 821549 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 3475888cc..de59209f0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-10-16, 17:24:47 GMT +# Date: 2024-10-18, 17:34:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2103,8 +2103,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761069 code points not listed here. -# Total code points: 792390 +# The above property value applies to 761067 code points not listed here. +# Total code points: 792388 # ================================================ @@ -2509,7 +2509,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; W # Lo [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; W # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; W # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2580,7 +2580,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182645 +# Total code points: 182647 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 2fbaf6570..9823282a2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-10-16, 17:24:48 GMT +# Date: 2024-10-18, 17:34:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -580,7 +580,7 @@ FFFE..FFFF ; Cn # [2] .. 16FE5..16FEF ; Cn # [11] .. 16FF2..16FFF ; Cn # [14] .. 18CD6..18CFE ; Cn # [41] .. -18D1D..18D7F ; Cn # [99] .. +18D1F..18D7F ; Cn # [97] .. 18D82..1AFEF ; Cn # [8814] .. 1AFF4 ; Cn # 1AFFC ; Cn # @@ -747,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819503 +# Total code points: 819501 # ================================================ @@ -2640,7 +2640,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION 17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1C ; Lo # [30] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1C +18CFF..18D1E ; Lo # [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Lo # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO @@ -2708,7 +2708,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136507 +# Total code points: 136509 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 9d769f8fa..4c6a5df09 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-10-16, 17:24:50 GMT +# Date: 2024-10-18, 17:34:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757623 code points not listed here. -# Total code points: 895091 +# The above property value applies to 757621 code points not listed here. +# Total code points: 895089 # ================================================ @@ -1763,7 +1763,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL 17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 -18D00..18D1C ; ID # Lo [29] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1C +18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 1B000..1B122 ; ID # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB @@ -1853,7 +1853,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172451 +# Total code points: 172453 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 6b2c9d464..8ca3e6846 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-10-16, 17:24:50 GMT +# Date: 2024-10-18, 17:34:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37722,7 +37722,7 @@ FFFD ; REPLACEMENT CHARACTER 18AFF ; TANGUT COMPONENT-768 18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* 18CFF ; KHITAN SMALL SCRIPT CHARACTER-* -18D00..18D1C ; TANGUT IDEOGRAPH-* +18D00..18D1E ; TANGUT IDEOGRAPH-* 18D80 ; TANGUT COMPONENT-769 18D81 ; TANGUT COMPONENT-770 1AFF0 ; KATAKANA LETTER MINNAN TONE-2 @@ -45369,6 +45369,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155028 +# Total code points: 155030 # EOF From 956100bc50cbd739a86026c8e3c3866601dada32 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 18 Oct 2024 19:49:24 +0200 Subject: [PATCH 20/23] TangutSources.txt lines from the proposal --- unicodetools/data/ucd/dev/TangutSources.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index ad9bc8c48..c3d2ec5b1 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -12403,5 +12403,9 @@ U+18D1B kTGT_MergedSrc N5217-27 U+18D1B kRSTUnicode 36.7 U+18D1C kTGT_MergedSrc N5217-28 U+18D1C kRSTUnicode 141.9 +U+18D1D kTGT_MergedSrc H2021-309801 +U+18D1D kRSTUnicode 106.13 +U+18D1E kTGT_MergedSrc H2021-834001 +U+18D1E kRSTUnicode 579.14 # EOF From 33b1e945f564913d092386d00c5dbac203a80a72 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 18 Oct 2024 20:07:12 +0200 Subject: [PATCH 21/23] Document the source --- unicodetools/data/ucd/dev/TangutSources.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index c3d2ec5b1..d481eeede 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -34,6 +34,8 @@ # # The following abbreviations are used in kTGT_MergedSrc: # +# H2021 = Hán Xiǎománg (韩小忙), 西夏文词典: 世俗文献部分 (Xīxiàwén Cídiǎn: Shìsú Wénxiàn Bùfēn) +# [Tangut Word Dictionary: Secular Literature Part, 9 vols.]. 2021. # H2004 = Hán Xiǎománg (韓小忙), 西夏文正字研究 (Xīxiàwén Zhèngzì Yánjiū) # [Research into the Correct Forms of Tangut Characters]. 2004. # L1986 = Lǐ Fànwén (李範文), 同音研究 (Tóngyīn Yánjiū) From d0ed7d804579495eaab41cab3c2cd5b0e536055c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 19 Oct 2024 02:02:40 +0200 Subject: [PATCH 22/23] A test --- .../TODO-MISSING-RMG-ISSUE.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/TODO-MISSING-RMG-ISSUE.txt diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/TODO-MISSING-RMG-ISSUE.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/TODO-MISSING-RMG-ISSUE.txt new file mode 100644 index 000000000..21a9644c8 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/TODO-MISSING-RMG-ISSUE.txt @@ -0,0 +1,18 @@ +# [Template for property comparison tests of character encoding proposals] +# [RMG ISSUE TITLE] +# https://github.com/unicode-org/utc-release-management/issues/[RMG ISSUE NUMBER] + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring kRSTUnicode kTGT_MergedSrc: +Propertywise [\x{18D00}\x{18D1D}\x{18D1E}] AreAlike +end Ignoring; + +end Ignoring; + +end Ignoring; \ No newline at end of file From 854763b494f4d4ae83e41d12e0b633a6d317a88d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 19 Oct 2024 04:54:37 +0200 Subject: [PATCH 23/23] The hobgoblin of little minds --- unicodetools/data/ucd/dev/TangutSources.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index d481eeede..59284fb58 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -36,7 +36,7 @@ # # H2021 = Hán Xiǎománg (韩小忙), 西夏文词典: 世俗文献部分 (Xīxiàwén Cídiǎn: Shìsú Wénxiàn Bùfēn) # [Tangut Word Dictionary: Secular Literature Part, 9 vols.]. 2021. -# H2004 = Hán Xiǎománg (韓小忙), 西夏文正字研究 (Xīxiàwén Zhèngzì Yánjiū) +# H2004 = Hán Xiǎománg (韩小忙), 西夏文正字研究 (Xīxiàwén Zhèngzì Yánjiū) # [Research into the Correct Forms of Tangut Characters]. 2004. # L1986 = Lǐ Fànwén (李範文), 同音研究 (Tóngyīn Yánjiū) # [Study of the Homophones]. Yinchuan. 1986