From f03e75790ebeb0117b029f00385ec1ec8ee3f470 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 16:10:46 +0200 Subject: [PATCH 01/15] Java changes for CJK Extension J --- .../java/org/unicode/text/UCD/MakeUnicodeFiles.java | 2 +- .../src/main/java/org/unicode/text/UCD/UCD.java | 10 ++++++++++ .../src/main/java/org/unicode/text/UCD/UCD_Types.java | 7 ++++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index 0c14d0f2d..e5dee89fe 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -1687,7 +1687,7 @@ private static void writeKenFile( // where Cn ranges are allowed to extend from the unassigned part of one block into the // No_Block void beyond. Map ignoreBlocksInCJKVPlanes = new HashMap(); - for (char ext = 'B'; ext <= 'I'; ++ext) { + for (char ext = 'B'; ext <= 'J'; ++ext) { ignoreBlocksInCJKVPlanes.put("CJK_Ext_" + ext, "NB"); } UnicodeProperty blockOrIdeographicPlane = diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index ec0abca40..48a05ad70 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1405,6 +1405,15 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { return CJK_H_BASE; } } + // 323B0..3347B; CJK Unified Ideographs Extension H + if (rCompositeVersion >= 0x110000) { + if (ch <= CJK_J_BASE) { + return ch; // Extension H first char + } + if (ch < CJK_J_LIMIT) { + return CJK_J_BASE; + } + } if (ch < 0xF0000) { return ch; @@ -1659,6 +1668,7 @@ UData get(int codePoint, boolean fixStrings) { case CJK_I_BASE: case CJK_G_BASE: // Extension G case CJK_H_BASE: + case CJK_J_BASE: if (fixStrings) { constructedName = "CJK UNIFIED IDEOGRAPH-" + Utility.hex(codePoint, 4); } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index fa61f9bc6..d81fa9f09 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -85,7 +85,12 @@ public interface UCD_Types { // 31350;;Lo;0;L;;;;;N;;;;; // 323AF;;Lo;0;L;;;;;N;;;;; CJK_H_BASE = 0x31350, - CJK_H_LIMIT = 0x323AF + 1 + CJK_H_LIMIT = 0x323AF + 1, + + // 323B0;;Lo;0;L;;;;;N;;;;; + // 3347B;;Lo;0;L;;;;;N;;;;; + CJK_J_BASE = 0x323B0, + CJK_J_LIMIT = 0x3347B + 1 // when adding to this list, look for all occurrences (in project) of CJK_C_BASE and CJK_C_LIMIT // to check for code that needs changing. From a570224ed11ffe226d5b01a323c4649c135063d7 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 15:46:54 +0200 Subject: [PATCH 02/15] UnicodeData.txt lines according to L2/24-165 --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a373..e937be696 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,5 @@ +323B0;;Lo;0;L;;;;;N;;;;; +3347B;;Lo;0;L;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 7f4ff1768e037bac19244a924c134b8cc7135c6e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 16:26:59 +0200 Subject: [PATCH 03/15] Script=Han --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd..281f098cf 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +323B0..3347B; Han # Scripts-16.0.0.txt # Date: 2024-04-30, 21:48:40 GMT # © 2024 Unicode®, Inc. From e70c3c15ac6bb69646c05cec928627c7a1dbae54 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 16:12:51 +0200 Subject: [PATCH 04/15] Blocks.txt and ShortBlockNames.txt for Extension J --- unicodetools/data/ucd/dev/Blocks.txt | 1 + .../src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 1517dde0c..1f2ffad15 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -367,6 +367,7 @@ FFF0..FFFF; Specials 2F800..2FA1F; CJK Compatibility Ideographs Supplement 30000..3134F; CJK Unified Ideographs Extension G 31350..323AF; CJK Unified Ideographs Extension H +323B0..3347F; CJK Unified Ideographs Extension J E0000..E007F; Tags E0100..E01EF; Variation Selectors Supplement F0000..FFFFF; Supplementary Private Use Area-A diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index eaa03a0f7..fff149279 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -60,6 +60,7 @@ CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G CJK_Ext_H ; CJK_Unified_Ideographs_Extension_H CJK_Ext_I ; CJK_Unified_Ideographs_Extension_I +CJK_Ext_J ; CJK_Unified_Ideographs_Extension_J Diacriticals ; Combining_Diacritical_Marks Diacriticals_Ext ; Combining_Diacritical_Marks_Extended Diacriticals_For_Symbols ; Combining_Diacritical_Marks_For_Symbols From 2db7693114746b81f7f423c8bfba12d005a57ac1 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 16:27:47 +0200 Subject: [PATCH 05/15] Ideographic, Unified_Ideograph --- unicodetools/data/ucd/dev/PropList.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 395aa448d..0f0be5481 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,5 @@ +323B0..3347B; Ideographic +323B0..3347B; Unified_Ideograph # PropList-16.0.0.txt # Date: 2024-05-08, 03:40:06 GMT # © 2024 Unicode®, Inc. From ba58ace453eb7b4dcc95b08de7c5603cad51fbdc Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 16:52:37 +0200 Subject: [PATCH 06/15] Do not refer to versions from the future (also fix a typo) --- unicodetools/src/main/java/org/unicode/text/UCD/UCD.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index 48a05ad70..0ca83d21a 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1406,9 +1406,10 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { } } // 323B0..3347B; CJK Unified Ideographs Extension H - if (rCompositeVersion >= 0x110000) { + // TODO(egg): This should be 0x110000, but the tools are still at 16.0. + if (rCompositeVersion >= 0x100000) { if (ch <= CJK_J_BASE) { - return ch; // Extension H first char + return ch; // Extension J first char } if (ch < CJK_J_LIMIT) { return CJK_J_BASE; From f8d623a41290f002a2823e6575e0f78d900ceb25 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 16:54:44 +0200 Subject: [PATCH 07/15] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 26 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 ++--- unicodetools/data/ucd/dev/LineBreak.txt | 6 ++--- unicodetools/data/ucd/dev/PropList.txt | 12 ++++----- .../data/ucd/dev/PropertyValueAliases.txt | 3 ++- unicodetools/data/ucd/dev/Scripts.txt | 7 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 4 +-- .../data/ucd/dev/VerticalOrientation.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++--- .../dev/extracted/DerivedCombiningClass.txt | 6 ++--- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 6 ++--- .../data/ucd/dev/extracted/DerivedName.txt | 6 ++--- 16 files changed, 60 insertions(+), 61 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e48..b492d6846 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# Date: 2024-07-24, 14:53:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2056,7 +2056,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FADF ; 16.0 # SPLATTER 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE +323B0..3347B ; 16.0 # [4300] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 5185 +# Total code points: 9485 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 88df29b3b..7dbc12abb 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-02, 15:02:37 GMT +# Date: 2024-07-24, 14:53:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1439,9 +1439,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 2EBF0..2EE5D ; Alphabetic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; Alphabetic # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 142759 +# Total code points: 147059 # ================================================ @@ -6960,9 +6960,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2EBF0..2EE5D ; ID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; ID_Start # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 141269 +# Total code points: 145569 # ================================================ @@ -8367,10 +8367,10 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 2EBF0..2EE5D ; ID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; ID_Continue # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 148841 # ================================================ @@ -9146,9 +9146,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 2EBF0..2EE5D ; XID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; XID_Start # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 141246 +# Total code points: 145546 # ================================================ @@ -10554,10 +10554,10 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 2EBF0..2EE5D ; XID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; XID_Continue # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 148822 # ================================================ @@ -12809,9 +12809,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 2EBF0..2EE5D ; Grapheme_Base # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; Grapheme_Base # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 152738 +# Total code points: 157038 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea..cec93bdd7 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# Date: 2024-07-24, 14:53:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2675,8 +2675,8 @@ FFFD ; A # So REPLACEMENT CHARACTER 2FA20..2FFFD ; W # Cn [1502] .. 30000..3134A ; W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; W # Cn [5] .. -31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -323B0..3FFFD ; W # Cn [56398] .. +31350..3347B ; W # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +3347C..3FFFD ; W # Cn [52098] .. E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 9dc61d95d..722632f7c 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2024-05-11, 16:57:19 GMT +# Date: 2024-07-24, 14:53:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3664,8 +3664,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER 2FA20..2FFFD ; ID # Cn [1502] .. 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; ID # Cn [5] .. -31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -323B0..3FFFD ; ID # Cn [56398] .. +31350..3347B ; ID # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +3347C..3FFFD ; ID # Cn [52098] .. E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 0f0be5481..dfaf284dc 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,7 +1,5 @@ -323B0..3347B; Ideographic -323B0..3347B; Unified_Ideograph # PropList-16.0.0.txt -# Date: 2024-05-08, 03:40:06 GMT +# Date: 2024-07-24, 14:54:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -885,9 +883,9 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 2EBF0..2EE5D ; Ideographic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; Ideographic # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 106477 +# Total code points: 110777 # ================================================ @@ -1362,9 +1360,9 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2EBF0..2EE5D ; Unified_Ideograph # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; Unified_Ideograph # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; Unified_Ideograph # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 97680 +# Total code points: 101980 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 0c42297a1..4d78d6ae8 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-16.0.0.txt -# Date: 2024-06-06, 22:21:34 GMT +# Date: 2024-07-24, 14:54:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -211,6 +211,7 @@ blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F blk; CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G blk; CJK_Ext_H ; CJK_Unified_Ideographs_Extension_H blk; CJK_Ext_I ; CJK_Unified_Ideographs_Extension_I +blk; CJK_Ext_J ; CJK_Unified_Ideographs_Extension_J blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement blk; CJK_Strokes ; CJK_Strokes blk; CJK_Symbols ; CJK_Symbols_And_Punctuation diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 281f098cf..b957ea0ab 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,6 +1,5 @@ -323B0..3347B; Han # Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Date: 2024-07-24, 14:54:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1603,9 +1602,9 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 2EBF0..2EE5D ; Han # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; Han # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 99030 +# Total code points: 103330 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index e937be696..02d5f0939 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,5 +1,3 @@ -323B0;;Lo;0;L;;;;;N;;;;; -3347B;;Lo;0;L;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -39775,6 +39773,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 3134A;;Lo;0;L;;;;;N;;;;; 31350;;Lo;0;L;;;;;N;;;;; 323AF;;Lo;0;L;;;;;N;;;;; +323B0;;Lo;0;L;;;;;N;;;;; +3347B;;Lo;0;L;;;;;N;;;;; E0001;LANGUAGE TAG;Cf;0;BN;;;;;N;;;;; E0020;TAG SPACE;Cf;0;BN;;;;;N;;;;; E0021;TAG EXCLAMATION MARK;Cf;0;BN;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd7228..7ef5ec0d3 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# Date: 2024-07-24, 14:54:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2493,8 +2493,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 2FA20..2FFFD ; U # Cn [1502] .. 30000..3134A ; U # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; U # Cn [5] .. -31350..323AF ; U # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -323B0..3FFFD ; U # Cn [56398] .. +31350..3347B ; U # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +3347C..3FFFD ; U # Cn [52098] .. E0001 ; R # Cf LANGUAGE TAG E0020..E007F ; R # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; R # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 39fdb57c0..957d42188 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-05-13, 20:53:44 GMT +# Date: 2024-07-24, 14:54:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2583,9 +2583,9 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2EBF0..2EE5D ; OLetter # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; OLetter # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 136908 +# Total code points: 141208 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa..0f44f9c41 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# Date: 2024-07-24, 14:53:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1210,11 +1210,11 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 2EBF0..2EE5D ; L # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; L # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; L # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; L # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815351 code points not listed here. +# The above property value applies to 811051 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96..9788b271a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# Date: 2024-07-24, 14:53:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2053,14 +2053,14 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 2EBF0..2EE5D ; 0 # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; 0 # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; 0 # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; 0 # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; 0 # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B E0001 ; 0 # Cf LANGUAGE TAG E0020..E007F ; 0 # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. +# The above property value applies to 817281 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaa..f93b06876 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-07-24, 14:53:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2577,9 +2577,9 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 2EBF0..2EE5D ; W # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; W # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# The above property value applies to 60482 code points not listed here. +# The above property value applies to 56182 code points not listed here. # Total code points: 182615 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca9..8d5cfbb55 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-07-24, 14:53:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -740,14 +740,14 @@ FFFE..FFFF ; Cn # [2] .. 2EE5E..2F7FF ; Cn # [2466] .. 2FA1E..2FFFF ; Cn # [1506] .. 3134B..3134F ; Cn # [5] .. -323B0..E0000 ; Cn # [711761] .. +3347C..E0000 ; Cn # [707461] .. E0002..E001F ; Cn # [30] .. E0080..E00FF ; Cn # [128] .. E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819533 +# Total code points: 815233 # ================================================ @@ -2706,9 +2706,9 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 2EBF0..2EE5D ; Lo # [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; Lo # [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# Total code points: 136477 +# Total code points: 140777 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 866536783..6c99a620b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-05-11, 16:57:14 GMT +# Date: 2024-07-24, 14:53:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1852,9 +1852,9 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 2EBF0..2EE5D ; ID # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; ID # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +31350..3347B ; ID # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B -# The above property value applies to 61865 code points not listed here. +# The above property value applies to 57565 code points not listed here. # Total code points: 172421 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b5..e133fa8a2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# Date: 2024-07-24, 14:53:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -45028,7 +45028,7 @@ FFFD ; REPLACEMENT CHARACTER 2EBF0..2EE5D ; CJK UNIFIED IDEOGRAPH-* 2F800..2FA1D ; CJK COMPATIBILITY IDEOGRAPH-* 30000..3134A ; CJK UNIFIED IDEOGRAPH-* -31350..323AF ; CJK UNIFIED IDEOGRAPH-* +31350..3347B ; CJK UNIFIED IDEOGRAPH-* E0001 ; LANGUAGE TAG E0020 ; TAG SPACE E0021 ; TAG EXCLAMATION MARK @@ -45367,6 +45367,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 159298 # EOF From eaf1bbe42a265cc37473ca9dca492737de2f13b3 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 16:55:13 +0200 Subject: [PATCH 08/15] GenerateEnums --- .../src/main/java/org/unicode/props/UcdPropertyValues.java | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 37020e727..867e3dbc6 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -235,6 +235,7 @@ public enum Block_Values implements Named { CJK_Unified_Ideographs_Extension_G("CJK_Ext_G"), CJK_Unified_Ideographs_Extension_H("CJK_Ext_H"), CJK_Unified_Ideographs_Extension_I("CJK_Ext_I"), + CJK_Unified_Ideographs_Extension_J("CJK_Ext_J"), CJK_Radicals_Supplement("CJK_Radicals_Sup"), CJK_Strokes("CJK_Strokes"), CJK_Symbols_And_Punctuation("CJK_Symbols"), From 0e1acb593d7fe907521535772c6679e43ccd938e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 24 Jul 2024 17:03:24 +0200 Subject: [PATCH 09/15] Regenerate UCD again --- unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index f93b06876..9872f2fec 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-07-24, 14:53:43 GMT +# Date: 2024-07-24, 15:02:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -36,6 +36,7 @@ # 30000..3134F CJK_Unified_Ideographs_Extension_G # 31350..323AF CJK_Unified_Ideographs_Extension_H +# 323B0..3347F CJK_Unified_Ideographs_Extension_J # @missing: 30000..3FFFD; Wide # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 6c99a620b..c3319015a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-07-24, 14:53:45 GMT +# Date: 2024-07-24, 15:02:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -59,6 +59,7 @@ # 30000..3134F CJK_Unified_Ideographs_Extension_G # 31350..323AF CJK_Unified_Ideographs_Extension_H +# 323B0..3347F CJK_Unified_Ideographs_Extension_J # @missing: 30000..3FFFD; Ideographic # ================================================ From 53eed13bd4839cc919a8dcc80688b56530425738 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 13:15:19 +0100 Subject: [PATCH 10/15] drop 2, for Unicode Version 0x11 --- unicodetools/src/main/java/org/unicode/text/UCD/UCD.java | 5 ++--- .../src/main/java/org/unicode/text/UCD/UCD_Types.java | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index 0ca83d21a..116fa87d2 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1405,9 +1405,8 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { return CJK_H_BASE; } } - // 323B0..3347B; CJK Unified Ideographs Extension H - // TODO(egg): This should be 0x110000, but the tools are still at 16.0. - if (rCompositeVersion >= 0x100000) { + // 323B0..33479; CJK Unified Ideographs Extension H + if (rCompositeVersion >= 0x110000) { if (ch <= CJK_J_BASE) { return ch; // Extension J first char } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index 9d6931335..075873959 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -88,9 +88,9 @@ public interface UCD_Types { CJK_H_LIMIT = 0x323AF + 1, // 323B0;;Lo;0;L;;;;;N;;;;; - // 3347B;;Lo;0;L;;;;;N;;;;; + // 33479;;Lo;0;L;;;;;N;;;;; CJK_J_BASE = 0x323B0, - CJK_J_LIMIT = 0x3347B + 1 + CJK_J_LIMIT = 0x33479 + 1 // when adding to this list, look for all occurrences (in project) of CJK_C_BASE and CJK_C_LIMIT // to check for code that needs changing. From 33a09bc6126786158bcddc244509b4d93d26ed10 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Nov 2024 13:17:25 +0100 Subject: [PATCH 11/15] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 7 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 32 +++++++++++-------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 8 +++-- unicodetools/data/ucd/dev/LineBreak.txt | 6 ++-- unicodetools/data/ucd/dev/PropList.txt | 10 ++++-- unicodetools/data/ucd/dev/Scripts.txt | 8 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 3 +- .../data/ucd/dev/VerticalOrientation.txt | 8 +++-- .../dev/auxiliary/SentenceBreakProperty.txt | 7 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 7 ++-- .../dev/extracted/DerivedCombiningClass.txt | 7 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 7 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 7 ++-- .../data/ucd/dev/extracted/DerivedName.txt | 6 ++-- 15 files changed, 80 insertions(+), 53 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 50558c9d9..815c26998 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-10-16, 13:48:00 GMT +# Date: 2024-11-13, 12:15:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2065,8 +2065,9 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Newly assigned in Unicode 17.0.0 (September, 2025) -323B0..3347B ; 17.0 # [4300] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-3347B +323B0..33479 ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479 +3347B ; 17.0 # -# Total code points: 4300 +# Total code points: 4299 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 5a797fd32..6f5d44b7a 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-10-16, 13:48:25 GMT +# Date: 2024-11-13, 12:16:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1439,9 +1439,10 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 2EBF0..2EE5D ; Alphabetic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; Alphabetic # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; Alphabetic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; Alphabetic # Lo -# Total code points: 147059 +# Total code points: 147058 # ================================================ @@ -6960,9 +6961,10 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2EBF0..2EE5D ; ID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; ID_Start # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; ID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; ID_Start # Lo -# Total code points: 145569 +# Total code points: 145568 # ================================================ @@ -8367,10 +8369,11 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 2EBF0..2EE5D ; ID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; ID_Continue # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; ID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; ID_Continue # Lo E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 148841 +# Total code points: 148840 # ================================================ @@ -9146,9 +9149,10 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 2EBF0..2EE5D ; XID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; XID_Start # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; XID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; XID_Start # Lo -# Total code points: 145546 +# Total code points: 145545 # ================================================ @@ -10554,10 +10558,11 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 2EBF0..2EE5D ; XID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; XID_Continue # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; XID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; XID_Continue # Lo E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 148822 +# Total code points: 148821 # ================================================ @@ -12810,9 +12815,10 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 2EBF0..2EE5D ; Grapheme_Base # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; Grapheme_Base # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; Grapheme_Base # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; Grapheme_Base # Lo -# Total code points: 157030 +# Total code points: 157029 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index cec93bdd7..a7780724c 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-16.0.0.txt -# Date: 2024-07-24, 14:53:48 GMT +# EastAsianWidth-17.0.0.txt +# Date: 2024-11-13, 12:16:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2675,7 +2675,9 @@ FFFD ; A # So REPLACEMENT CHARACTER 2FA20..2FFFD ; W # Cn [1502] .. 30000..3134A ; W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; W # Cn [5] .. -31350..3347B ; W # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; W # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347A ; W # Cn +3347B ; W # Lo 3347C..3FFFD ; W # Cn [52098] .. E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 9fe224582..02d35180f 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-10-16, 13:48:33 GMT +# Date: 2024-11-13, 12:16:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3659,7 +3659,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER 2FA20..2FFFD ; ID # Cn [1502] .. 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; ID # Cn [5] .. -31350..3347B ; ID # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; ID # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347A ; ID # Cn +3347B ; ID # Lo 3347C..3FFFD ; ID # Cn [52098] .. E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index de391096b..687287340 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-10-16, 13:48:45 GMT +# Date: 2024-11-13, 12:16:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -883,7 +883,9 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 2EBF0..2EE5D ; Ideographic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; Ideographic # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; Ideographic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347A ; Ideographic # Cn +3347B ; Ideographic # Lo # Total code points: 110777 @@ -1365,7 +1367,9 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2EBF0..2EE5D ; Unified_Ideograph # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; Unified_Ideograph # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; Unified_Ideograph # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347A ; Unified_Ideograph # Cn +3347B ; Unified_Ideograph # Lo # Total code points: 101980 diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index b957ea0ab..b6ce051ac 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ -# Scripts-16.0.0.txt -# Date: 2024-07-24, 14:54:18 GMT +# Scripts-17.0.0.txt +# Date: 2024-11-13, 12:16:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1602,7 +1602,9 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 2EBF0..2EE5D ; Han # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; Han # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; Han # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347A ; Han # Cn +3347B ; Han # Lo # Total code points: 103330 diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 02d5f0939..747725ddf 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -39774,7 +39774,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 31350;;Lo;0;L;;;;;N;;;;; 323AF;;Lo;0;L;;;;;N;;;;; 323B0;;Lo;0;L;;;;;N;;;;; -3347B;;Lo;0;L;;;;;N;;;;; +33479;;Lo;0;L;;;;;N;;;;; +3347B;;Lo;0;L;;;;;N;;;;; E0001;LANGUAGE TAG;Cf;0;BN;;;;;N;;;;; E0020;TAG SPACE;Cf;0;BN;;;;;N;;;;; E0021;TAG EXCLAMATION MARK;Cf;0;BN;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 7ef5ec0d3..8eb5bd177 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-16.0.0.txt -# Date: 2024-07-24, 14:54:22 GMT +# VerticalOrientation-17.0.0.txt +# Date: 2024-11-13, 12:16:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2493,7 +2493,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 2FA20..2FFFD ; U # Cn [1502] .. 30000..3134A ; U # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; U # Cn [5] .. -31350..3347B ; U # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; U # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347A ; U # Cn +3347B ; U # Lo 3347C..3FFFD ; U # Cn [52098] .. E0001 ; R # Cf LANGUAGE TAG E0020..E007F ; R # Cf [96] TAG SPACE..CANCEL TAG diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 08defdac8..30e8509a6 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-10-16, 13:49:06 GMT +# Date: 2024-11-13, 12:16:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2583,9 +2583,10 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2EBF0..2EE5D ; OLetter # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; OLetter # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; OLetter # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; OLetter # Lo -# Total code points: 141208 +# Total code points: 141207 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 3cc653aac..507ef9d9c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-10-16, 13:48:23 GMT +# Date: 2024-11-13, 12:16:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1210,11 +1210,12 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 2EBF0..2EE5D ; L # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; L # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; L # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; L # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; L # Lo F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 811051 code points not listed here. +# The above property value applies to 811052 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index cfc9af177..861480651 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-10-16, 13:48:25 GMT +# Date: 2024-11-13, 12:16:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2053,14 +2053,15 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 2EBF0..2EE5D ; 0 # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; 0 # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; 0 # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; 0 # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; 0 # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; 0 # Lo E0001 ; 0 # Cf LANGUAGE TAG E0020..E007F ; 0 # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 817281 code points not listed here. +# The above property value applies to 817282 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index c64a6999b..73e8fa31c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-10-16, 13:48:28 GMT +# Date: 2024-11-13, 12:16:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2578,9 +2578,10 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 2EBF0..2EE5D ; W # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; W # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; W # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; W # Lo -# The above property value applies to 56182 code points not listed here. +# The above property value applies to 56183 code points not listed here. # Total code points: 182615 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index fa21f8b3d..5b8731509 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-10-16, 13:48:28 GMT +# Date: 2024-11-13, 12:16:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -740,6 +740,7 @@ FFFE..FFFF ; Cn # [2] .. 2EE5E..2F7FF ; Cn # [2466] .. 2FA1E..2FFFF ; Cn # [1506] .. 3134B..3134F ; Cn # [5] .. +3347A ; Cn # 3347C..E0000 ; Cn # [707461] .. E0002..E001F ; Cn # [30] .. E0080..E00FF ; Cn # [128] .. @@ -747,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 815233 +# Total code points: 815234 # ================================================ @@ -2706,9 +2707,10 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 2EBF0..2EE5D ; Lo # [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; Lo # [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; Lo # [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; Lo # -# Total code points: 140777 +# Total code points: 140776 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 4643f2145..c9be27a5c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-10-16, 13:48:29 GMT +# Date: 2024-11-13, 12:16:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1851,9 +1851,10 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 2EBF0..2EE5D ; ID # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; ID # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -31350..3347B ; ID # Lo [8492] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-3347B +31350..33479 ; ID # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 +3347B ; ID # Lo -# The above property value applies to 57565 code points not listed here. +# The above property value applies to 57566 code points not listed here. # Total code points: 172421 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 85ffd0ad7..df3f0aff2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-10-16, 13:48:30 GMT +# Date: 2024-11-13, 12:16:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -45028,7 +45028,7 @@ FFFD ; REPLACEMENT CHARACTER 2EBF0..2EE5D ; CJK UNIFIED IDEOGRAPH-* 2F800..2FA1D ; CJK COMPATIBILITY IDEOGRAPH-* 30000..3134A ; CJK UNIFIED IDEOGRAPH-* -31350..3347B ; CJK UNIFIED IDEOGRAPH-* +31350..33479 ; CJK UNIFIED IDEOGRAPH-* E0001 ; LANGUAGE TAG E0020 ; TAG SPACE E0021 ; TAG EXCLAMATION MARK @@ -45367,6 +45367,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 159298 +# Total code points: 159296 # EOF From ddb07ffa52bf59b3bede16a1ea8ba6b183f7dbf5 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 16:01:27 +0100 Subject: [PATCH 12/15] Remove stray 3347B --- unicodetools/data/ucd/dev/UnicodeData.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 747725ddf..c355b391c 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -39775,7 +39775,6 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 323AF;;Lo;0;L;;;;;N;;;;; 323B0;;Lo;0;L;;;;;N;;;;; 33479;;Lo;0;L;;;;;N;;;;; -3347B;;Lo;0;L;;;;;N;;;;; E0001;LANGUAGE TAG;Cf;0;BN;;;;;N;;;;; E0020;TAG SPACE;Cf;0;BN;;;;;N;;;;; E0021;TAG EXCLAMATION MARK;Cf;0;BN;;;;;N;;;;; From 90abf0fa6e5ea14229ec058711d0fbc2f2ebfeea Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 16:23:28 +0100 Subject: [PATCH 13/15] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++--- .../data/ucd/dev/DerivedCoreProperties.txt | 20 +++++++------------ unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 ++---- unicodetools/data/ucd/dev/LineBreak.txt | 6 ++---- unicodetools/data/ucd/dev/PropList.txt | 8 +++----- unicodetools/data/ucd/dev/Scripts.txt | 5 ++--- .../data/ucd/dev/VerticalOrientation.txt | 6 ++---- .../dev/auxiliary/SentenceBreakProperty.txt | 5 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 5 ++--- .../dev/extracted/DerivedCombiningClass.txt | 5 ++--- .../dev/extracted/DerivedEastAsianWidth.txt | 5 ++--- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++------ .../ucd/dev/extracted/DerivedLineBreak.txt | 5 ++--- 13 files changed, 34 insertions(+), 57 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 815c26998..b87eae5db 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-13, 12:15:48 GMT +# Date: 2024-11-14, 15:19:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2066,8 +2066,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Newly assigned in Unicode 17.0.0 (September, 2025) 323B0..33479 ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479 -3347B ; 17.0 # -# Total code points: 4299 +# Total code points: 4298 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 6f5d44b7a..70ff32da8 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-13, 12:16:05 GMT +# Date: 2024-11-14, 15:19:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1440,9 +1440,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Alphabetic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; Alphabetic # Lo -# Total code points: 147058 +# Total code points: 147057 # ================================================ @@ -6962,9 +6961,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; ID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; ID_Start # Lo -# Total code points: 145568 +# Total code points: 145567 # ================================================ @@ -8370,10 +8368,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; ID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; ID_Continue # Lo E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 148840 +# Total code points: 148839 # ================================================ @@ -9150,9 +9147,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; XID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; XID_Start # Lo -# Total code points: 145545 +# Total code points: 145544 # ================================================ @@ -10559,10 +10555,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; XID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; XID_Continue # Lo E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 148821 +# Total code points: 148820 # ================================================ @@ -12816,9 +12811,8 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Grapheme_Base # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; Grapheme_Base # Lo -# Total code points: 157029 +# Total code points: 157028 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a7780724c..49b43d158 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 12:16:09 GMT +# Date: 2024-11-14, 15:19:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2676,9 +2676,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 30000..3134A ; W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; W # Cn [5] .. 31350..33479 ; W # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A ; W # Cn -3347B ; W # Lo -3347C..3FFFD ; W # Cn [52098] .. +3347A..3FFFD ; W # Cn [52100] .. E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 02d35180f..f44f8fd74 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-13, 12:16:10 GMT +# Date: 2024-11-14, 15:20:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3660,9 +3660,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; ID # Cn [5] .. 31350..33479 ; ID # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A ; ID # Cn -3347B ; ID # Lo -3347C..3FFFD ; ID # Cn [52098] .. +3347A..3FFFD ; ID # Cn [52100] .. E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 687287340..01b49f395 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-13, 12:16:16 GMT +# Date: 2024-11-14, 15:20:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -884,8 +884,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Ideographic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A ; Ideographic # Cn -3347B ; Ideographic # Lo +3347A..3347B ; Ideographic # Cn [2] .. # Total code points: 110777 @@ -1368,8 +1367,7 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 2EBF0..2EE5D ; Unified_Ideograph # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Unified_Ideograph # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A ; Unified_Ideograph # Cn -3347B ; Unified_Ideograph # Lo +3347A..3347B ; Unified_Ideograph # Cn [2] .. # Total code points: 101980 diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index b6ce051ac..68bea7881 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-13, 12:16:29 GMT +# Date: 2024-11-14, 15:20:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1603,8 +1603,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Han # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A ; Han # Cn -3347B ; Han # Lo +3347A..3347B ; Han # Cn [2] .. # Total code points: 103330 diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 8eb5bd177..284f498df 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-13, 12:16:31 GMT +# Date: 2024-11-14, 15:20:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2494,9 +2494,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 30000..3134A ; U # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 3134B..3134F ; U # Cn [5] .. 31350..33479 ; U # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A ; U # Cn -3347B ; U # Lo -3347C..3FFFD ; U # Cn [52098] .. +3347A..3FFFD ; U # Cn [52100] .. E0001 ; R # Cf LANGUAGE TAG E0020..E007F ; R # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; R # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 30e8509a6..a3bec69ca 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-13, 12:16:29 GMT +# Date: 2024-11-14, 15:20:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2584,9 +2584,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; OLetter # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; OLetter # Lo -# Total code points: 141207 +# Total code points: 141206 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 507ef9d9c..ff03712c9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-13, 12:16:03 GMT +# Date: 2024-11-14, 15:19:53 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1211,11 +1211,10 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; L # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; L # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; L # Lo F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 811052 code points not listed here. +# The above property value applies to 811053 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 861480651..05d005692 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-13, 12:16:05 GMT +# Date: 2024-11-14, 15:19:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2054,14 +2054,13 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 2F800..2FA1D ; 0 # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; 0 # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; 0 # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; 0 # Lo E0001 ; 0 # Cf LANGUAGE TAG E0020..E007F ; 0 # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 817282 code points not listed here. +# The above property value applies to 817283 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 73e8fa31c..6eeea193d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-13, 12:16:06 GMT +# Date: 2024-11-14, 15:19:56 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2579,9 +2579,8 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 2F800..2FA1D ; W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; W # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; W # Lo -# The above property value applies to 56183 code points not listed here. +# The above property value applies to 56184 code points not listed here. # Total code points: 182615 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 5b8731509..85d004b91 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-13, 12:16:07 GMT +# Date: 2024-11-14, 15:19:56 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -740,15 +740,14 @@ FFFE..FFFF ; Cn # [2] .. 2EE5E..2F7FF ; Cn # [2466] .. 2FA1E..2FFFF ; Cn # [1506] .. 3134B..3134F ; Cn # [5] .. -3347A ; Cn # -3347C..E0000 ; Cn # [707461] .. +3347A..E0000 ; Cn # [707463] .. E0002..E001F ; Cn # [30] .. E0080..E00FF ; Cn # [128] .. E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 815234 +# Total code points: 815235 # ================================================ @@ -2708,9 +2707,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Lo # [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; Lo # -# Total code points: 140776 +# Total code points: 140775 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index c9be27a5c..04ddf6eb6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-13, 12:16:08 GMT +# Date: 2024-11-14, 15:19:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1852,9 +1852,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 2F800..2FA1D ; ID # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; ID # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347B ; ID # Lo -# The above property value applies to 57566 code points not listed here. +# The above property value applies to 57567 code points not listed here. # Total code points: 172421 # ================================================ From 03ed0fb67661c47647a2af66f10a530b6c55362f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 16:17:06 +0100 Subject: [PATCH 14/15] More remnants --- unicodetools/data/ucd/dev/PropList.txt | 2 -- unicodetools/data/ucd/dev/Scripts.txt | 1 - 2 files changed, 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 01b49f395..5644d1f62 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -884,7 +884,6 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Ideographic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A..3347B ; Ideographic # Cn [2] .. # Total code points: 110777 @@ -1367,7 +1366,6 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 2EBF0..2EE5D ; Unified_Ideograph # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Unified_Ideograph # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A..3347B ; Unified_Ideograph # Cn [2] .. # Total code points: 101980 diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 68bea7881..91776cea3 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1603,7 +1603,6 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Han # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -3347A..3347B ; Han # Cn [2] .. # Total code points: 103330 From f2ffad5476bbf91a77669d324b1afcc01545bafa Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 14 Nov 2024 16:24:41 +0100 Subject: [PATCH 15/15] Regenerate UCD --- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- unicodetools/data/ucd/dev/Scripts.txt | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 5644d1f62..b2677c958 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-14, 15:20:06 GMT +# Date: 2024-11-14, 15:24:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -885,7 +885,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Ideographic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 110777 +# Total code points: 110775 # ================================================ @@ -1367,7 +1367,7 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Unified_Ideograph # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 101980 +# Total code points: 101978 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 91776cea3..3371f833d 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-14, 15:20:18 GMT +# Date: 2024-11-14, 15:24:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1604,7 +1604,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Han # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 103330 +# Total code points: 103328 # ================================================