From 582ea2b8b9c0655ce89b6329cdac0add2d86f862 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 27 Oct 2023 19:26:23 +0200 Subject: [PATCH 1/6] UnicodeData.txt from L2/23-220 --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 9c89d3d50..17c1aa13b 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,5 @@ +31E4;CJK STROKE HZXG;So;0;ON;;;;;N;;;;; +31E5;CJK STROKE SZP;So;0;ON;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From bce536014abc7c6a084ad06631f5ae03b64def4c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 27 Oct 2023 19:29:45 +0200 Subject: [PATCH 2/6] LB=ID --- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index dc75435f1..bd7bd56b7 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-10-25, 12:52:56 GMT +# Date: 2023-10-27, 17:28:42 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1520,6 +1520,7 @@ 3196..319F ; ID # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; ID # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH 31C0..31E3 ; ID # So [36] CJK STROKE T..CJK STROKE Q +31E4..31E5 ; ID # So [2] CJK STROKE HZXG..CJK STROKE SZP 31EF ; ID # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; CJ # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; ID # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU From 8d20656f3eee5feb9a62b1047bad31348a4760f7 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 27 Oct 2023 19:30:13 +0200 Subject: [PATCH 3/6] Scripts.txt --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 800611d20..99b28dc87 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +31E4..31E5;Common # Scripts-16.0.0.txt # Date: 2023-10-25, 12:53:25 GMT # © 2023 Unicode®, Inc. From ff50161670e9fddc50d2a6f4e0ebe1378e3d0565 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 27 Oct 2023 19:37:21 +0200 Subject: [PATCH 4/6] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 6 +++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++--- unicodetools/data/ucd/dev/Scripts.txt | 7 +++---- unicodetools/data/ucd/dev/UnicodeData.txt | 4 ++-- unicodetools/data/ucd/dev/VerticalOrientation.txt | 6 +++--- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 10 +++++----- .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 6 +++--- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedGeneralCategory.txt | 10 +++++----- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 10 +++++----- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 6 ++++-- 13 files changed, 43 insertions(+), 40 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index c396d009c..bf62b3c68 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-10-25, 12:52:16 GMT +# Date: 2023-10-27, 17:36:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2013,6 +2013,7 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT 0C5C ; 16.0 # TELUGU ARCHAIC SHRII 0CDC ; 16.0 # KANNADA ARCHAIC SHRII 1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE +31E4..31E5 ; 16.0 # [2] CJK STROKE HZXG..CJK STROKE SZP A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE 105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO 10D40..10D65 ; 16.0 # [38] GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA @@ -2029,6 +2030,6 @@ A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2 ; 16.0 # RIGHTWARDS ARROW WITH LOWER HOOK -# Total code points: 304 +# Total code points: 306 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 2e73593f4..891136c55 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-10-25, 12:52:46 GMT +# Date: 2023-10-27, 17:36:34 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -11790,7 +11790,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 3192..3195 ; Grapheme_Base # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; Grapheme_Base # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; Grapheme_Base # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3 ; Grapheme_Base # So [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; Grapheme_Base # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; Grapheme_Base # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU @@ -12662,7 +12662,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 147908 +# Total code points: 147910 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 96bbaecc1..450ed2bb1 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-10-25, 12:52:55 GMT +# Date: 2023-10-27, 17:36:44 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1488,6 +1488,7 @@ 3196..319F ; W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH 31C0..31E3 ; W # So [36] CJK STROKE T..CJK STROKE Q +31E4..31E5 ; N # So [2] CJK STROKE HZXG..CJK STROKE SZP 31EF ; W # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index bd7bd56b7..78bb32a63 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-10-27, 17:28:42 GMT +# Date: 2023-10-27, 17:36:45 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1519,8 +1519,7 @@ 3192..3195 ; ID # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; ID # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; ID # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3 ; ID # So [36] CJK STROKE T..CJK STROKE Q -31E4..31E5 ; ID # So [2] CJK STROKE HZXG..CJK STROKE SZP +31C0..31E5 ; ID # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; ID # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; CJ # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; ID # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 99b28dc87..d3aa0ff79 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,6 +1,5 @@ -31E4..31E5;Common # Scripts-16.0.0.txt -# Date: 2023-10-25, 12:53:25 GMT +# Date: 2023-10-27, 17:37:14 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -399,7 +398,7 @@ 3190..3191 ; Common # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK 3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK -31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; Common # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; Common # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN 322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO @@ -632,7 +631,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 8311 +# Total code points: 8313 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 17c1aa13b..95f4e5496 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,5 +1,3 @@ -31E4;CJK STROKE HZXG;So;0;ON;;;;;N;;;;; -31E5;CJK STROKE SZP;So;0;ON;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -11716,6 +11714,8 @@ 31E1;CJK STROKE HZZZG;So;0;ON;;;;;N;;;;; 31E2;CJK STROKE PG;So;0;ON;;;;;N;;;;; 31E3;CJK STROKE Q;So;0;ON;;;;;N;;;;; +31E4;CJK STROKE HZXG;So;0;ON;;;;;N;;;;; +31E5;CJK STROKE SZP;So;0;ON;;;;;N;;;;; 31EF;IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION;So;0;ON;;;;;N;;;;; 31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;; 31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 4219b2fbf..60548fa30 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-25, 12:53:29 GMT +# Date: 2023-10-27, 17:37:18 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1291,8 +1291,8 @@ 3192..3195 ; U # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; U # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; U # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3 ; U # So [36] CJK STROKE T..CJK STROKE Q -31E4..31EE ; U # Cn [11] .. +31C0..31E5 ; U # So [38] CJK STROKE T..CJK STROKE SZP +31E6..31EE ; U # Cn [9] .. 31EF ; U # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; Tu # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; U # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2a39d1660..cb61f0d05 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-10-25, 12:52:43 GMT +# Date: 2023-10-27, 17:36:31 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1197,8 +1197,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820232 code points not listed here. -# Total code points: 1096260 +# The above property value applies to 820230 code points not listed here. +# Total code points: 1096258 # ================================================ @@ -1804,7 +1804,7 @@ FF1A ; CS # Po FULLWIDTH COLON 309B..309C ; ON # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 30A0 ; ON # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN 30FB ; ON # Po KATAKANA MIDDLE DOT -31C0..31E3 ; ON # So [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; ON # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; ON # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 321D..321E ; ON # So [2] PARENTHESIZED KOREAN CHARACTER OJEON..PARENTHESIZED KOREAN CHARACTER O HU 3250 ; ON # So PARTNERSHIP SIGN @@ -1973,7 +1973,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6040 +# Total code points: 6042 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 475a4338a..3280d0fe7 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-10-25, 12:52:46 GMT +# Date: 2023-10-27, 17:36:34 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1042,7 +1042,7 @@ 3192..3195 ; 0 # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; 0 # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; 0 # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3 ; 0 # So [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; 0 # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; 0 # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; 0 # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; 0 # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU @@ -2031,7 +2031,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826462 code points not listed here. +# The above property value applies to 826460 code points not listed here. # Total code points: 1113182 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index b9b084bdd..369fce342 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-25, 12:52:49 GMT +# Date: 2023-10-27, 17:36:37 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1145,6 +1145,7 @@ 2E5C ; N # Pe BOTTOM HALF RIGHT PARENTHESIS 2E5D ; N # Pd OBLIQUE HYPHEN 303F ; N # So IDEOGRAPHIC HALF FILL SPACE +31E4..31E5 ; N # So [2] CJK STROKE HZXG..CJK STROKE SZP 4DC0..4DFF ; N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION A4D0..A4F7 ; N # Lo [40] LISU LETTER BA..LISU LETTER OE A4F8..A4FD ; N # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU @@ -2070,7 +2071,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765980 code points not listed here. +# The above property value applies to 765978 code points not listed here. # Total code points: 792618 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 0fcfe447b..3ef8c2565 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-10-25, 12:52:49 GMT +# Date: 2023-10-27, 17:36:37 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -289,7 +289,7 @@ 3100..3104 ; Cn # [5] .. 3130 ; Cn # 318F ; Cn # -31E4..31EE ; Cn # [11] .. +31E6..31EE ; Cn # [9] .. 321F ; Cn # A48D..A48F ; Cn # [3] .. A4C7..A4CF ; Cn # [9] .. @@ -735,7 +735,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824414 +# Total code points: 824412 # ================================================ @@ -4143,7 +4143,7 @@ FFE3 ; Sk # FULLWIDTH MACRON 303E..303F ; So # [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE 3190..3191 ; So # [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK 3196..319F ; So # [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK -31C0..31E3 ; So # [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; So # [38] CJK STROKE T..CJK STROKE SZP 31EF ; So # IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3200..321E ; So # [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 322A..3247 ; So # [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO @@ -4236,7 +4236,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6644 +# Total code points: 6646 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 0ffc06dbb..e995d837a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-10-25, 12:52:51 GMT +# Date: 2023-10-27, 17:36:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762422 code points not listed here. -# Total code points: 899890 +# The above property value applies to 762420 code points not listed here. +# Total code points: 899888 # ================================================ @@ -1685,7 +1685,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 3192..3195 ; ID # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; ID # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; ID # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3 ; ID # So [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; ID # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; ID # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3200..321E ; ID # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3220..3229 ; ID # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN @@ -1849,7 +1849,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61977 code points not listed here. -# Total code points: 172567 +# Total code points: 172569 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index d69094d10..dc9db5fbf 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-10-25, 12:52:52 GMT +# Date: 2023-10-27, 17:36:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -11689,6 +11689,8 @@ 31E1 ; CJK STROKE HZZZG 31E2 ; CJK STROKE PG 31E3 ; CJK STROKE Q +31E4 ; CJK STROKE HZXG +31E5 ; CJK STROKE SZP 31EF ; IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0 ; KATAKANA LETTER SMALL KU 31F1 ; KATAKANA LETTER SMALL SI @@ -44480,6 +44482,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150117 +# Total code points: 150119 # EOF From 3ded4cf8239b1fbbae47039a1a027fc86f667ab0 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 1 Nov 2023 16:32:01 -0700 Subject: [PATCH 5/6] EAW=W --- unicodetools/data/ucd/dev/EastAsianWidth.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 2d6adaa4b..f70d001c3 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1488,7 +1488,7 @@ 3196..319F ; W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH 31C0..31E3 ; W # So [36] CJK STROKE T..CJK STROKE Q -31E4..31E5 ; N # So [2] CJK STROKE HZXG..CJK STROKE SZP +31E4..31E5 ; W # So [2] CJK STROKE HZXG..CJK STROKE SZP 31EF ; W # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU From ece6956137db98a925d22acd20cd8a212be5de8c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 1 Nov 2023 16:33:27 -0700 Subject: [PATCH 6/6] Regenerate UCD --- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++--- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 9 ++++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index f70d001c3..e18260048 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-01, 23:30:17 GMT +# Date: 2023-11-01, 23:33:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1487,8 +1487,7 @@ 3192..3195 ; W # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3 ; W # So [36] CJK STROKE T..CJK STROKE Q -31E4..31E5 ; W # So [2] CJK STROKE HZXG..CJK STROKE SZP +31C0..31E5 ; W # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; W # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index d835896a2..dd9a6a3b3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-01, 23:30:14 GMT +# Date: 2023-11-01, 23:32:58 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1145,7 +1145,6 @@ 2E5C ; N # Pe BOTTOM HALF RIGHT PARENTHESIS 2E5D ; N # Pd OBLIQUE HYPHEN 303F ; N # So IDEOGRAPHIC HALF FILL SPACE -31E4..31E5 ; N # So [2] CJK STROKE HZXG..CJK STROKE SZP 4DC0..4DFF ; N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION A4D0..A4F7 ; N # Lo [40] LISU LETTER BA..LISU LETTER OE A4F8..A4FD ; N # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU @@ -2072,7 +2071,7 @@ E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG # The above property value applies to 765971 code points not listed here. -# Total code points: 792611 +# Total code points: 792609 # ================================================ @@ -2402,7 +2401,7 @@ FFED..FFEE ; H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 3192..3195 ; W # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3 ; W # So [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; W # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; W # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU @@ -2543,7 +2542,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182424 +# Total code points: 182426 # ================================================