From ae3a7aa24ebedf2781775c9f054427a459d0ef93 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 2 Nov 2023 00:16:22 +0100 Subject: [PATCH 1/5] Seven emoji (#598) --- unicodetools/data/ucd/dev/DerivedAge.txt | 13 +++++++--- .../data/ucd/dev/DerivedCoreProperties.txt | 13 +++++----- unicodetools/data/ucd/dev/EastAsianWidth.txt | 11 ++++----- unicodetools/data/ucd/dev/LineBreak.txt | 21 ++++++++-------- unicodetools/data/ucd/dev/Scripts.txt | 13 +++++----- unicodetools/data/ucd/dev/UnicodeData.txt | 7 ++++++ .../data/ucd/dev/VerticalOrientation.txt | 20 +++++++--------- .../ucd/dev/extracted/DerivedBidiClass.txt | 17 +++++++------ .../dev/extracted/DerivedCombiningClass.txt | 13 +++++----- .../dev/extracted/DerivedEastAsianWidth.txt | 17 +++++++------ .../dev/extracted/DerivedGeneralCategory.txt | 24 +++++++++---------- .../ucd/dev/extracted/DerivedLineBreak.txt | 14 +++++------ .../data/ucd/dev/extracted/DerivedName.txt | 11 +++++++-- 13 files changed, 102 insertions(+), 92 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index c396d009c..132f1d482 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-10-25, 12:52:16 GMT +# Date: 2023-11-01, 17:19:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2028,7 +2028,14 @@ A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2 ; 16.0 # RIGHTWARDS ARROW WITH LOWER HOOK - -# Total code points: 304 +1FA89 ; 16.0 # HARP +1FA8F ; 16.0 # SHOVEL +1FABE ; 16.0 # LEAFLESS TREE +1FAC6 ; 16.0 # FINGERPRINT +1FADC ; 16.0 # ROOT VEGETABLE +1FADF ; 16.0 # SPLATTER +1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES + +# Total code points: 311 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 2e73593f4..50b24adb6 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-10-25, 12:52:46 GMT +# Date: 2023-11-01, 17:19:49 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -12643,11 +12643,10 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F900..1FA53 ; Grapheme_Base # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; Grapheme_Base # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Grapheme_Base # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; Grapheme_Base # So [9] YO-YO..FLUTE -1FA90..1FABD ; Grapheme_Base # So [46] RINGED PLANET..WING -1FABF..1FAC5 ; Grapheme_Base # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; Grapheme_Base # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; Grapheme_Base # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; Grapheme_Base # So [10] YO-YO..HARP +1FA8F..1FAC6 ; Grapheme_Base # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; Grapheme_Base # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; Grapheme_Base # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; Grapheme_Base # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; Grapheme_Base # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; Grapheme_Base # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON @@ -12662,7 +12661,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 147908 +# Total code points: 147915 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 96bbaecc1..208fcb808 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-10-25, 12:52:55 GMT +# Date: 2023-11-01, 22:21:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2611,11 +2611,10 @@ FFFD ; A # So REPLACEMENT CHARACTER 1FA00..1FA53 ; N # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; N # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; W # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; W # So [9] YO-YO..FLUTE -1FA90..1FABD ; W # So [46] RINGED PLANET..WING -1FABF..1FAC5 ; W # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; W # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; W # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; W # So [10] YO-YO..HARP +1FA8F..1FAC6 ; W # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; W # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; W # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; W # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; N # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index dc75435f1..a4c7b5f90 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-10-25, 12:52:56 GMT +# Date: 2023-11-01, 17:09:28 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3592,17 +3592,16 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1FA6E..1FA6F ; ID # Cn [2] .. 1FA70..1FA7C ; ID # So [13] BALLET SHOES..CRUTCH 1FA7D..1FA7F ; ID # Cn [3] .. -1FA80..1FA88 ; ID # So [9] YO-YO..FLUTE -1FA89..1FA8F ; ID # Cn [7] .. -1FA90..1FABD ; ID # So [46] RINGED PLANET..WING -1FABE ; ID # Cn -1FABF..1FAC2 ; ID # So [4] GOOSE..PEOPLE HUGGING +1FA80..1FA89 ; ID # So [10] YO-YO..HARP +1FA8A..1FA8E ; ID # Cn [5] .. +1FA8F..1FAC2 ; ID # So [52] SHOVEL..PEOPLE HUGGING 1FAC3..1FAC5 ; EB # So [3] PREGNANT MAN..PERSON WITH CROWN -1FAC6..1FACD ; ID # Cn [8] .. -1FACE..1FADB ; ID # So [14] MOOSE..PEA POD -1FADC..1FADF ; ID # Cn [4] .. -1FAE0..1FAE8 ; ID # So [9] MELTING FACE..SHAKING FACE -1FAE9..1FAEF ; ID # Cn [7] .. +1FAC6 ; ID # So FINGERPRINT +1FAC7..1FACD ; ID # Cn [7] .. +1FACE..1FADC ; ID # So [15] MOOSE..ROOT VEGETABLE +1FADD..1FADE ; ID # Cn [2] .. +1FADF..1FAE9 ; ID # So [11] SPLATTER..FACE WITH BAGS UNDER EYES +1FAEA..1FAEF ; ID # Cn [6] .. 1FAF0..1FAF8 ; EB # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FAF9..1FAFF ; ID # Cn [7] .. 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 800611d20..c5bf640d3 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-10-25, 12:53:25 GMT +# Date: 2023-11-01, 17:20:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -619,11 +619,10 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F900..1FA53 ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Common # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; Common # So [9] YO-YO..FLUTE -1FA90..1FABD ; Common # So [46] RINGED PLANET..WING -1FABF..1FAC5 ; Common # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; Common # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; Common # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; Common # So [10] YO-YO..HARP +1FA8F..1FAC6 ; Common # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; Common # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; Common # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; Common # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; Common # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON @@ -631,7 +630,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 8311 +# Total code points: 8318 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 9c89d3d50..f821e5cef 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -34037,6 +34037,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1FA86;NESTING DOLLS;So;0;ON;;;;;N;;;;; 1FA87;MARACAS;So;0;ON;;;;;N;;;;; 1FA88;FLUTE;So;0;ON;;;;;N;;;;; +1FA89;HARP;So;0;ON;;;;;N;;;;; +1FA8F;SHOVEL;So;0;ON;;;;;N;;;;; 1FA90;RINGED PLANET;So;0;ON;;;;;N;;;;; 1FA91;CHAIR;So;0;ON;;;;;N;;;;; 1FA92;RAZOR;So;0;ON;;;;;N;;;;; @@ -34083,6 +34085,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1FABB;HYACINTH;So;0;ON;;;;;N;;;;; 1FABC;JELLYFISH;So;0;ON;;;;;N;;;;; 1FABD;WING;So;0;ON;;;;;N;;;;; +1FABE;LEAFLESS TREE;So;0;ON;;;;;N;;;;; 1FABF;GOOSE;So;0;ON;;;;;N;;;;; 1FAC0;ANATOMICAL HEART;So;0;ON;;;;;N;;;;; 1FAC1;LUNGS;So;0;ON;;;;;N;;;;; @@ -34090,6 +34093,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1FAC3;PREGNANT MAN;So;0;ON;;;;;N;;;;; 1FAC4;PREGNANT PERSON;So;0;ON;;;;;N;;;;; 1FAC5;PERSON WITH CROWN;So;0;ON;;;;;N;;;;; +1FAC6;FINGERPRINT;So;0;ON;;;;;N;;;;; 1FACE;MOOSE;So;0;ON;;;;;N;;;;; 1FACF;DONKEY;So;0;ON;;;;;N;;;;; 1FAD0;BLUEBERRIES;So;0;ON;;;;;N;;;;; @@ -34104,6 +34108,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1FAD9;JAR;So;0;ON;;;;;N;;;;; 1FADA;GINGER ROOT;So;0;ON;;;;;N;;;;; 1FADB;PEA POD;So;0;ON;;;;;N;;;;; +1FADC;ROOT VEGETABLE;So;0;ON;;;;;N;;;;; +1FADF;SPLATTER;So;0;ON;;;;;N;;;;; 1FAE0;MELTING FACE;So;0;ON;;;;;N;;;;; 1FAE1;SALUTING FACE;So;0;ON;;;;;N;;;;; 1FAE2;FACE WITH OPEN EYES AND HAND OVER MOUTH;So;0;ON;;;;;N;;;;; @@ -34113,6 +34119,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1FAE6;BITING LIP;So;0;ON;;;;;N;;;;; 1FAE7;BUBBLES;So;0;ON;;;;;N;;;;; 1FAE8;SHAKING FACE;So;0;ON;;;;;N;;;;; +1FAE9;FACE WITH BAGS UNDER EYES;So;0;ON;;;;;N;;;;; 1FAF0;HAND WITH INDEX FINGER AND THUMB CROSSED;So;0;ON;;;;;N;;;;; 1FAF1;RIGHTWARDS HAND;So;0;ON;;;;;N;;;;; 1FAF2;LEFTWARDS HAND;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 4219b2fbf..ce38ba8d1 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-25, 12:53:29 GMT +# Date: 2023-11-01, 17:20:14 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2428,16 +2428,14 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1FA6E..1FA6F ; U # Cn [2] .. 1FA70..1FA7C ; U # So [13] BALLET SHOES..CRUTCH 1FA7D..1FA7F ; U # Cn [3] .. -1FA80..1FA88 ; U # So [9] YO-YO..FLUTE -1FA89..1FA8F ; U # Cn [7] .. -1FA90..1FABD ; U # So [46] RINGED PLANET..WING -1FABE ; U # Cn -1FABF..1FAC5 ; U # So [7] GOOSE..PERSON WITH CROWN -1FAC6..1FACD ; U # Cn [8] .. -1FACE..1FADB ; U # So [14] MOOSE..PEA POD -1FADC..1FADF ; U # Cn [4] .. -1FAE0..1FAE8 ; U # So [9] MELTING FACE..SHAKING FACE -1FAE9..1FAEF ; U # Cn [7] .. +1FA80..1FA89 ; U # So [10] YO-YO..HARP +1FA8A..1FA8E ; U # Cn [5] .. +1FA8F..1FAC6 ; U # So [56] SHOVEL..FINGERPRINT +1FAC7..1FACD ; U # Cn [7] .. +1FACE..1FADC ; U # So [15] MOOSE..ROOT VEGETABLE +1FADD..1FADE ; U # Cn [2] .. +1FADF..1FAE9 ; U # So [11] SPLATTER..FACE WITH BAGS UNDER EYES +1FAEA..1FAEF ; U # Cn [6] .. 1FAF0..1FAF8 ; U # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FAF9..1FAFF ; U # Cn [7] .. 1FB00..1FB92 ; R # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2a39d1660..42b6d4e95 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-10-25, 12:52:43 GMT +# Date: 2023-11-01, 17:19:47 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1197,8 +1197,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820232 code points not listed here. -# Total code points: 1096260 +# The above property value applies to 820225 code points not listed here. +# Total code points: 1096253 # ================================================ @@ -1964,16 +1964,15 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; ON # So [9] YO-YO..FLUTE -1FA90..1FABD ; ON # So [46] RINGED PLANET..WING -1FABF..1FAC5 ; ON # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; ON # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; ON # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; ON # So [10] YO-YO..HARP +1FA8F..1FAC6 ; ON # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; ON # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; ON # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; ON # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6040 +# Total code points: 6047 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 475a4338a..c3bf46992 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-10-25, 12:52:46 GMT +# Date: 2023-11-01, 17:19:48 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2007,11 +2007,10 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F900..1FA53 ; 0 # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; 0 # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; 0 # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; 0 # So [9] YO-YO..FLUTE -1FA90..1FABD ; 0 # So [46] RINGED PLANET..WING -1FABF..1FAC5 ; 0 # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; 0 # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; 0 # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; 0 # So [10] YO-YO..HARP +1FA8F..1FAC6 ; 0 # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; 0 # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; 0 # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; 0 # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; 0 # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; 0 # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON @@ -2031,7 +2030,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826462 code points not listed here. +# The above property value applies to 826455 code points not listed here. # Total code points: 1113182 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index b9b084bdd..33ee28c58 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-25, 12:52:49 GMT +# Date: 2023-11-01, 22:21:05 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2070,8 +2070,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765980 code points not listed here. -# Total code points: 792618 +# The above property value applies to 765973 code points not listed here. +# Total code points: 792611 # ================================================ @@ -2526,11 +2526,10 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 1F93C..1F945 ; W # So [10] WRESTLERS..GOAL NET 1F947..1F9FF ; W # So [185] FIRST PLACE MEDAL..NAZAR AMULET 1FA70..1FA7C ; W # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; W # So [9] YO-YO..FLUTE -1FA90..1FABD ; W # So [46] RINGED PLANET..WING -1FABF..1FAC5 ; W # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; W # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; W # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; W # So [10] YO-YO..HARP +1FA8F..1FAC6 ; W # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; W # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; W # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; W # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 20000..2A6DF ; W # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; W # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -2543,7 +2542,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182417 +# Total code points: 182424 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 0fcfe447b..50455ef42 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-10-25, 12:52:49 GMT +# Date: 2023-11-01, 17:19:50 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -711,11 +711,10 @@ FFFE..FFFF ; Cn # [2] .. 1FA54..1FA5F ; Cn # [12] .. 1FA6E..1FA6F ; Cn # [2] .. 1FA7D..1FA7F ; Cn # [3] .. -1FA89..1FA8F ; Cn # [7] .. -1FABE ; Cn # -1FAC6..1FACD ; Cn # [8] .. -1FADC..1FADF ; Cn # [4] .. -1FAE9..1FAEF ; Cn # [7] .. +1FA8A..1FA8E ; Cn # [5] .. +1FAC7..1FACD ; Cn # [7] .. +1FADD..1FADE ; Cn # [2] .. +1FAEA..1FAEF ; Cn # [6] .. 1FAF9..1FAFF ; Cn # [7] .. 1FB93 ; Cn # 1FBCB..1FBEF ; Cn # [37] .. @@ -735,7 +734,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824414 +# Total code points: 824407 # ================================================ @@ -4227,16 +4226,15 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F900..1FA53 ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; So # [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; So # [9] YO-YO..FLUTE -1FA90..1FABD ; So # [46] RINGED PLANET..WING -1FABF..1FAC5 ; So # [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; So # [14] MOOSE..PEA POD -1FAE0..1FAE8 ; So # [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; So # [10] YO-YO..HARP +1FA8F..1FAC6 ; So # [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; So # [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; So # [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; So # [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6644 +# Total code points: 6651 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 0ffc06dbb..e56d74720 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-10-25, 12:52:51 GMT +# Date: 2023-11-01, 17:19:52 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1833,11 +1833,11 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 1F9DE..1F9FF ; ID # So [34] GENIE..NAZAR AMULET 1FA60..1FA6D ; ID # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; ID # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; ID # So [9] YO-YO..FLUTE -1FA90..1FABD ; ID # So [46] RINGED PLANET..WING -1FABF..1FAC2 ; ID # So [4] GOOSE..PEOPLE HUGGING -1FACE..1FADB ; ID # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; ID # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; ID # So [10] YO-YO..HARP +1FA8F..1FAC2 ; ID # So [52] SHOVEL..PEOPLE HUGGING +1FAC6 ; ID # So FINGERPRINT +1FACE..1FADC ; ID # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; ID # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 20000..2A6DF ; ID # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; ID # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; ID # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -1848,7 +1848,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# The above property value applies to 61977 code points not listed here. +# The above property value applies to 61970 code points not listed here. # Total code points: 172567 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index d69094d10..97dd0db7c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-10-25, 12:52:52 GMT +# Date: 2023-11-01, 17:19:52 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -43836,6 +43836,8 @@ FFFD ; REPLACEMENT CHARACTER 1FA86 ; NESTING DOLLS 1FA87 ; MARACAS 1FA88 ; FLUTE +1FA89 ; HARP +1FA8F ; SHOVEL 1FA90 ; RINGED PLANET 1FA91 ; CHAIR 1FA92 ; RAZOR @@ -43882,6 +43884,7 @@ FFFD ; REPLACEMENT CHARACTER 1FABB ; HYACINTH 1FABC ; JELLYFISH 1FABD ; WING +1FABE ; LEAFLESS TREE 1FABF ; GOOSE 1FAC0 ; ANATOMICAL HEART 1FAC1 ; LUNGS @@ -43889,6 +43892,7 @@ FFFD ; REPLACEMENT CHARACTER 1FAC3 ; PREGNANT MAN 1FAC4 ; PREGNANT PERSON 1FAC5 ; PERSON WITH CROWN +1FAC6 ; FINGERPRINT 1FACE ; MOOSE 1FACF ; DONKEY 1FAD0 ; BLUEBERRIES @@ -43903,6 +43907,8 @@ FFFD ; REPLACEMENT CHARACTER 1FAD9 ; JAR 1FADA ; GINGER ROOT 1FADB ; PEA POD +1FADC ; ROOT VEGETABLE +1FADF ; SPLATTER 1FAE0 ; MELTING FACE 1FAE1 ; SALUTING FACE 1FAE2 ; FACE WITH OPEN EYES AND HAND OVER MOUTH @@ -43912,6 +43918,7 @@ FFFD ; REPLACEMENT CHARACTER 1FAE6 ; BITING LIP 1FAE7 ; BUBBLES 1FAE8 ; SHAKING FACE +1FAE9 ; FACE WITH BAGS UNDER EYES 1FAF0 ; HAND WITH INDEX FINGER AND THUMB CROSSED 1FAF1 ; RIGHTWARDS HAND 1FAF2 ; LEFTWARDS HAND @@ -44480,6 +44487,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150117 +# Total code points: 150124 # EOF From f6e70b5b708ccfe50a297a6d0793c7d8e7edcac1 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 2 Nov 2023 13:36:15 -0700 Subject: [PATCH 2/5] =?UTF-8?q?Three=20Latin=20=CE=BBs=20(#525)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- unicodetools/data/ucd/dev/CaseFolding.txt | 4 +- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +- .../data/ucd/dev/DerivedCoreProperties.txt | 62 +++++++++++-------- .../ucd/dev/DerivedNormalizationProps.txt | 14 +++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +- unicodetools/data/ucd/dev/LineBreak.txt | 4 +- unicodetools/data/ucd/dev/Scripts.txt | 6 +- unicodetools/data/ucd/dev/UnicodeData.txt | 5 +- .../data/ucd/dev/VerticalOrientation.txt | 4 +- .../dev/auxiliary/SentenceBreakProperty.txt | 9 ++- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 +- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 +- .../dev/extracted/DerivedCombiningClass.txt | 6 +- .../dev/extracted/DerivedEastAsianWidth.txt | 6 +- .../dev/extracted/DerivedGeneralCategory.txt | 13 ++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +-- .../data/ucd/dev/extracted/DerivedName.txt | 7 ++- 17 files changed, 100 insertions(+), 71 deletions(-) diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 50c19fe78..2f1047b36 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ # CaseFolding-16.0.0.txt -# Date: 2023-10-24, 18:13:17 GMT +# Date: 2023-11-02, 18:04:12 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1246,6 +1246,8 @@ A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S +A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA +A7DC; C; 019B; # LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H AB70; C; 13A0; # CHEROKEE SMALL LETTER A AB71; C; 13A1; # CHEROKEE SMALL LETTER E diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 132f1d482..224746b79 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-11-01, 17:19:29 GMT +# Date: 2023-11-02, 18:04:14 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2014,6 +2014,7 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT 0CDC ; 16.0 # KANNADA ARCHAIC SHRII 1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE 105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO 10D40..10D65 ; 16.0 # [38] GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA 10D69..10D85 ; 16.0 # [29] GARAY VOWEL SIGN E..GARAY SMALL LETTER OLD NA @@ -2036,6 +2037,6 @@ A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER 1FADF ; 16.0 # SPLATTER 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES -# Total code points: 311 +# Total code points: 314 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 50b24adb6..1e6234e67 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-01, 17:19:49 GMT +# Date: 2023-11-02, 18:04:48 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -835,7 +835,7 @@ A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; Alphabetic # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; Alphabetic # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Alphabetic # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; Alphabetic # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; Alphabetic # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Alphabetic # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Alphabetic # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1420,7 +1420,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138612 +# Total code points: 138615 # ================================================ @@ -2057,6 +2057,7 @@ A7D3 ; Lowercase # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lowercase # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lowercase # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Lowercase # L& LATIN SMALL LETTER LAMBDA A7F2..A7F4 ; Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F6 ; Lowercase # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2117,7 +2118,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2568 +# Total code points: 2569 # ================================================ @@ -2731,6 +2732,8 @@ A7CB..A7CC ; Uppercase # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPIT A7D0 ; Uppercase # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Uppercase # L& LATIN CAPITAL LETTER REVERSED HALF H FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Uppercase # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW @@ -2779,7 +2782,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1976 +# Total code points: 1978 # ================================================ @@ -2890,7 +2893,7 @@ A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETT A790..A7CD ; Cased # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; Cased # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; Cased # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; Cased # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2964,7 +2967,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4575 +# Total code points: 4578 # ================================================ @@ -4080,6 +4083,8 @@ A7CB..A7CC ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER RAMS HOR A7D0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER REVERSED HALF H FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Lowercased # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW @@ -4094,7 +4099,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1458 +# Total code points: 1460 # ================================================ @@ -4175,7 +4180,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 018C ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH TOPBAR 0192 ; Changes_When_Uppercased # L& LATIN SMALL LETTER F WITH HOOK 0195 ; Changes_When_Uppercased # L& LATIN SMALL LETTER HV -0199..019A ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER L WITH BAR +0199..019B ; Changes_When_Uppercased # L& [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE 019E ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG 01A1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN 01A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OI @@ -4715,6 +4720,7 @@ A7CD ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DIA A7D1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CLOSED INSULAR G A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Changes_When_Uppercased # L& LATIN SMALL LETTER LAMBDA A7F6 ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED HALF H AB53 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Uppercased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -4733,7 +4739,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1550 +# Total code points: 1552 # ================================================ @@ -4814,7 +4820,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 018C ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH TOPBAR 0192 ; Changes_When_Titlecased # L& LATIN SMALL LETTER F WITH HOOK 0195 ; Changes_When_Titlecased # L& LATIN SMALL LETTER HV -0199..019A ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER L WITH BAR +0199..019B ; Changes_When_Titlecased # L& [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE 019E ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG 01A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN 01A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OI @@ -5353,6 +5359,7 @@ A7CD ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DIA A7D1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CLOSED INSULAR G A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Changes_When_Titlecased # L& LATIN SMALL LETTER LAMBDA A7F6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED HALF H AB53 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Titlecased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -5371,7 +5378,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1477 +# Total code points: 1479 # ================================================ @@ -5988,6 +5995,8 @@ A7CB..A7CC ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HOR A7D0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H AB70..ABBF ; Changes_When_Casefolded # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Changes_When_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST @@ -6005,7 +6014,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1531 +# Total code points: 1533 # ================================================ @@ -6021,8 +6030,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 00D8..00F6 ; Changes_When_Casemapped # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..0137 ; Changes_When_Casemapped # L& [64] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER K WITH CEDILLA 0139..018C ; Changes_When_Casemapped # L& [84] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER D WITH TOPBAR -018E..019A ; Changes_When_Casemapped # L& [13] LATIN CAPITAL LETTER REVERSED E..LATIN SMALL LETTER L WITH BAR -019C..01A9 ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER ESH +018E..01A9 ; Changes_When_Casemapped # L& [28] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER ESH 01AC..01B9 ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER T WITH HOOK..LATIN SMALL LETTER EZH REVERSED 01BC..01BD ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER TONE FIVE..LATIN SMALL LETTER TONE FIVE 01BF ; Changes_When_Casemapped # L& LATIN LETTER WYNN @@ -6120,7 +6128,7 @@ A790..A794 ; Changes_When_Casemapped # L& [5] LATIN CAPITAL LETTER N WITH D A796..A7AE ; Changes_When_Casemapped # L& [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I A7B0..A7CD ; Changes_When_Casemapped # L& [30] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D6..A7D9 ; Changes_When_Casemapped # L& [4] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN SMALL LETTER SIGMOID S +A7D6..A7DC ; Changes_When_Casemapped # L& [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Casemapped # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -6147,7 +6155,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2977 +# Total code points: 2981 # ================================================ @@ -6526,7 +6534,7 @@ A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; ID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; ID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; ID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; ID_Start # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; ID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -6915,7 +6923,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137189 +# Total code points: 137192 # ================================================ @@ -7602,7 +7610,7 @@ A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; ID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; ID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; ID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; ID_Continue # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; ID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -8295,7 +8303,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140399 +# Total code points: 140402 # ================================================ @@ -8670,7 +8678,7 @@ A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; XID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; XID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; XID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; XID_Start # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; XID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -9064,7 +9072,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137166 +# Total code points: 137169 # ================================================ @@ -9746,7 +9754,7 @@ A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; XID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; XID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; XID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; XID_Continue # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; XID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -10445,7 +10453,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140380 +# Total code points: 140383 # ================================================ @@ -11842,7 +11850,7 @@ A78F ; Grapheme_Base # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; Grapheme_Base # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Grapheme_Base # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; Grapheme_Base # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; Grapheme_Base # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Grapheme_Base # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Grapheme_Base # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -12661,7 +12669,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 147915 +# Total code points: 147918 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index 876faa2eb..be1b382fe 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-16.0.0.txt -# Date: 2023-10-25, 12:52:52 GMT +# Date: 2023-11-02, 18:04:54 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -5381,6 +5381,8 @@ A7CC ; NFKC_CF; A7CD # L& LATIN CAPITAL LETTER S WITH D A7D0 ; NFKC_CF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; NFKC_CF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_CF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; NFKC_CF; A7DB # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; NFKC_CF; 019B # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2 ; NFKC_CF; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; NFKC_CF; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; NFKC_CF; 0071 # Lm MODIFIER LETTER CAPITAL Q @@ -9040,7 +9042,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] .... -# Total code points: 10516 +# Total code points: 10518 # ================================================ @@ -11477,6 +11479,8 @@ A7CC ; NFKC_SCF; A7CD # L& LATIN CAPITAL LETTER S WITH A7D0 ; NFKC_SCF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; NFKC_SCF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_SCF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; NFKC_SCF; A7DB # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; NFKC_SCF; 019B # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2 ; NFKC_SCF; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; NFKC_SCF; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; NFKC_SCF; 0071 # Lm MODIFIER LETTER CAPITAL Q @@ -15136,7 +15140,7 @@ E0080..E00FF ; NFKC_SCF; # Cn [128] .... -# Total code points: 10478 +# Total code points: 10480 # ================================================ @@ -15909,6 +15913,8 @@ A7CB..A7CC ; Changes_When_NFKC_Casefolded # L& [2] LATIN CAPITAL LETTER RAM A7D0 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Changes_When_NFKC_Casefolded # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -16150,6 +16156,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] .... -# Total code points: 10516 +# Total code points: 10518 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 208fcb808..0884ae367 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-01, 22:21:11 GMT +# Date: 2023-11-02, 18:04:57 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1546,7 +1546,7 @@ A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; N # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; N # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; N # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index a4c7b5f90..927bbfec8 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-01, 17:09:28 GMT +# Date: 2023-11-02, 18:04:59 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1581,7 +1581,7 @@ A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; AL # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; AL # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index c5bf640d3..f10867ae6 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-11-01, 17:20:11 GMT +# Date: 2023-11-02, 18:05:28 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -676,7 +676,7 @@ A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; Latin # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; Latin # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Latin # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; Latin # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; Latin # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -699,7 +699,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1484 +# Total code points: 1487 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f821e5cef..d99f77a7a 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -409,7 +409,7 @@ 0198;LATIN CAPITAL LETTER K WITH HOOK;Lu;0;L;;;;;N;LATIN CAPITAL LETTER K HOOK;;;0199; 0199;LATIN SMALL LETTER K WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER K HOOK;;0198;;0198 019A;LATIN SMALL LETTER L WITH BAR;Ll;0;L;;;;;N;LATIN SMALL LETTER BARRED L;;023D;;023D -019B;LATIN SMALL LETTER LAMBDA WITH STROKE;Ll;0;L;;;;;N;LATIN SMALL LETTER BARRED LAMBDA;;;; +019B;LATIN SMALL LETTER LAMBDA WITH STROKE;Ll;0;L;;;;;N;LATIN SMALL LETTER BARRED LAMBDA;;A7DC;;A7DC 019C;LATIN CAPITAL LETTER TURNED M;Lu;0;L;;;;;N;;;;026F; 019D;LATIN CAPITAL LETTER N WITH LEFT HOOK;Lu;0;L;;;;;N;LATIN CAPITAL LETTER N HOOK;;;0272; 019E;LATIN SMALL LETTER N WITH LONG RIGHT LEG;Ll;0;L;;;;;N;;;0220;;0220 @@ -14276,6 +14276,9 @@ A7D6;LATIN CAPITAL LETTER MIDDLE SCOTS S;Lu;0;L;;;;;N;;;;A7D7; A7D7;LATIN SMALL LETTER MIDDLE SCOTS S;Ll;0;L;;;;;N;;;A7D6;;A7D6 A7D8;LATIN CAPITAL LETTER SIGMOID S;Lu;0;L;;;;;N;;;;A7D9; A7D9;LATIN SMALL LETTER SIGMOID S;Ll;0;L;;;;;N;;;A7D8;;A7D8 +A7DA;LATIN CAPITAL LETTER LAMBDA;Lu;0;L;;;;;N;;;;A7DB; +A7DB;LATIN SMALL LETTER LAMBDA;Ll;0;L;;;;;N;;;A7DA;;A7DA +A7DC;LATIN CAPITAL LETTER LAMBDA WITH STROKE;Lu;0;L;;;;;N;;;;019B; A7F2;MODIFIER LETTER CAPITAL C;Lm;0;L; 0043;;;;N;;;;; A7F3;MODIFIER LETTER CAPITAL F;Lm;0;L; 0046;;;;N;;;;; A7F4;MODIFIER LETTER CAPITAL Q;Lm;0;L; 0051;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index ce38ba8d1..770e7efc3 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-11-01, 17:20:14 GMT +# Date: 2023-11-02, 18:05:32 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1358,7 +1358,7 @@ A78F ; R # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; R # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; R # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; R # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; R # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; R # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; R # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; R # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; R # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 17e9ced38..37b39bed1 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-10-25, 12:53:26 GMT +# Date: 2023-11-02, 18:05:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1242,6 +1242,7 @@ A7D3 ; Lower # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lower # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lower # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lower # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Lower # L& LATIN SMALL LETTER LAMBDA A7F2..A7F4 ; Lower # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F6 ; Lower # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lower # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1302,7 +1303,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2521 +# Total code points: 2522 # ================================================ @@ -1914,6 +1915,8 @@ A7CB..A7CC ; Upper # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL L A7D0 ; Upper # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Upper # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Upper # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Upper # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Upper # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Upper # L& LATIN CAPITAL LETTER REVERSED HALF H FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW @@ -1962,7 +1965,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1961 +# Total code points: 1963 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 65682200c..5632ffc40 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-10-25, 12:53:29 GMT +# Date: 2023-11-02, 18:05:32 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -988,7 +988,7 @@ A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; ALetter # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; ALetter # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; ALetter # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; ALetter # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; ALetter # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ALetter # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ALetter # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1329,7 +1329,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 29712 +# Total code points: 29715 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 42b6d4e95..2bb43edb3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-11-01, 17:19:47 GMT +# Date: 2023-11-02, 18:04:45 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -664,7 +664,7 @@ A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; L # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; L # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; L # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; L # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1197,7 +1197,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820225 code points not listed here. +# The above property value applies to 820222 code points not listed here. # Total code points: 1096253 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index c3bf46992..0527445ea 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-11-01, 17:19:48 GMT +# Date: 2023-11-02, 18:04:47 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1095,7 +1095,7 @@ A78F ; 0 # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; 0 # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; 0 # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; 0 # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; 0 # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; 0 # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; 0 # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; 0 # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; 0 # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -2030,7 +2030,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826455 code points not listed here. +# The above property value applies to 826452 code points not listed here. # Total code points: 1113182 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 33ee28c58..0fd26f476 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-01, 22:21:05 GMT +# Date: 2023-11-02, 18:04:50 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1183,7 +1183,7 @@ A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; N # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; N # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; N # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -2070,7 +2070,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765973 code points not listed here. +# The above property value applies to 765970 code points not listed here. # Total code points: 792611 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 50455ef42..e1b983d86 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-11-01, 17:19:50 GMT +# Date: 2023-11-02, 18:04:51 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -298,7 +298,7 @@ A6F8..A6FF ; Cn # [8] .. A7CE..A7CF ; Cn # [2] .. A7D2 ; Cn # A7D4 ; Cn # -A7DA..A7F1 ; Cn # [24] .. +A7DD..A7F1 ; Cn # [21] .. A82D..A82F ; Cn # [3] .. A83A..A83F ; Cn # [6] .. A878..A87F ; Cn # [8] .. @@ -734,7 +734,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824407 +# Total code points: 824404 # ================================================ @@ -1345,6 +1345,8 @@ A7CB..A7CC ; Lu # [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER A7D0 ; Lu # LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Lu # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Lu # LATIN CAPITAL LETTER SIGMOID S +A7DA ; Lu # LATIN CAPITAL LETTER LAMBDA +A7DC ; Lu # LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Lu # LATIN CAPITAL LETTER REVERSED HALF H FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Lu # [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW @@ -1390,7 +1392,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1856 +# Total code points: 1858 # ================================================ @@ -2007,6 +2009,7 @@ A7D3 ; Ll # LATIN SMALL LETTER DOUBLE THORN A7D5 ; Ll # LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Ll # LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Ll # LATIN SMALL LETTER SIGMOID S +A7DB ; Ll # LATIN SMALL LETTER LAMBDA A7F6 ; Ll # LATIN SMALL LETTER REVERSED HALF H A7FA ; Ll # LATIN LETTER SMALL CAPITAL TURNED M AB30..AB5A ; Ll # [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG @@ -2058,7 +2061,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2257 +# Total code points: 2258 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index e56d74720..644dcf2cc 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-01, 17:19:52 GMT +# Date: 2023-11-02, 18:04:53 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762422 code points not listed here. -# Total code points: 899890 +# The above property value applies to 762419 code points not listed here. +# Total code points: 899887 # ================================================ @@ -1141,7 +1141,7 @@ A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; AL # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; AL # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1605,7 +1605,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 21961 +# Total code points: 21964 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 97dd0db7c..d86e783fb 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-11-01, 17:19:52 GMT +# Date: 2023-11-02, 18:04:54 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -14249,6 +14249,9 @@ A7D6 ; LATIN CAPITAL LETTER MIDDLE SCOTS S A7D7 ; LATIN SMALL LETTER MIDDLE SCOTS S A7D8 ; LATIN CAPITAL LETTER SIGMOID S A7D9 ; LATIN SMALL LETTER SIGMOID S +A7DA ; LATIN CAPITAL LETTER LAMBDA +A7DB ; LATIN SMALL LETTER LAMBDA +A7DC ; LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2 ; MODIFIER LETTER CAPITAL C A7F3 ; MODIFIER LETTER CAPITAL F A7F4 ; MODIFIER LETTER CAPITAL Q @@ -44487,6 +44490,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150124 +# Total code points: 150127 # EOF From 4d1462be3e9b38bab5b7c6e8751bea9d90554329 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 2 Nov 2023 15:35:20 -0700 Subject: [PATCH 3/5] Blank character for Khitan Small Script (#599) --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 26 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- unicodetools/data/ucd/dev/Scripts.txt | 5 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + .../data/ucd/dev/VerticalOrientation.txt | 5 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++--- .../dev/extracted/DerivedCombiningClass.txt | 6 ++--- .../dev/extracted/DerivedEastAsianWidth.txt | 10 +++---- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 9 ++++--- .../data/ucd/dev/extracted/DerivedName.txt | 5 ++-- 15 files changed, 57 insertions(+), 48 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 224746b79..fc1a93c77 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-11-02, 18:04:14 GMT +# Date: 2023-11-02, 20:54:00 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2025,6 +2025,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO 11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE +18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF 1CEB0..1CEB3 ; 16.0 # [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN @@ -2037,6 +2038,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FADF ; 16.0 # SPLATTER 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES -# Total code points: 314 +# Total code points: 315 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1e6234e67..a9026a3f8 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-02, 18:04:48 GMT +# Date: 2023-11-02, 20:54:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1301,7 +1301,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Alphabetic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; Alphabetic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1420,7 +1420,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138615 +# Total code points: 138616 # ================================================ @@ -6815,7 +6815,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK 17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; ID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -6923,7 +6923,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137192 +# Total code points: 137193 # ================================================ @@ -8158,7 +8158,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; ID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8303,7 +8303,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140402 +# Total code points: 140403 # ================================================ @@ -8964,7 +8964,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK 17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; XID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; XID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -9072,7 +9072,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137169 +# Total code points: 137170 # ================================================ @@ -10308,7 +10308,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; XID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; XID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -10453,7 +10453,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140383 +# Total code points: 140384 # ================================================ @@ -12467,7 +12467,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FF0..16FF1 ; Grapheme_Base # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Grapheme_Base # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; Grapheme_Base # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Grapheme_Base # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -12669,7 +12669,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 147918 +# Total code points: 147919 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 0884ae367..7a381c952 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-02, 18:04:57 GMT +# Date: 2023-11-02, 20:54:26 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2329,6 +2329,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF ; W # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 927bbfec8..fd3b6f0e5 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-02, 18:04:59 GMT +# Date: 2023-11-02, 20:54:27 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3235,6 +3235,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index f10867ae6..30dc63c9b 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-11-02, 18:05:28 GMT +# Date: 2023-11-02, 20:54:48 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2951,8 +2951,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 16FE4 ; Khitan_Small_Script # Mn KHITAN SMALL SCRIPT FILLER 18B00..18CD5 ; Khitan_Small_Script # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF ; Khitan_Small_Script # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -# Total code points: 471 +# Total code points: 472 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index d99f77a7a..c4b40b142 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -27380,6 +27380,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD3;KHITAN SMALL SCRIPT CHARACTER-18CD3;Lo;0;L;;;;;N;;;;; 18CD4;KHITAN SMALL SCRIPT CHARACTER-18CD4;Lo;0;L;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; +18CFF;KHITAN SMALL SCRIPT CHARACTER-18CFF;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; 18D08;;Lo;0;L;;;;;N;;;;; 1AFF0;KATAKANA LETTER MINNAN TONE-2;Lm;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 770e7efc3..e27b3906d 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-11-02, 18:05:32 GMT +# Date: 2023-11-02, 20:54:50 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2162,7 +2162,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 187F8..187FF ; U # Cn [8] .. 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CD6..18CFF ; U # Cn [42] .. +18CD6..18CFE ; U # Cn [41] .. +18CFF ; U # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D08 ; U # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 18D09..18D7F ; U # Cn [119] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 37b39bed1..2b952b494 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-11-02, 18:05:29 GMT +# Date: 2023-11-02, 20:54:48 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2486,7 +2486,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK 17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; OLetter # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; OLetter # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; OLetter # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2559,7 +2559,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132831 +# Total code points: 132832 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 5632ffc40..a94eebe59 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-11-02, 18:05:32 GMT +# Date: 2023-11-02, 20:54:50 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1237,6 +1237,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ALetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ALetter # Lm OLD CHINESE ITERATION MARK +18CFF ; ALetter # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 1BC00..1BC6A ; ALetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; ALetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; ALetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -1329,7 +1330,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 29715 +# Total code points: 29716 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2bb43edb3..6af7d2a38 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-11-02, 18:04:45 GMT +# Date: 2023-11-02, 20:54:19 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1084,7 +1084,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; L # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; L # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1197,7 +1197,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820222 code points not listed here. +# The above property value applies to 820221 code points not listed here. # Total code points: 1096253 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 0527445ea..03a0d7905 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-11-02, 18:04:47 GMT +# Date: 2023-11-02, 20:54:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1814,7 +1814,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER 17000..187F7 ; 0 # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; 0 # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; 0 # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; 0 # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; 0 # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2030,7 +2030,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826452 code points not listed here. +# The above property value applies to 826451 code points not listed here. # Total code points: 1113182 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 0fd26f476..808770a64 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-02, 18:04:50 GMT +# Date: 2023-11-02, 20:54:22 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2070,8 +2070,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765970 code points not listed here. -# Total code points: 792611 +# The above property value applies to 765969 code points not listed here. +# Total code points: 792610 # ================================================ @@ -2474,7 +2474,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; W # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; W # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2542,7 +2542,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 60482 code points not listed here. -# Total code points: 182424 +# Total code points: 182425 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index e1b983d86..e13243dcc 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-11-02, 18:04:51 GMT +# Date: 2023-11-02, 20:54:23 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -568,7 +568,7 @@ FFFE..FFFF ; Cn # [2] .. 16FE5..16FEF ; Cn # [11] .. 16FF2..16FFF ; Cn # [14] .. 187F8..187FF ; Cn # [8] .. -18CD6..18CFF ; Cn # [42] .. +18CD6..18CFE ; Cn # [41] .. 18D09..1AFEF ; Cn # [8935] .. 1AFF4 ; Cn # 1AFFC ; Cn # @@ -734,7 +734,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824404 +# Total code points: 824403 # ================================================ @@ -2619,7 +2619,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16F50 ; Lo # MIAO LETTER NASALIZATION 17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Lo # [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; Lo # [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO @@ -2686,7 +2686,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132400 +# Total code points: 132401 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 644dcf2cc..93139ad6c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-02, 18:04:53 GMT +# Date: 2023-11-02, 20:54:24 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762419 code points not listed here. -# Total code points: 899887 +# The above property value applies to 762418 code points not listed here. +# Total code points: 899886 # ================================================ @@ -1442,6 +1442,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 16F50 ; AL # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; AL # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1605,7 +1606,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 21964 +# Total code points: 21965 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index d86e783fb..35886cefb 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-11-02, 18:04:54 GMT +# Date: 2023-11-02, 20:54:24 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -37575,6 +37575,7 @@ FFFD ; REPLACEMENT CHARACTER 18AFE ; TANGUT COMPONENT-767 18AFF ; TANGUT COMPONENT-768 18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* +18CFF ; KHITAN SMALL SCRIPT CHARACTER-* 18D00..18D08 ; TANGUT IDEOGRAPH-* 1AFF0 ; KATAKANA LETTER MINNAN TONE-2 1AFF1 ; KATAKANA LETTER MINNAN TONE-3 @@ -44490,6 +44491,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150127 +# Total code points: 150128 # EOF From d75d6c634bfa41de3806177b502678329f569027 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 2 Nov 2023 15:53:33 -0700 Subject: [PATCH 4/5] Two arrow symbols used in Egyptology (#593) --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 5 +++-- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 6 ++++-- unicodetools/data/ucd/dev/Scripts.txt | 5 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 2 ++ unicodetools/data/ucd/dev/VerticalOrientation.txt | 3 ++- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedGeneralCategory.txt | 10 ++++++---- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 6 ++++-- 13 files changed, 45 insertions(+), 28 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index fc1a93c77..7ce11dca2 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-11-02, 20:54:00 GMT +# Date: 2023-11-02, 22:36:41 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2030,6 +2030,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2 ; 16.0 # RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; 16.0 # [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1FA89 ; 16.0 # HARP 1FA8F ; 16.0 # SHOVEL 1FABE ; 16.0 # LEAFLESS TREE @@ -2038,6 +2039,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FADF ; 16.0 # SPLATTER 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES -# Total code points: 315 +# Total code points: 317 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index a9026a3f8..712020964 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-02, 20:54:21 GMT +# Date: 2023-11-02, 22:37:01 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -12648,6 +12648,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F860..1F887 ; Grapheme_Base # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; Grapheme_Base # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B2 ; Grapheme_Base # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; Grapheme_Base # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1FA53 ; Grapheme_Base # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; Grapheme_Base # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Grapheme_Base # So [13] BALLET SHOES..CRUTCH @@ -12669,7 +12670,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 147919 +# Total code points: 147921 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 7a381c952..6e3055855 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-02, 20:54:26 GMT +# Date: 2023-11-02, 22:37:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2603,6 +2603,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1F860..1F887 ; N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B2 ; N # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; N # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F90C..1F93A ; W # So [47] PINCHED FINGERS..FENCER 1F93B ; N # So MODERN PENTATHLON diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index fd3b6f0e5..fa042d08c 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-02, 20:54:27 GMT +# Date: 2023-11-02, 22:37:08 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3561,7 +3561,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1F8AE..1F8AF ; ID # Cn [2] .. 1F8B0..1F8B1 ; ID # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST 1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK -1F8B3..1F8FF ; ID # Cn [77] .. +1F8B3..1F8BF ; ID # Cn [13] .. +1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F8C2..1F8FF ; ID # Cn [62] .. 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F90C ; EB # So PINCHED FINGERS 1F90D..1F90E ; ID # So [2] WHITE HEART..BROWN HEART diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 30dc63c9b..ab7582768 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-11-02, 20:54:48 GMT +# Date: 2023-11-02, 22:37:26 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -616,6 +616,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B2 ; Common # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; Common # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1FA53 ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Common # So [13] BALLET SHOES..CRUTCH @@ -630,7 +631,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 8318 +# Total code points: 8320 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index c4b40b142..b36c88b57 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -33665,6 +33665,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F8B0;ARROW POINTING UPWARDS THEN NORTH WEST;So;0;ON;;;;;N;;;;; 1F8B1;ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST;So;0;ON;;;;;N;;;;; 1F8B2;RIGHTWARDS ARROW WITH LOWER HOOK;So;0;ON;;;;;N;;;;; +1F8C0;LEFTWARDS ARROW FROM DOWNWARDS ARROW;So;0;ON;;;;;N;;;;; +1F8C1;RIGHTWARDS ARROW FROM DOWNWARDS ARROW;So;0;ON;;;;;N;;;;; 1F900;CIRCLED CROSS FORMEE WITH FOUR DOTS;So;0;ON;;;;;N;;;;; 1F901;CIRCLED CROSS FORMEE WITH TWO DOTS;So;0;ON;;;;;N;;;;; 1F902;CIRCLED CROSS FORMEE;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index e27b3906d..ea33b82a5 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-11-02, 20:54:50 GMT +# Date: 2023-11-02, 22:37:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2422,6 +2422,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1F860..1F887 ; R # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; R # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B2 ; R # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; R # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1F9FF ; U # So [256] CIRCLED CROSS FORMEE WITH FOUR DOTS..NAZAR AMULET 1FA00..1FA53 ; U # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FA54..1FA5F ; U # Cn [12] .. diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 6af7d2a38..c7a34de4d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-11-02, 20:54:19 GMT +# Date: 2023-11-02, 22:37:00 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1197,8 +1197,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820221 code points not listed here. -# Total code points: 1096253 +# The above property value applies to 820219 code points not listed here. +# Total code points: 1096251 # ================================================ @@ -1961,6 +1961,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F860..1F887 ; ON # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B2 ; ON # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; ON # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH @@ -1972,7 +1973,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6047 +# Total code points: 6049 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 03a0d7905..f1094f700 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-11-02, 20:54:21 GMT +# Date: 2023-11-02, 22:37:01 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2004,6 +2004,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F860..1F887 ; 0 # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; 0 # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B2 ; 0 # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; 0 # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1FA53 ; 0 # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; 0 # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; 0 # So [13] BALLET SHOES..CRUTCH @@ -2030,7 +2031,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826451 code points not listed here. +# The above property value applies to 826449 code points not listed here. # Total code points: 1113182 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 808770a64..4b8aaada8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-02, 20:54:22 GMT +# Date: 2023-11-02, 22:37:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2059,6 +2059,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F860..1F887 ; N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B2 ; N # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; N # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F93B ; N # So MODERN PENTATHLON 1F946 ; N # So RIFLE @@ -2070,7 +2071,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 765969 code points not listed here. +# The above property value applies to 765967 code points not listed here. # Total code points: 792610 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index e13243dcc..f4887b07e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-11-02, 20:54:23 GMT +# Date: 2023-11-02, 22:37:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -707,7 +707,8 @@ FFFE..FFFF ; Cn # [2] .. 1F85A..1F85F ; Cn # [6] .. 1F888..1F88F ; Cn # [8] .. 1F8AE..1F8AF ; Cn # [2] .. -1F8B3..1F8FF ; Cn # [77] .. +1F8B3..1F8BF ; Cn # [13] .. +1F8C2..1F8FF ; Cn # [62] .. 1FA54..1FA5F ; Cn # [12] .. 1FA6E..1FA6F ; Cn # [2] .. 1FA7D..1FA7F ; Cn # [3] .. @@ -734,7 +735,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824403 +# Total code points: 824401 # ================================================ @@ -4226,6 +4227,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F860..1F887 ; So # [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; So # [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B2 ; So # [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; So # [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1FA53 ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; So # [13] BALLET SHOES..CRUTCH @@ -4237,7 +4239,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6651 +# Total code points: 6653 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 93139ad6c..d55083a99 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-02, 20:54:24 GMT +# Date: 2023-11-02, 22:37:04 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1601,12 +1601,13 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F860..1F887 ; AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 21965 +# Total code points: 21967 # ================================================ @@ -1849,8 +1850,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# The above property value applies to 61970 code points not listed here. -# Total code points: 172567 +# The above property value applies to 61968 code points not listed here. +# Total code points: 172565 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 35886cefb..5723b4043 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-11-02, 20:54:24 GMT +# Date: 2023-11-02, 22:37:04 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -43464,6 +43464,8 @@ FFFD ; REPLACEMENT CHARACTER 1F8B0 ; ARROW POINTING UPWARDS THEN NORTH WEST 1F8B1 ; ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST 1F8B2 ; RIGHTWARDS ARROW WITH LOWER HOOK +1F8C0 ; LEFTWARDS ARROW FROM DOWNWARDS ARROW +1F8C1 ; RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900 ; CIRCLED CROSS FORMEE WITH FOUR DOTS 1F901 ; CIRCLED CROSS FORMEE WITH TWO DOTS 1F902 ; CIRCLED CROSS FORMEE @@ -44491,6 +44493,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150128 +# Total code points: 150130 # EOF From 974e99297fc2dc071aea2d6ff75da36c08be04e2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 2 Nov 2023 21:08:32 -0700 Subject: [PATCH 5/5] InSC-InPC-gc invariant test (#536) --- .../data/ucd/dev/DerivedCoreProperties.txt | 27 ++++++++++++------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 +++-- .../data/ucd/dev/IndicPositionalCategory.txt | 7 +++-- .../data/ucd/dev/IndicSyllabicCategory.txt | 6 +++-- unicodetools/data/ucd/dev/LineBreak.txt | 6 +++-- unicodetools/data/ucd/dev/PropList.txt | 6 +++-- unicodetools/data/ucd/dev/Scripts.txt | 6 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- .../data/ucd/dev/VerticalOrientation.txt | 6 +++-- .../dev/auxiliary/GraphemeBreakProperty.txt | 10 ++++--- .../dev/auxiliary/SentenceBreakProperty.txt | 6 +++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 +++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 10 ++++--- .../dev/extracted/DerivedCombiningClass.txt | 6 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 6 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++--- .../ucd/dev/extracted/DerivedJoiningType.txt | 7 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 6 +++-- .../unicode/text/UCD/UnicodeInvariantTest.txt | 8 ++++++ 19 files changed, 96 insertions(+), 51 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 712020964..7de895986 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-02, 22:37:01 GMT +# Date: 2023-11-02, 22:55:23 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1175,7 +1175,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 116B0..116B5 ; Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B8 ; Alphabetic # Lo TAKRI LETTER ARCHAIC KHA 11700..1171A ; Alphabetic # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Alphabetic # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Alphabetic # Mc AHOM VOWEL SIGN E @@ -3381,7 +3383,8 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 116AD ; Case_Ignorable # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; Case_Ignorable # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; Case_Ignorable # Mn TAKRI SIGN NUKTA -1171D..1171F ; Case_Ignorable # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Case_Ignorable # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; Case_Ignorable # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Case_Ignorable # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Case_Ignorable # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; Case_Ignorable # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -3475,7 +3478,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2723 +# Total code points: 2722 # ================================================ @@ -8005,7 +8008,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; ID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; ID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; ID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; ID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; ID_Continue # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; ID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; ID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; ID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; ID_Continue # Mc AHOM VOWEL SIGN E @@ -10155,7 +10160,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; XID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; XID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; XID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; XID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; XID_Continue # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; XID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; XID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; XID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; XID_Continue # Mc AHOM VOWEL SIGN E @@ -10813,7 +10820,8 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 116AD ; Grapheme_Extend # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; Grapheme_Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; Grapheme_Extend # Mn TAKRI SIGN NUKTA -1171D..1171F ; Grapheme_Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Grapheme_Extend # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; Grapheme_Extend # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Grapheme_Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Grapheme_Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; Grapheme_Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -10892,7 +10900,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2134 +# Total code points: 2133 # ================================================ @@ -12325,6 +12333,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 116C0..116C9 ; Grapheme_Base # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; Grapheme_Base # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; Grapheme_Base # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171E ; Grapheme_Base # Mc AHOM CONSONANT SIGN MEDIAL RA 11720..11721 ; Grapheme_Base # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11726 ; Grapheme_Base # Mc AHOM VOWEL SIGN E 11730..11739 ; Grapheme_Base # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE @@ -12670,7 +12679,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 147921 +# Total code points: 147922 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 6e3055855..7db5471be 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-11-02, 22:37:07 GMT +# Date: 2023-11-02, 22:55:32 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2137,7 +2137,9 @@ FFFD ; A # So REPLACEMENT CHARACTER 116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; N # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; N # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; N # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; N # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; N # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; N # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index d29c2eec9..9904abd1f 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ # IndicPositionalCategory-16.0.0.txt -# Date: 2023-10-24, 20:42:05 GMT +# Date: 2023-11-02, 22:55:33 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -161,7 +161,6 @@ 0CF3 ; Right # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D02..0D03 ; Right # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3E..0D40 ; Right # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II -0D41..0D42 ; Right # Mn [2] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN UU 0D57 ; Right # Mc MALAYALAM AU LENGTH MARK 0D82..0D83 ; Right # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA 0DCF..0DD1 ; Right # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA @@ -621,7 +620,7 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 0C62..0C63 ; Bottom # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0CBC ; Bottom # Mn KANNADA SIGN NUKTA 0CE2..0CE3 ; Bottom # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D43..0D44 ; Bottom # Mn [2] MALAYALAM VOWEL SIGN VOCALIC R..MALAYALAM VOWEL SIGN VOCALIC RR +0D41..0D44 ; Bottom # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D62..0D63 ; Bottom # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL 0DD4 ; Bottom # Mn SINHALA VOWEL SIGN KETTI PAA-PILLA 0DD6 ; Bottom # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA @@ -796,7 +795,7 @@ A9BF ; Bottom_And_Left # Mc JAVANESE CONSONANT SIGN CAKRA # Indic_Positional_Category=Top_And_Bottom_And_Left 103C ; Top_And_Bottom_And_Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA -1171E ; Top_And_Bottom_And_Left # Mn AHOM CONSONANT SIGN MEDIAL RA +1171E ; Top_And_Bottom_And_Left # Mc AHOM CONSONANT SIGN MEDIAL RA # Indic_Positional_Category=Overstruck diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index 2cc7ff452..0bc45f818 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-16.0.0.txt -# Date: 2023-10-24, 20:42:05 GMT +# Date: 2023-11-02, 22:55:33 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1051,7 +1051,9 @@ A9BD ; Consonant_Medial # Mn JAVANESE CONSONANT SIGN KERET A9BE..A9BF ; Consonant_Medial # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA AA33..AA34 ; Consonant_Medial # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA AA35..AA36 ; Consonant_Medial # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA -1171D..1171F ; Consonant_Medial # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Consonant_Medial # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Consonant_Medial # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Consonant_Medial # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11940 ; Consonant_Medial # Mc DIVES AKURU MEDIAL YA 11942 ; Consonant_Medial # Mc DIVES AKURU MEDIAL RA 11A3B..11A3E ; Consonant_Medial # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index fa042d08c..e6c671db9 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-02, 22:37:08 GMT +# Date: 2023-11-02, 22:55:33 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3008,7 +3008,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER 116C0..116C9 ; NU # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; NU # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; SA # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; SA # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; SA # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; SA # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; SA # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; SA # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; SA # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; SA # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index a77d2d267..631e67402 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2023-10-24, 21:03:48 GMT +# Date: 2023-10-25, 13:24:19 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -765,7 +765,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA 116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II 116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU -1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Other_Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Other_Alphabetic # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Other_Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index ab7582768..7f1996daa 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-11-02, 22:37:26 GMT +# Date: 2023-11-02, 22:56:02 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2618,7 +2618,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ 11700..1171A ; Ahom # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; Ahom # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Ahom # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Ahom # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Ahom # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; Ahom # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; Ahom # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Ahom # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index b36c88b57..ae6611741 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -20879,7 +20879,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11719;AHOM LETTER JHA;Lo;0;L;;;;;N;;;;; 1171A;AHOM LETTER ALTERNATE BA;Lo;0;L;;;;;N;;;;; 1171D;AHOM CONSONANT SIGN MEDIAL LA;Mn;0;NSM;;;;;N;;;;; -1171E;AHOM CONSONANT SIGN MEDIAL RA;Mn;0;NSM;;;;;N;;;;; +1171E;AHOM CONSONANT SIGN MEDIAL RA;Mc;0;L;;;;;N;;;;; 1171F;AHOM CONSONANT SIGN MEDIAL LIGATING RA;Mn;0;NSM;;;;;N;;;;; 11720;AHOM VOWEL SIGN A;Mc;0;L;;;;;N;;;;; 11721;AHOM VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index ea33b82a5..0ee0cfade 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-11-02, 22:37:29 GMT +# Date: 2023-11-02, 22:56:06 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1963,7 +1963,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 116C0..116C9 ; R # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; R # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; R # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; R # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; R # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; R # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; R # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; R # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; R # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; R # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 2d25d9af1..f2ee0f839 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2023-10-25, 12:52:55 GMT +# Date: 2023-11-02, 22:55:32 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -381,7 +381,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 116AD ; Extend # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; Extend # Mn TAKRI SIGN NUKTA -1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -461,7 +462,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2139 +# Total code points: 2138 # ================================================ @@ -605,6 +606,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 116AC ; SpacingMark # Mc TAKRI SIGN VISARGA 116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II 116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA +1171E ; SpacingMark # Mc AHOM CONSONANT SIGN MEDIAL RA 11726 ; SpacingMark # Mc AHOM VOWEL SIGN E 1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II 11838 ; SpacingMark # Mc DOGRA SIGN VISARGA @@ -637,7 +639,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 395 +# Total code points: 396 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 2b952b494..cb0e3925d 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-11-02, 20:54:48 GMT +# Date: 2023-11-02, 22:56:03 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -461,7 +461,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B6 ; Extend # Mc TAKRI SIGN VIRAMA 116B7 ; Extend # Mn TAKRI SIGN NUKTA -1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Extend # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; Extend # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Extend # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index a94eebe59..2a4e4af90 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-11-02, 20:54:50 GMT +# Date: 2023-11-02, 22:56:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -497,7 +497,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B6 ; Extend # Mc TAKRI SIGN VIRAMA 116B7 ; Extend # Mn TAKRI SIGN NUKTA -1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Extend # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; Extend # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Extend # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index c7a34de4d..0a8c86a3e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-11-02, 22:37:00 GMT +# Date: 2023-11-02, 22:55:20 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -943,6 +943,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; L # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; L # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171E ; L # Mc AHOM CONSONANT SIGN MEDIAL RA 11720..11721 ; L # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11726 ; L # Mc AHOM VOWEL SIGN E 11730..11739 ; L # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE @@ -1198,7 +1199,7 @@ F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. # The above property value applies to 820219 code points not listed here. -# Total code points: 1096251 +# Total code points: 1096252 # ================================================ @@ -2300,7 +2301,8 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 116AD ; NSM # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; NSM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; NSM # Mn TAKRI SIGN NUKTA -1171D..1171F ; NSM # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; NSM # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; NSM # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; NSM # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; NSM # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; NSM # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -2375,7 +2377,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2002 +# Total code points: 2001 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index f1094f700..143a81168 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-11-02, 22:37:01 GMT +# Date: 2023-11-02, 22:55:22 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1636,7 +1636,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 116C0..116C9 ; 0 # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; 0 # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; 0 # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; 0 # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; 0 # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; 0 # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; 0 # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; 0 # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; 0 # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; 0 # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 4b8aaada8..329e2b4fd 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-11-02, 22:37:03 GMT +# Date: 2023-11-02, 22:55:25 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1661,7 +1661,9 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 116D0..116E3 ; N # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; N # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; N # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; N # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; N # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; N # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index f4887b07e..33b38404b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-11-02, 22:37:03 GMT +# Date: 2023-11-02, 22:55:26 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2967,7 +2967,8 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 116AD ; Mn # TAKRI VOWEL SIGN AA 116B0..116B5 ; Mn # [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; Mn # TAKRI SIGN NUKTA -1171D..1171F ; Mn # [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Mn # AHOM CONSONANT SIGN MEDIAL LA +1171F ; Mn # AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Mn # [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Mn # [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; Mn # [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -3042,7 +3043,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1994 +# Total code points: 1993 # ================================================ @@ -3210,6 +3211,7 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 116AC ; Mc # TAKRI SIGN VISARGA 116AE..116AF ; Mc # [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II 116B6 ; Mc # TAKRI SIGN VIRAMA +1171E ; Mc # AHOM CONSONANT SIGN MEDIAL RA 11720..11721 ; Mc # [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11726 ; Mc # AHOM VOWEL SIGN E 1182C..1182E ; Mc # [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II @@ -3243,7 +3245,7 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 452 +# Total code points: 453 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 4ae6df297..1df4e026a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2023-10-24, 21:03:40 GMT +# Date: 2023-11-02, 22:55:27 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -491,7 +491,8 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 116AD ; T # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; T # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; T # Mn TAKRI SIGN NUKTA -1171D..1171F ; T # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; T # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; T # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; T # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; T # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; T # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -572,6 +573,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2159 +# Total code points: 2158 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index d55083a99..c13282bef 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-02, 22:37:04 GMT +# Date: 2023-11-02, 22:55:28 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2678,7 +2678,9 @@ AADB..AADC ; SA # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; SA # Lm TAI VIET SYMBOL SAM AADE..AADF ; SA # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI 11700..1171A ; SA # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; SA # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; SA # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; SA # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; SA # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; SA # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; SA # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; SA # Mc AHOM VOWEL SIGN E diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 8ee8762b3..51e3329d3 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -879,3 +879,11 @@ Let $ideohack = [〆 〇 〡-〩] # \P{kRSUnicode=∅} = \P{kTotalStrokes=∅} # \P{kHanyuPinyin=∅} ⊇ \P{kMandarin=∅} + +# InPC-InSC-gc invariants +# See https://www.unicode.org/L2/L2023/23200-category-invariants.pdf. +\p{InPC=/(Left|Right)/} ⊆ [\p{gc=Mc}\p{gc=Lo}] +[\P{InPC=NA}&\p{gc=Mc}] ⊆ \p{InPC=/(Left|Right)/} +[\P{InPC=NA}&\P{InPC=/(Left|Right)/}] ⊆ [\p{gc=Mn}\p{gc=Lo}] +\p{gc=Mn} ⊆ \P{InPC=/(Left|Right)/} +[\p{gc=Lo}&\P{InPC=NA}&\P{InPC=/(Left|Right)/}] ⊆ [\p{InSC=Consonant_Preceding_Repha}\p{InSC=Consonant_Prefixed}] \ No newline at end of file