From 062d385fcfdb6fa251b432ad8b41e6f004d85307 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 17 Feb 2024 01:11:39 +0100 Subject: [PATCH 01/13] 5 modifier click letters --- unicodetools/data/ucd/dev/UnicodeData.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 08593a319..f71d5a1e5 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,8 @@ +107BB;MODIFIER LETTER SMALL TURNED T;Lm;0;L; 0287;;;;N;;;;; +107BC;MODIFIER LETTER INVERTED GLOTTAL STOP;Lm;0;L; 0296;;;;N;;;;; +107BD;MODIFIER LETTER SMALL ESH WITH DOUBLE BAR;Lm;0;L; 1DF0B;;;;N;;;;; +107BE;MODIFIER LETTER STRETCHED C;Lm;0;L; 0297;;;;N;;;;; +107BF;MODIFIER LETTER SMALL TURNED K;Lm;0;L; 029E;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 0fd6a72c188a517cd82dd32e35d34de01f8cbc8f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 17 Feb 2024 01:19:42 +0100 Subject: [PATCH 02/13] lb=AL --- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 1d8bd89ec..449a79ad1 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2024-02-02, 23:11:30 GMT +# Date: 2024-02-17, 00:17:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2729,6 +2729,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10780..10785 ; AL # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; AL # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; AL # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107BB..107BF ; AL # Lm [5] MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL TURNED K 10800..10805 ; AL # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; AL # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; AL # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO From ee0894efad74da64df7a751b23410c4e2bd5a47f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 17 Feb 2024 01:21:29 +0100 Subject: [PATCH 03/13] Looks Latin to me --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 2e0b4fd4b..0e9627ded 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +107BB..107BF; Latin # Scripts-16.0.0.txt # Date: 2024-02-02, 23:11:49 GMT # © 2024 Unicode®, Inc. From aa6728b924a908d79839cd181158472c64e7fccd Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 17 Feb 2024 01:28:02 +0100 Subject: [PATCH 04/13] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 30 +++++++++---------- .../ucd/dev/DerivedNormalizationProps.txt | 28 +++++++++++------ unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++-- .../data/ucd/dev/NormalizationTest.txt | 7 ++++- unicodetools/data/ucd/dev/Scripts.txt | 7 ++--- unicodetools/data/ucd/dev/UnicodeData.txt | 10 +++---- .../data/ucd/dev/VerticalOrientation.txt | 4 +-- .../dev/auxiliary/SentenceBreakProperty.txt | 5 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++-- .../dev/extracted/DerivedCombiningClass.txt | 6 ++-- .../extracted/DerivedDecompositionType.txt | 6 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +++---- .../data/ucd/dev/extracted/DerivedName.txt | 9 ++++-- 18 files changed, 92 insertions(+), 72 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index d116dccaa..39fa3ac31 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-02-02, 23:11:18 GMT +# Date: 2024-02-17, 00:24:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2020,6 +2020,7 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE 105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO +107BB..107BF ; 16.0 # [5] MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL TURNED K 10D40..10D65 ; 16.0 # [38] GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA 10D69..10D85 ; 16.0 # [29] GARAY VOWEL SIGN E..GARAY SMALL LETTER OLD NA 10D8E..10D8F ; 16.0 # [2] GARAY PLUS SIGN..GARAY MINUS SIGN @@ -2059,6 +2060,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5187 +# Total code points: 5192 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1e054c4f2..fdda38569 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-02-02, 23:11:24 GMT +# Date: 2024-02-17, 00:24:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1007,7 +1007,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10760..10767 ; Alphabetic # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; Alphabetic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Alphabetic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Alphabetic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Alphabetic # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; Alphabetic # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; Alphabetic # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; Alphabetic # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -1441,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142761 +# Total code points: 142766 # ================================================ @@ -3338,7 +3338,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10376..1037A ; Case_Ignorable # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII 10780..10785 ; Case_Ignorable # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Case_Ignorable # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Case_Ignorable # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Case_Ignorable # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10A01..10A03 ; Case_Ignorable # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; Case_Ignorable # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 10A0C..10A0F ; Case_Ignorable # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA @@ -3505,7 +3505,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2754 # ================================================ @@ -6689,7 +6689,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10760..10767 ; ID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; ID_Start # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; ID_Start # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; ID_Start # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; ID_Start # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; ID_Start # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; ID_Start # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; ID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -6962,7 +6962,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141271 +# Total code points: 141276 # ================================================ @@ -7844,7 +7844,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10760..10767 ; ID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; ID_Continue # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; ID_Continue # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; ID_Continue # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; ID_Continue # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; ID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; ID_Continue # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; ID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -8370,7 +8370,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144543 +# Total code points: 144548 # ================================================ @@ -8875,7 +8875,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10760..10767 ; XID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; XID_Start # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; XID_Start # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; XID_Start # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; XID_Start # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; XID_Start # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; XID_Start # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; XID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -9148,7 +9148,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141248 +# Total code points: 141253 # ================================================ @@ -10031,7 +10031,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10760..10767 ; XID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; XID_Continue # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; XID_Continue # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; XID_Continue # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; XID_Continue # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; XID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; XID_Continue # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; XID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -10557,7 +10557,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144524 +# Total code points: 144529 # ================================================ @@ -12257,7 +12257,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10760..10767 ; Grapheme_Base # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; Grapheme_Base # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Grapheme_Base # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Grapheme_Base # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Grapheme_Base # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; Grapheme_Base # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; Grapheme_Base # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; Grapheme_Base # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -12811,7 +12811,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152740 +# Total code points: 152745 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index b526bd1d0..0c4d1bcbe 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-16.0.0.txt -# Date: 2024-02-02, 23:11:27 GMT +# Date: 2024-02-17, 00:24:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1644,7 +1644,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 105E4 ; NFKD_QC; N # Lo TODHRI LETTER U 10781..10785 ; NFKD_QC; N # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; NFKD_QC; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; NFKD_QC; N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; NFKD_QC; N # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 1109A ; NFKD_QC; N # Lo KAITHI LETTER DDDHA 1109C ; NFKD_QC; N # Lo KAITHI LETTER RHA 110AB ; NFKD_QC; N # Lo KAITHI LETTER VA @@ -1753,7 +1753,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 17085 +# Total code points: 17090 # ================================================ @@ -2072,7 +2072,7 @@ FFE9..FFEC ; NFKC_QC; N # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNW FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10781..10785 ; NFKC_QC; N # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; NFKC_QC; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; NFKC_QC; N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; NFKC_QC; N # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 1CCD6..1CCEF ; NFKC_QC; N # So [26] OUTLINED LATIN CAPITAL LETTER A..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; NFKC_QC; N # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D15E..1D164 ; NFKC_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE @@ -2164,7 +2164,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKC_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4964 +# Total code points: 4969 # ================================================ @@ -7047,6 +7047,11 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] ...... -# Total code points: 10554 +# Total code points: 10559 # ================================================ @@ -13181,6 +13186,11 @@ FFF0..FFF8 ; NFKC_SCF; # Cn [9] ...... -# Total code points: 10516 +# Total code points: 10521 # ================================================ @@ -16194,7 +16204,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] ...... -# Total code points: 10554 +# Total code points: 10559 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index dccf9c5a7..ea2094bad 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-02-02, 23:11:29 GMT +# Date: 2024-02-17, 00:24:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1877,7 +1877,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 10760..10767 ; N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; N # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; N # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; N # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; N # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; N # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 449a79ad1..adee7807d 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2024-02-17, 00:17:09 GMT +# Date: 2024-02-17, 00:25:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2728,8 +2728,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10760..10767 ; AL # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; AL # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; AL # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; AL # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL -107BB..107BF ; AL # Lm [5] MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; AL # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; AL # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; AL # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; AL # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 1e88cecdf..125bc91d1 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2024-02-02, 23:11:32 GMT +# Date: 2024-02-17, 00:25:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -15208,6 +15208,11 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 107B8;107B8;107B8;01C2;01C2; # (𐞸; 𐞸; 𐞸; ǂ; ǂ; ) MODIFIER LETTER ALVEOLAR CLICK 107B9;107B9;107B9;1DF0A;1DF0A; # (𐞹; 𐞹; 𐞹; 𝼊; 𝼊; ) MODIFIER LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 107BA;107BA;107BA;1DF1E;1DF1E; # (𐞺; 𐞺; 𐞺; 𝼞; 𝼞; ) MODIFIER LETTER SMALL S WITH CURL +107BB;107BB;107BB;0287;0287; # (𐞻; 𐞻; 𐞻; ʇ; ʇ; ) MODIFIER LETTER SMALL TURNED T +107BC;107BC;107BC;0296;0296; # (𐞼; 𐞼; 𐞼; ʖ; ʖ; ) MODIFIER LETTER INVERTED GLOTTAL STOP +107BD;107BD;107BD;1DF0B;1DF0B; # (𐞽; 𐞽; 𐞽; 𝼋; 𝼋; ) MODIFIER LETTER SMALL ESH WITH DOUBLE BAR +107BE;107BE;107BE;0297;0297; # (𐞾; 𐞾; 𐞾; ʗ; ʗ; ) MODIFIER LETTER STRETCHED C +107BF;107BF;107BF;029E;029E; # (𐞿; 𐞿; 𐞿; ʞ; ʞ; ) MODIFIER LETTER SMALL TURNED K 1109A;1109A;11099 110BA;1109A;11099 110BA; # (𑂚; 𑂚; 𑂙◌𑂺; 𑂚; 𑂙◌𑂺; ) KAITHI LETTER DDDHA 1109C;1109C;1109B 110BA;1109C;1109B 110BA; # (𑂜; 𑂜; 𑂛◌𑂺; 𑂜; 𑂛◌𑂺; ) KAITHI LETTER RHA 110AB;110AB;110A5 110BA;110AB;110A5 110BA; # (𑂫; 𑂫; 𑂥◌𑂺; 𑂫; 𑂥◌𑂺; ) KAITHI LETTER VA diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 0e9627ded..56a84c1f2 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,6 +1,5 @@ -107BB..107BF; Latin # Scripts-16.0.0.txt -# Date: 2024-02-02, 23:11:49 GMT +# Date: 2024-02-17, 00:25:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -697,13 +696,13 @@ FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10780..10785 ; Latin # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Latin # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Latin # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Latin # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 1DF00..1DF09 ; Latin # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1487 +# Total code points: 1492 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f71d5a1e5..fb299e910 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,8 +1,3 @@ -107BB;MODIFIER LETTER SMALL TURNED T;Lm;0;L; 0287;;;;N;;;;; -107BC;MODIFIER LETTER INVERTED GLOTTAL STOP;Lm;0;L; 0296;;;;N;;;;; -107BD;MODIFIER LETTER SMALL ESH WITH DOUBLE BAR;Lm;0;L; 1DF0B;;;;N;;;;; -107BE;MODIFIER LETTER STRETCHED C;Lm;0;L; 0297;;;;N;;;;; -107BF;MODIFIER LETTER SMALL TURNED K;Lm;0;L; 029E;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -18475,6 +18470,11 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 107B8;MODIFIER LETTER ALVEOLAR CLICK;Lm;0;L; 01C2;;;;N;;;;; 107B9;MODIFIER LETTER RETROFLEX CLICK WITH RETROFLEX HOOK;Lm;0;L; 1DF0A;;;;N;;;;; 107BA;MODIFIER LETTER SMALL S WITH CURL;Lm;0;L; 1DF1E;;;;N;;;;; +107BB;MODIFIER LETTER SMALL TURNED T;Lm;0;L; 0287;;;;N;;;;; +107BC;MODIFIER LETTER INVERTED GLOTTAL STOP;Lm;0;L; 0296;;;;N;;;;; +107BD;MODIFIER LETTER SMALL ESH WITH DOUBLE BAR;Lm;0;L; 1DF0B;;;;N;;;;; +107BE;MODIFIER LETTER STRETCHED C;Lm;0;L; 0297;;;;N;;;;; +107BF;MODIFIER LETTER SMALL TURNED K;Lm;0;L; 029E;;;;N;;;;; 10800;CYPRIOT SYLLABLE A;Lo;0;R;;;;;N;;;;; 10801;CYPRIOT SYLLABLE E;Lo;0;R;;;;;N;;;;; 10802;CYPRIOT SYLLABLE I;Lo;0;R;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 187a0888d..b0feaeed9 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-02-02, 23:11:51 GMT +# Date: 2024-02-17, 00:25:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1701,7 +1701,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10760..10767 ; R # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; R # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; R # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; R # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; R # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; R # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; R # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; R # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index cec52074f..42d6bc25b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-02-02, 23:11:49 GMT +# Date: 2024-02-17, 00:25:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2354,6 +2354,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10740..10755 ; OLetter # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; OLetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10781..10782 ; OLetter # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON +107BB..107BF ; OLetter # Lm [5] MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL TURNED K 10800..10805 ; OLetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; OLetter # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; OLetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -2585,7 +2586,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136910 +# Total code points: 136915 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 958bd3f6d..5f4a8f448 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-02-02, 23:11:51 GMT +# Date: 2024-02-17, 00:25:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1102,7 +1102,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10760..10767 ; ALetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; ALetter # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; ALetter # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; ALetter # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; ALetter # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; ALetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; ALetter # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -1356,7 +1356,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33794 +# Total code points: 33799 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index d991ed1b8..7dec99988 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2024-02-02, 23:11:23 GMT +# Date: 2024-02-17, 00:24:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -829,7 +829,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 10760..10767 ; L # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; L # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; L # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; L # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; L # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 11000 ; L # Mc BRAHMI SIGN CANDRABINDU 11002 ; L # Mc BRAHMI SIGN VISARGA 11003..11037 ; L # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA @@ -1219,7 +1219,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815349 code points not listed here. +# The above property value applies to 815344 code points not listed here. # Total code points: 1095518 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 639fce0ae..fe9231690 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-02-02, 23:11:23 GMT +# Date: 2024-02-17, 00:24:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1399,7 +1399,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10760..10767 ; 0 # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; 0 # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; 0 # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; 0 # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; 0 # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; 0 # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; 0 # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; 0 # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -2060,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821579 code points not listed here. +# The above property value applies to 821574 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index a49462856..be3b0a4a5 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ # DerivedDecompositionType-16.0.0.txt -# Date: 2024-02-02, 23:11:25 GMT +# Date: 2024-02-17, 00:24:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -879,12 +879,12 @@ AB5C..AB5F ; Super # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMA AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W 10781..10785 ; Super # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Super # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Super # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Super # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 1E030..1E050 ; Super # Lm [33] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL PALOCHKA 1E06B..1E06D ; Super # Lm [3] MODIFIER LETTER CYRILLIC SMALL ES WITH DESCENDER..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1F16A..1F16C ; Super # So [3] RAISED MC SIGN..RAISED MR SIGN -# Total code points: 249 +# Total code points: 254 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 44585c48d..ce65ed091 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-02-02, 23:11:25 GMT +# Date: 2024-02-17, 00:24:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1404,7 +1404,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10760..10767 ; N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; N # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; N # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; N # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; N # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; N # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -2103,7 +2103,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761097 code points not listed here. +# The above property value applies to 761092 code points not listed here. # Total code points: 792608 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index b9105da02..90048cfd1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2024-02-02, 23:11:25 GMT +# Date: 2024-02-17, 00:24:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -394,7 +394,7 @@ FFFE..FFFF ; Cn # [2] .. 10768..1077F ; Cn # [24] .. 10786 ; Cn # 107B1 ; Cn # -107BB..107FF ; Cn # [69] .. +107C0..107FF ; Cn # [64] .. 10806..10807 ; Cn # [2] .. 10809 ; Cn # 10836 ; Cn # @@ -747,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819531 +# Total code points: 819526 # ================================================ @@ -2156,7 +2156,7 @@ FF70 ; Lm # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 10780..10785 ; Lm # [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Lm # [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Lm # [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Lm # [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10D4E ; Lm # GARAY VOWEL LENGTH MARK 10D6F ; Lm # GARAY REDUPLICATION MARK 16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM @@ -2173,7 +2173,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 404 +# Total code points: 409 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 3a3ecf9c1..2cae25fd2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-02-02, 23:11:26 GMT +# Date: 2024-02-17, 00:24:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757651 code points not listed here. -# Total code points: 895119 +# The above property value applies to 757646 code points not listed here. +# Total code points: 895114 # ================================================ @@ -1251,7 +1251,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10760..10767 ; AL # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; AL # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; AL # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; AL # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; AL # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10800..10805 ; AL # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; AL # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; AL # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -1611,7 +1611,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26685 +# Total code points: 26690 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 4778d471e..432232aaa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-02-02, 23:11:26 GMT +# Date: 2024-02-17, 00:24:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -29135,6 +29135,11 @@ FFFD ; REPLACEMENT CHARACTER 107B8 ; MODIFIER LETTER ALVEOLAR CLICK 107B9 ; MODIFIER LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 107BA ; MODIFIER LETTER SMALL S WITH CURL +107BB ; MODIFIER LETTER SMALL TURNED T +107BC ; MODIFIER LETTER INVERTED GLOTTAL STOP +107BD ; MODIFIER LETTER SMALL ESH WITH DOUBLE BAR +107BE ; MODIFIER LETTER STRETCHED C +107BF ; MODIFIER LETTER SMALL TURNED K 10800 ; CYPRIOT SYLLABLE A 10801 ; CYPRIOT SYLLABLE E 10802 ; CYPRIOT SYLLABLE I @@ -45369,6 +45374,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155000 +# Total code points: 155005 # EOF From f04a3ad64643b9ffa687b2f3a6cac7429b1782f8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Mar 2024 16:41:15 +0100 Subject: [PATCH 05/13] An invariant about Lowercase --- .../resources/org/unicode/text/UCD/UnicodeInvariantTest.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 69e23177d..3dcce4e73 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -586,6 +586,8 @@ Let $nonAlphabeticAvagrahas = [\N{TIBETAN MARK PALUTA}] # A punctuation mark. [\p{InSC=Avagraha} - $nonAlphabeticAvagrahas] ⊆ \p{Alphabetic} # Name-based checks. +Let $nonLowercaseSmallLetters = [ \p{name=/^LIMBU SMALL LETTER/} \N{TURNED GREEK SMALL LETTER IOTA} \p{name=/^(SQUARED|PARENTHESIZED|TAG) LATIN SMALL LETTER/} ] +[ \p{name=/\bSMALL LETTER\b}-\p{gc=Mn}-\p{gc=Lt} - $nonLowercaseSmallLetters ] ⊆ \p{Lowercase} # Combining letters are often alphabetic (medievalist abbreviations). # The others are diacritic (cantillation marks, phonetics). From 22b578c300c1903c445d4b6b323800362007f5b0 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Mar 2024 17:51:46 +0100 Subject: [PATCH 06/13] Test the modifier letters too --- .../resources/org/unicode/text/UCD/UnicodeInvariantTest.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 3dcce4e73..e8e82ad78 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -587,7 +587,9 @@ Let $nonAlphabeticAvagrahas = [\N{TIBETAN MARK PALUTA}] # A punctuation mark. # Name-based checks. Let $nonLowercaseSmallLetters = [ \p{name=/^LIMBU SMALL LETTER/} \N{TURNED GREEK SMALL LETTER IOTA} \p{name=/^(SQUARED|PARENTHESIZED|TAG) LATIN SMALL LETTER/} ] -[ \p{name=/\bSMALL LETTER\b}-\p{gc=Mn}-\p{gc=Lt} - $nonLowercaseSmallLetters ] ⊆ \p{Lowercase} +Let $nonLowercaseSmallModifierLetters = [ \p{gc=Lm} & \p{name=/^ARABIC SMALL/} ] +[ \p{name=/\bSMALL LETTER\b/}-\p{gc=Mn}-\p{gc=Lt} - $nonLowercaseSmallLetters ] ⊆ \p{Lowercase} +[ [\p{gc=Lm} & \p{name=/SMALL/}] - $nonLowercaseSmallModifierLetters ] ⊆ \p{Lowercase} # Combining letters are often alphabetic (medievalist abbreviations). # The others are diacritic (cantillation marks, phonetics). From 438ecd0406960367e06ac90b76d138612ee62131 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Mar 2024 18:13:56 +0100 Subject: [PATCH 07/13] Use the decompositions --- .../org/unicode/text/UCD/UnicodeInvariantTest.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index e8e82ad78..4fa139b32 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -596,6 +596,14 @@ Let $nonLowercaseSmallModifierLetters = [ \p{gc=Lm} & \p{name=/^ARABIC SMALL/} ] # See 177-C52. \p{name=/COMBINING .* LETTER/} ⊆ [\p{Alphabetic}\p{Diacritic}] +## Consistency of Lowercase with decompositions. +# Note that the same is not true of Uppercase. +# A non-lowercase character has non-lowercase characters in its decomposition, +# or it its decomposition is (㋍ etc.). +In [\P{Lowercase} - \p{dt=square}], \p{Lowercase} * toNFKD ≠ toNFKD +# A lowercase character decomposes to lowercase characters and nonspacing marks. +In \p{Lowercase}, [\p{Lowercase}\p{Mn}] * toNFKD = toNFKD + ## Joining_Type and Joining_Group # Where defined, the Joining_Group refines the Joining_Type. OnPairsOf \P{Joining_Group=No_Joining_Group}, EqualityOf Joining_Group ⇒ EqualityOf Joining_Type From 1e1d40ede3ff9ef843bfa310ab81dfafb559686a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Mar 2024 18:22:45 +0100 Subject: [PATCH 08/13] bad parser --- .../resources/org/unicode/text/UCD/UnicodeInvariantTest.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 4fa139b32..f5377a383 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -602,7 +602,7 @@ Let $nonLowercaseSmallModifierLetters = [ \p{gc=Lm} & \p{name=/^ARABIC SMALL/} ] # or it its decomposition is (㋍ etc.). In [\P{Lowercase} - \p{dt=square}], \p{Lowercase} * toNFKD ≠ toNFKD # A lowercase character decomposes to lowercase characters and nonspacing marks. -In \p{Lowercase}, [\p{Lowercase}\p{Mn}] * toNFKD = toNFKD +In \p{Lowercase}, [\p{Lowercase}\p{gc=Mn}] * toNFKD = toNFKD ## Joining_Type and Joining_Group # Where defined, the Joining_Group refines the Joining_Type. From 259c90f1f18adc04c254d2af5225f5a852fb287f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Mar 2024 18:30:46 +0100 Subject: [PATCH 09/13] that is just wrong. --- .../resources/org/unicode/text/UCD/UnicodeInvariantTest.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index f5377a383..c3a93fa5b 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -601,8 +601,6 @@ Let $nonLowercaseSmallModifierLetters = [ \p{gc=Lm} & \p{name=/^ARABIC SMALL/} ] # A non-lowercase character has non-lowercase characters in its decomposition, # or it its decomposition is (㋍ etc.). In [\P{Lowercase} - \p{dt=square}], \p{Lowercase} * toNFKD ≠ toNFKD -# A lowercase character decomposes to lowercase characters and nonspacing marks. -In \p{Lowercase}, [\p{Lowercase}\p{gc=Mn}] * toNFKD = toNFKD ## Joining_Type and Joining_Group # Where defined, the Joining_Group refines the Joining_Type. From 5c0fe280d7d6c92409e1625b3322b4faaf7e330a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Mar 2024 18:32:45 +0100 Subject: [PATCH 10/13] Other_Lowercase --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 1e3e2912b..df2a7f290 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1220,6 +1220,7 @@ AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107BB..107BF ; Other_Lowercase 1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE # Total code points: 311 From 704b392398390dcbb6cf74beaa794d0fdb806f3a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 13 Mar 2024 18:39:24 +0100 Subject: [PATCH 11/13] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 10 +++++----- unicodetools/data/ucd/dev/PropList.txt | 7 +++---- .../data/ucd/dev/auxiliary/SentenceBreakProperty.txt | 9 ++++----- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index fdda38569..e6b878938 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-02-17, 00:24:49 GMT +# Date: 2024-03-13, 17:35:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2100,7 +2100,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 10780 ; Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA 10783..10785 ; Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Lowercase # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10CC0..10CF2 ; Lowercase # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D70..10D85 ; Lowercase # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lowercase # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO @@ -2139,7 +2139,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2569 +# Total code points: 2574 # ================================================ @@ -2942,7 +2942,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10780 ; Cased # Lm MODIFIER LETTER SMALL CAPITAL AA 10783..10785 ; Cased # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Cased # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Cased # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Cased # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10C80..10CB2 ; Cased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; Cased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D50..10D65 ; Cased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA @@ -2988,7 +2988,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4578 +# Total code points: 4583 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index df2a7f290..7db25493d 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2024-03-12, 13:28:44 GMT +# Date: 2024-03-13, 17:35:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1219,11 +1219,10 @@ AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 10780 ; Other_Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA 10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL -107BB..107BF ; Other_Lowercase +107B2..107BF ; Other_Lowercase # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -# Total code points: 311 +# Total code points: 316 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 42d6bc25b..d54d8ea32 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-02-17, 00:25:37 GMT +# Date: 2024-03-13, 17:35:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1281,7 +1281,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10780 ; Lower # Lm MODIFIER LETTER SMALL CAPITAL AA 10783..10785 ; Lower # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Lower # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BA ; Lower # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +107B2..107BF ; Lower # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K 10CC0..10CF2 ; Lower # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D70..10D85 ; Lower # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lower # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO @@ -1320,7 +1320,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2522 +# Total code points: 2527 # ================================================ @@ -2354,7 +2354,6 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10740..10755 ; OLetter # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; OLetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10781..10782 ; OLetter # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON -107BB..107BF ; OLetter # Lm [5] MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL TURNED K 10800..10805 ; OLetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; OLetter # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; OLetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -2586,7 +2585,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136915 +# Total code points: 136910 # ================================================ From 145e168698f801c2d8d849a6c8dfad5517a15800 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 18:08:01 +0200 Subject: [PATCH 12/13] Reordering per L2/24-052R --- unicodetools/data/ucd/dev/UnicodeData.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 5665cbaca..e6ddd45bc 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -18470,9 +18470,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 107BA;MODIFIER LETTER SMALL S WITH CURL;Lm;0;L; 1DF1E;;;;N;;;;; 107BB;MODIFIER LETTER SMALL TURNED T;Lm;0;L; 0287;;;;N;;;;; 107BC;MODIFIER LETTER INVERTED GLOTTAL STOP;Lm;0;L; 0296;;;;N;;;;; -107BD;MODIFIER LETTER SMALL ESH WITH DOUBLE BAR;Lm;0;L; 1DF0B;;;;N;;;;; -107BE;MODIFIER LETTER STRETCHED C;Lm;0;L; 0297;;;;N;;;;; -107BF;MODIFIER LETTER SMALL TURNED K;Lm;0;L; 029E;;;;N;;;;; +107BD;MODIFIER LETTER STRETCHED C;Lm;0;L; 0297;;;;N;;;;; +107BE;MODIFIER LETTER SMALL TURNED K;Lm;0;L; 029E;;;;N;;;;; +107BF;MODIFIER LETTER SMALL ESH WITH DOUBLE BAR;Lm;0;L; 1DF0B;;;;N;;;;; 10800;CYPRIOT SYLLABLE A;Lo;0;R;;;;;N;;;;; 10801;CYPRIOT SYLLABLE E;Lo;0;R;;;;;N;;;;; 10802;CYPRIOT SYLLABLE I;Lo;0;R;;;;;N;;;;; From 6b9773d8a6fdbc068900cca51ebc75b565aa88e8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 18:11:29 +0200 Subject: [PATCH 13/13] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 4 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 20 +++++++++---------- .../ucd/dev/DerivedNormalizationProps.txt | 20 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 ++-- unicodetools/data/ucd/dev/LineBreak.txt | 4 ++-- .../data/ucd/dev/NormalizationTest.txt | 8 ++++---- unicodetools/data/ucd/dev/PropList.txt | 4 ++-- unicodetools/data/ucd/dev/Scripts.txt | 4 ++-- .../data/ucd/dev/VerticalOrientation.txt | 4 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 4 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 4 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 4 ++-- .../dev/extracted/DerivedCombiningClass.txt | 4 ++-- .../extracted/DerivedDecompositionType.txt | 4 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 4 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 4 ++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 4 ++-- .../data/ucd/dev/extracted/DerivedName.txt | 8 ++++---- 18 files changed, 56 insertions(+), 56 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 27450c7e8..9cdb24da4 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-06-06, 10:46:15 GMT +# Date: 2024-07-25, 16:09:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2018,7 +2018,7 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE 105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO -107BB..107BF ; 16.0 # [5] MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL TURNED K +107BB..107BF ; 16.0 # [5] MODIFIER LETTER SMALL TURNED T..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10D40..10D65 ; 16.0 # [38] GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA 10D69..10D85 ; 16.0 # [29] GARAY VOWEL SIGN E..GARAY SMALL LETTER OLD NA 10D8E..10D8F ; 16.0 # [2] GARAY PLUS SIGN..GARAY MINUS SIGN diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index a932e6f0e..fbf020dd7 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-06-06, 10:46:34 GMT +# Date: 2024-07-25, 16:10:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1007,7 +1007,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10760..10767 ; Alphabetic # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; Alphabetic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Alphabetic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; Alphabetic # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; Alphabetic # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10800..10805 ; Alphabetic # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; Alphabetic # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; Alphabetic # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -2100,7 +2100,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 10780 ; Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA 10783..10785 ; Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; Lowercase # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; Lowercase # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10CC0..10CF2 ; Lowercase # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D70..10D85 ; Lowercase # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lowercase # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO @@ -2942,7 +2942,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10780 ; Cased # Lm MODIFIER LETTER SMALL CAPITAL AA 10783..10785 ; Cased # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Cased # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; Cased # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; Cased # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10C80..10CB2 ; Cased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; Cased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D50..10D65 ; Cased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA @@ -3338,7 +3338,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10376..1037A ; Case_Ignorable # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII 10780..10785 ; Case_Ignorable # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Case_Ignorable # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; Case_Ignorable # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; Case_Ignorable # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10A01..10A03 ; Case_Ignorable # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; Case_Ignorable # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 10A0C..10A0F ; Case_Ignorable # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA @@ -6689,7 +6689,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10760..10767 ; ID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; ID_Start # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; ID_Start # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; ID_Start # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; ID_Start # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10800..10805 ; ID_Start # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; ID_Start # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; ID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -7844,7 +7844,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10760..10767 ; ID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; ID_Continue # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; ID_Continue # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; ID_Continue # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; ID_Continue # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10800..10805 ; ID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; ID_Continue # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; ID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -8875,7 +8875,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10760..10767 ; XID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; XID_Start # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; XID_Start # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; XID_Start # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; XID_Start # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10800..10805 ; XID_Start # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; XID_Start # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; XID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -10031,7 +10031,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10760..10767 ; XID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; XID_Continue # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; XID_Continue # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; XID_Continue # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; XID_Continue # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10800..10805 ; XID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; XID_Continue # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; XID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO @@ -12257,7 +12257,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10760..10767 ; Grapheme_Base # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10780..10785 ; Grapheme_Base # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Grapheme_Base # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; Grapheme_Base # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; Grapheme_Base # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 10800..10805 ; Grapheme_Base # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; Grapheme_Base # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; Grapheme_Base # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index 4a8facb35..8d5b80ba4 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-16.0.0.txt -# Date: 2024-06-06, 10:46:37 GMT +# Date: 2024-07-25, 16:10:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1644,7 +1644,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 105E4 ; NFKD_QC; N # Lo TODHRI LETTER U 10781..10785 ; NFKD_QC; N # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; NFKD_QC; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; NFKD_QC; N # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; NFKD_QC; N # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 1109A ; NFKD_QC; N # Lo KAITHI LETTER DDDHA 1109C ; NFKD_QC; N # Lo KAITHI LETTER RHA 110AB ; NFKD_QC; N # Lo KAITHI LETTER VA @@ -2072,7 +2072,7 @@ FFE9..FFEC ; NFKC_QC; N # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNW FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10781..10785 ; NFKC_QC; N # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; NFKC_QC; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK -107B2..107BF ; NFKC_QC; N # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL TURNED K +107B2..107BF ; NFKC_QC; N # Lm [14] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL ESH WITH DOUBLE BAR 1CCD6..1CCEF ; NFKC_QC; N # So [26] OUTLINED LATIN CAPITAL LETTER A..OUTLINED LATIN CAPITAL LETTER Z 1CCF0..1CCF9 ; NFKC_QC; N # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D15E..1D164 ; NFKC_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE @@ -7049,9 +7049,9 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] ......