From 0cc0abd65a0191214651631f28f30d9e733ff636 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 4 Oct 2024 00:35:07 +0200 Subject: [PATCH 1/6] UnicodeData.txt lines from the proposal --- unicodetools/data/ucd/dev/UnicodeData.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a373..90c14d102 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,5 @@ +AB6C;LATIN CAPITAL LETTER SCRIPT R;Lu;0;L;;;;;N;;;;AB4B; +AB6D;LATIN CAPITAL LETTER SCRIPT R WITH RING;Lu;0;L;;;;;N;;;;AB6F; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -15029,8 +15031,8 @@ AB47;LATIN SMALL LETTER R WITHOUT HANDLE;Ll;0;L;;;;;N;;;;; AB48;LATIN SMALL LETTER DOUBLE R;Ll;0;L;;;;;N;;;;; AB49;LATIN SMALL LETTER R WITH CROSSED-TAIL;Ll;0;L;;;;;N;;;;; AB4A;LATIN SMALL LETTER DOUBLE R WITH CROSSED-TAIL;Ll;0;L;;;;;N;;;;; -AB4B;LATIN SMALL LETTER SCRIPT R;Ll;0;L;;;;;N;;;;; -AB4C;LATIN SMALL LETTER SCRIPT R WITH RING;Ll;0;L;;;;;N;;;;; +AB4B;LATIN SMALL LETTER SCRIPT R;Ll;0;L;;;;;N;;;AB6C;;AB6C +AB4C;LATIN SMALL LETTER SCRIPT R WITH RING;Ll;0;L;;;;;N;;;AB6D;;AB6D AB4D;LATIN SMALL LETTER BASELINE ESH;Ll;0;L;;;;;N;;;;; AB4E;LATIN SMALL LETTER U WITH SHORT RIGHT LEG;Ll;0;L;;;;;N;;;;; AB4F;LATIN SMALL LETTER U BAR WITH SHORT RIGHT LEG;Ll;0;L;;;;;N;;;;; From 60cb8bc23485160ba71f961a8d6dd063d41ebb80 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 4 Oct 2024 01:05:04 +0200 Subject: [PATCH 2/6] Bad uppercase mapping --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 90c14d102..93d838d8a 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,5 +1,5 @@ AB6C;LATIN CAPITAL LETTER SCRIPT R;Lu;0;L;;;;;N;;;;AB4B; -AB6D;LATIN CAPITAL LETTER SCRIPT R WITH RING;Lu;0;L;;;;;N;;;;AB6F; +AB6D;LATIN CAPITAL LETTER SCRIPT R WITH RING;Lu;0;L;;;;;N;;;;AB4C; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 0ecab6ffe7eeb6ba2e6496c8a32b1f6aec43acf5 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 4 Oct 2024 01:07:22 +0200 Subject: [PATCH 3/6] lb=AL --- unicodetools/data/ucd/dev/LineBreak.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3ce258217..1a420d968 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:55 GMT +# LineBreak-17.0.0.txt +# Date: 2024-10-03, 23:06:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1704,6 +1704,7 @@ AB5C..AB5F ; AL # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMA AB60..AB68 ; AL # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; AL # Lm MODIFIER LETTER SMALL TURNED W AB6A..AB6B ; AL # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB6C..AB6D ; AL # Lu [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; AL # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; AL # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; CM # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP From 0ded1a814bff23ace515f48552f7ec9df2f074cc Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 4 Oct 2024 01:16:13 +0200 Subject: [PATCH 4/6] Latin --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd..8807f2eee 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +AB6C..AB6D ; Latin # Scripts-16.0.0.txt # Date: 2024-04-30, 21:48:40 GMT # © 2024 Unicode®, Inc. From bb88c492290ea57d23d8a966ccf7b9e556398485 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 4 Oct 2024 01:17:38 +0200 Subject: [PATCH 5/6] Regenerate UCD --- unicodetools/data/ucd/dev/CaseFolding.txt | 6 ++- unicodetools/data/ucd/dev/DerivedAge.txt | 14 +++++- .../data/ucd/dev/DerivedCoreProperties.txt | 44 ++++++++++++------- .../ucd/dev/DerivedNormalizationProps.txt | 15 ++++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++- unicodetools/data/ucd/dev/Scripts.txt | 8 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 6 +-- .../data/ucd/dev/VerticalOrientation.txt | 5 ++- .../dev/auxiliary/SentenceBreakProperty.txt | 7 +-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 7 +-- .../ucd/dev/extracted/DerivedBidiClass.txt | 7 +-- .../dev/extracted/DerivedCombiningClass.txt | 7 +-- .../dev/extracted/DerivedEastAsianWidth.txt | 7 +-- .../dev/extracted/DerivedGeneralCategory.txt | 11 ++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 11 ++--- .../data/ucd/dev/extracted/DerivedName.txt | 8 ++-- 16 files changed, 105 insertions(+), 63 deletions(-) diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 1b7a9c156..7d99ca9ae 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ -# CaseFolding-16.0.0.txt -# Date: 2024-04-30, 21:48:11 GMT +# CaseFolding-17.0.0.txt +# Date: 2024-10-03, 23:16:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1249,6 +1249,8 @@ A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA A7DC; C; 019B; # LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H +AB6C; C; AB4B; # LATIN CAPITAL LETTER SCRIPT R +AB6D; C; AB4C; # LATIN CAPITAL LETTER SCRIPT R WITH RING AB70; C; 13A0; # CHEROKEE SMALL LETTER A AB71; C; 13A1; # CHEROKEE SMALL LETTER E AB72; C; 13A2; # CHEROKEE SMALL LETTER I diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e48..41e7e66b3 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ -# DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# DerivedAge-17.0.0.txt +# Date: 2024-10-03, 23:16:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2059,4 +2059,14 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Total code points: 5185 +# ================================================ + +# Age=V17_0 + +# Newly assigned in Unicode 17.0.0 (September, 2025) + +AB6C..AB6D ; 17.0 # [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING + +# Total code points: 2 + # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1075638f1..429255ab5 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ -# DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-31, 18:09:32 GMT +# DerivedCoreProperties-17.0.0.txt +# Date: 2024-10-03, 23:17:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -926,6 +926,7 @@ AB30..AB5A ; Alphabetic # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMA AB5C..AB5F ; Alphabetic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; Alphabetic # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; Alphabetic # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; Alphabetic # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; Alphabetic # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; Alphabetic # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP @@ -1441,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142759 +# Total code points: 142761 # ================================================ @@ -2756,6 +2757,7 @@ A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA A7DC ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Uppercase # L& LATIN CAPITAL LETTER REVERSED HALF H +AB6C..AB6D ; Uppercase # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Uppercase # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 104B0..104D3 ; Uppercase # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA @@ -2803,7 +2805,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1978 +# Total code points: 1980 # ================================================ @@ -2923,6 +2925,7 @@ AB30..AB5A ; Cased # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LE AB5C..AB5F ; Cased # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; Cased # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; Cased # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; Cased # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; Cased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Cased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Cased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -2988,7 +2991,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4578 +# Total code points: 4580 # ================================================ @@ -4116,6 +4119,7 @@ A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID A7DA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA A7DC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER REVERSED HALF H +AB6C..AB6D ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Lowercased # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 104B0..104D3 ; Changes_When_Lowercased # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA @@ -4129,7 +4133,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1460 +# Total code points: 1462 # ================================================ @@ -4752,6 +4756,7 @@ A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCO A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S A7DB ; Changes_When_Uppercased # L& LATIN SMALL LETTER LAMBDA A7F6 ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED HALF H +AB4B..AB4C ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER SCRIPT R..LATIN SMALL LETTER SCRIPT R WITH RING AB53 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Uppercased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Changes_When_Uppercased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST @@ -4769,7 +4774,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1552 +# Total code points: 1554 # ================================================ @@ -5391,6 +5396,7 @@ A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCO A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S A7DB ; Changes_When_Titlecased # L& LATIN SMALL LETTER LAMBDA A7F6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED HALF H +AB4B..AB4C ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER SCRIPT R..LATIN SMALL LETTER SCRIPT R WITH RING AB53 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Titlecased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Changes_When_Titlecased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST @@ -5408,7 +5414,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1479 +# Total code points: 1481 # ================================================ @@ -6028,6 +6034,7 @@ A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID A7DA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA A7DC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H +AB6C..AB6D ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; Changes_When_Casefolded # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Changes_When_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_Casefolded # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -6044,7 +6051,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1533 +# Total code points: 1535 # ================================================ @@ -6160,7 +6167,9 @@ A7B0..A7CD ; Changes_When_Casemapped # L& [30] LATIN CAPITAL LETTER TURNED K A7D0..A7D1 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D6..A7DC ; Changes_When_Casemapped # L& [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +AB4B..AB4C ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER SCRIPT R..LATIN SMALL LETTER SCRIPT R WITH RING AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI +AB6C..AB6D ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; Changes_When_Casemapped # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Changes_When_Casemapped # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_Casemapped # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -6185,7 +6194,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2981 +# Total code points: 2985 # ================================================ @@ -6615,6 +6624,7 @@ AB30..AB5A ; ID_Start # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL AB5C..AB5F ; ID_Start # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; ID_Start # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; ID_Start # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; ID_Start # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; ID_Start # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; ID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM AC00..D7A3 ; ID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH @@ -6962,7 +6972,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141271 # ================================================ @@ -7750,6 +7760,7 @@ AB30..AB5A ; ID_Continue # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SM AB5C..AB5F ; ID_Continue # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; ID_Continue # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; ID_Continue # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; ID_Continue # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; ID_Continue # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; ID_Continue # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP @@ -8370,7 +8381,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144543 # ================================================ @@ -8796,6 +8807,7 @@ AB30..AB5A ; XID_Start # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMAL AB5C..AB5F ; XID_Start # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; XID_Start # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; XID_Start # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; XID_Start # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; XID_Start # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; XID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM AC00..D7A3 ; XID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH @@ -9148,7 +9160,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141248 # ================================================ @@ -9931,6 +9943,7 @@ AB30..AB5A ; XID_Continue # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN S AB5C..AB5F ; XID_Continue # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; XID_Continue # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; XID_Continue # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; XID_Continue # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; XID_Continue # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; XID_Continue # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP @@ -10557,7 +10570,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144524 # ================================================ @@ -12072,6 +12085,7 @@ AB5C..AB5F ; Grapheme_Base # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LE AB60..AB68 ; Grapheme_Base # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; Grapheme_Base # Lm MODIFIER LETTER SMALL TURNED W AB6A..AB6B ; Grapheme_Base # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB6C..AB6D ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; Grapheme_Base # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; Grapheme_Base # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP @@ -12812,7 +12826,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152730 +# Total code points: 152732 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index ce636abb5..a56a9bf45 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ -# DerivedNormalizationProps-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedNormalizationProps-17.0.0.txt +# Date: 2024-10-03, 23:17:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -5464,6 +5464,8 @@ AB5D ; NFKC_CF; AB37 # Lm MODIFIER LETTER SMALL L WITH AB5E ; NFKC_CF; 026B # Lm MODIFIER LETTER SMALL L WITH MIDDLE TILDE AB5F ; NFKC_CF; AB52 # Lm MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; NFKC_CF; 028D # Lm MODIFIER LETTER SMALL TURNED W +AB6C ; NFKC_CF; AB4B # L& LATIN CAPITAL LETTER SCRIPT R +AB6D ; NFKC_CF; AB4C # L& LATIN CAPITAL LETTER SCRIPT R WITH RING AB70 ; NFKC_CF; 13A0 # L& CHEROKEE SMALL LETTER A AB71 ; NFKC_CF; 13A1 # L& CHEROKEE SMALL LETTER E AB72 ; NFKC_CF; 13A2 # L& CHEROKEE SMALL LETTER I @@ -9148,7 +9150,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] .... -# Total code points: 10554 +# Total code points: 10556 # ================================================ @@ -11598,6 +11600,8 @@ AB5D ; NFKC_SCF; AB37 # Lm MODIFIER LETTER SMALL L WITH AB5E ; NFKC_SCF; 026B # Lm MODIFIER LETTER SMALL L WITH MIDDLE TILDE AB5F ; NFKC_SCF; AB52 # Lm MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; NFKC_SCF; 028D # Lm MODIFIER LETTER SMALL TURNED W +AB6C ; NFKC_SCF; AB4B # L& LATIN CAPITAL LETTER SCRIPT R +AB6D ; NFKC_SCF; AB4C # L& LATIN CAPITAL LETTER SCRIPT R WITH RING AB70 ; NFKC_SCF; 13A0 # L& CHEROKEE SMALL LETTER A AB71 ; NFKC_SCF; 13A1 # L& CHEROKEE SMALL LETTER E AB72 ; NFKC_SCF; 13A2 # L& CHEROKEE SMALL LETTER I @@ -15282,7 +15286,7 @@ E0080..E00FF ; NFKC_SCF; # Cn [128] .... -# Total code points: 10516 +# Total code points: 10518 # ================================================ @@ -16062,6 +16066,7 @@ A7F5 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER REV A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Changes_When_NFKC_Casefolded # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; Changes_When_NFKC_Casefolded # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; Changes_When_NFKC_Casefolded # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; Changes_When_NFKC_Casefolded # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA F900..FA0D ; Changes_When_NFKC_Casefolded # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D FA10 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA10 @@ -16300,6 +16305,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] .... -# Total code points: 10554 +# Total code points: 10556 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea..e52f9496b 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# EastAsianWidth-17.0.0.txt +# Date: 2024-10-03, 23:17:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1673,6 +1673,7 @@ AB5C..AB5F ; N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMA AB60..AB68 ; N # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; N # Lm MODIFIER LETTER SMALL TURNED W AB6A..AB6B ; N # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB6C..AB6D ; N # Lu [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; N # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; N # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; N # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 8807f2eee..7b7d2dd35 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,6 +1,5 @@ -AB6C..AB6D ; Latin -# Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Scripts-17.0.0.txt +# Date: 2024-10-03, 23:17:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -692,6 +691,7 @@ AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMA AB60..AB64 ; Latin # L& [5] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER INVERTED ALPHA AB66..AB68 ; Latin # L& [3] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; Latin # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; Latin # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z @@ -703,7 +703,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1487 +# Total code points: 1489 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 93d838d8a..67cca3056 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,5 +1,3 @@ -AB6C;LATIN CAPITAL LETTER SCRIPT R;Lu;0;L;;;;;N;;;;AB4B; -AB6D;LATIN CAPITAL LETTER SCRIPT R WITH RING;Lu;0;L;;;;;N;;;;AB4C; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -15032,7 +15030,7 @@ AB48;LATIN SMALL LETTER DOUBLE R;Ll;0;L;;;;;N;;;;; AB49;LATIN SMALL LETTER R WITH CROSSED-TAIL;Ll;0;L;;;;;N;;;;; AB4A;LATIN SMALL LETTER DOUBLE R WITH CROSSED-TAIL;Ll;0;L;;;;;N;;;;; AB4B;LATIN SMALL LETTER SCRIPT R;Ll;0;L;;;;;N;;;AB6C;;AB6C -AB4C;LATIN SMALL LETTER SCRIPT R WITH RING;Ll;0;L;;;;;N;;;AB6D;;AB6D +AB4C;LATIN SMALL LETTER SCRIPT R WITH RING;Ll;0;L;;;;;N;;;AB6D;;AB6D AB4D;LATIN SMALL LETTER BASELINE ESH;Ll;0;L;;;;;N;;;;; AB4E;LATIN SMALL LETTER U WITH SHORT RIGHT LEG;Ll;0;L;;;;;N;;;;; AB4F;LATIN SMALL LETTER U BAR WITH SHORT RIGHT LEG;Ll;0;L;;;;;N;;;;; @@ -15064,6 +15062,8 @@ AB68;LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE;Ll;0;L;;;;;N;;;;; AB69;MODIFIER LETTER SMALL TURNED W;Lm;0;L; 028D;;;;N;;;;; AB6A;MODIFIER LETTER LEFT TACK;Sk;0;ON;;;;;N;;;;; AB6B;MODIFIER LETTER RIGHT TACK;Sk;0;ON;;;;;N;;;;; +AB6C;LATIN CAPITAL LETTER SCRIPT R;Lu;0;L;;;;;N;;;;AB4B; +AB6D;LATIN CAPITAL LETTER SCRIPT R WITH RING;Lu;0;L;;;;;N;;;;AB4C; AB70;CHEROKEE SMALL LETTER A;Ll;0;L;;;;;N;;;13A0;;13A0 AB71;CHEROKEE SMALL LETTER E;Ll;0;L;;;;;N;;;13A1;;13A1 AB72;CHEROKEE SMALL LETTER I;Ll;0;L;;;;;N;;;13A2;;13A2 diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd7228..da2b4e106 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# VerticalOrientation-17.0.0.txt +# Date: 2024-10-03, 23:17:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1482,6 +1482,7 @@ AB5C..AB5F ; R # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMA AB60..AB68 ; R # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; R # Lm MODIFIER LETTER SMALL TURNED W AB6A..AB6B ; R # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB6C..AB6D ; R # Lu [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; R # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; R # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; R # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index ca3689e6b..f59620539 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ -# SentenceBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:32 GMT +# SentenceBreakProperty-17.0.0.txt +# Date: 2024-10-03, 23:17:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1935,6 +1935,7 @@ A7D8 ; Upper # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Upper # L& LATIN CAPITAL LETTER LAMBDA A7DC ; Upper # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Upper # L& LATIN CAPITAL LETTER REVERSED HALF H +AB6C..AB6D ; Upper # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 104B0..104D3 ; Upper # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA @@ -1982,7 +1983,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1963 +# Total code points: 1965 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e557c3d0d..47b4ca2f8 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ -# WordBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:36 GMT +# WordBreakProperty-17.0.0.txt +# Date: 2024-10-03, 23:17:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1041,6 +1041,7 @@ AB5B ; ALetter # Sk MODIFIER BREVE WITH INVERTED BREVE AB5C..AB5F ; ALetter # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; ALetter # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; ALetter # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; ALetter # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; ALetter # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; ALetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH @@ -1355,7 +1356,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33793 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa..546c8d67b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ -# DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# DerivedBidiClass-17.0.0.txt +# Date: 2024-10-03, 23:16:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -752,6 +752,7 @@ AB5B ; L # Sk MODIFIER BREVE WITH INVERTED BREVE AB5C..AB5F ; L # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB68 ; L # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; L # Lm MODIFIER LETTER SMALL TURNED W +AB6C..AB6D ; L # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; L # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; L # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP @@ -1214,7 +1215,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815351 code points not listed here. +# The above property value applies to 815349 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96..1d5d418c4 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ -# DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# DerivedCombiningClass-17.0.0.txt +# Date: 2024-10-03, 23:16:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1206,6 +1206,7 @@ AB5C..AB5F ; 0 # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U AB60..AB68 ; 0 # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; 0 # Lm MODIFIER LETTER SMALL TURNED W AB6A..AB6B ; 0 # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB6C..AB6D ; 0 # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; 0 # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; 0 # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; 0 # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP @@ -2060,7 +2061,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. +# The above property value applies to 821579 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaa..9193771d0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ -# DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedEastAsianWidth-17.0.0.txt +# Date: 2024-10-03, 23:17:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1304,6 +1304,7 @@ AB5C..AB5F ; N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U AB60..AB68 ; N # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; N # Lm MODIFIER LETTER SMALL TURNED W AB6A..AB6B ; N # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB6C..AB6D ; N # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; N # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; N # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; N # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP @@ -2103,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. +# The above property value applies to 761097 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca9..28801a955 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ -# DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedGeneralCategory-17.0.0.txt +# Date: 2024-10-03, 23:17:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -318,7 +318,7 @@ AB0F..AB10 ; Cn # [2] .. AB17..AB1F ; Cn # [9] .. AB27 ; Cn # AB2F ; Cn # -AB6C..AB6F ; Cn # [4] .. +AB6E..AB6F ; Cn # [2] .. ABEE..ABEF ; Cn # [2] .. ABFA..ABFF ; Cn # [6] .. D7A4..D7AF ; Cn # [12] .. @@ -747,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819533 +# Total code points: 819531 # ================================================ @@ -1361,6 +1361,7 @@ A7D8 ; Lu # LATIN CAPITAL LETTER SIGMOID S A7DA ; Lu # LATIN CAPITAL LETTER LAMBDA A7DC ; Lu # LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Lu # LATIN CAPITAL LETTER REVERSED HALF H +AB6C..AB6D ; Lu # [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Lu # [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 104B0..104D3 ; Lu # [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA @@ -1405,7 +1406,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1858 +# Total code points: 1860 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 31d143e92..6f0b63bc0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ -# DerivedLineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:50 GMT +# DerivedLineBreak-17.0.0.txt +# Date: 2024-10-03, 23:17:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757651 code points not listed here. +# Total code points: 895119 # ================================================ @@ -1180,6 +1180,7 @@ AB5C..AB5F ; AL # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL AB60..AB68 ; AL # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; AL # Lm MODIFIER LETTER SMALL TURNED W AB6A..AB6B ; AL # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB6C..AB6D ; AL # L& [2] LATIN CAPITAL LETTER SCRIPT R..LATIN CAPITAL LETTER SCRIPT R WITH RING AB70..ABBF ; AL # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; AL # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM FB00..FB06 ; AL # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST @@ -1613,7 +1614,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26679 +# Total code points: 26681 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b5..7f2eb657d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ -# DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedName-17.0.0.txt +# Date: 2024-10-03, 23:17:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -15035,6 +15035,8 @@ AB68 ; LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE AB69 ; MODIFIER LETTER SMALL TURNED W AB6A ; MODIFIER LETTER LEFT TACK AB6B ; MODIFIER LETTER RIGHT TACK +AB6C ; LATIN CAPITAL LETTER SCRIPT R +AB6D ; LATIN CAPITAL LETTER SCRIPT R WITH RING AB70 ; CHEROKEE SMALL LETTER A AB71 ; CHEROKEE SMALL LETTER E AB72 ; CHEROKEE SMALL LETTER I @@ -45367,6 +45369,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 155000 # EOF From ecc68ada9ded4e2dbdec4cf015960adbc1d57e5d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 4 Oct 2024 03:30:48 +0200 Subject: [PATCH 6/6] test --- .../text/UCD/AdditionComparisons/148.txt | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/148.txt diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/148.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/148.txt new file mode 100644 index 000000000..1439cac5b --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/148.txt @@ -0,0 +1,20 @@ +# LATIN CAPITAL LETTER SCRIPT R/LATIN CAPITAL LETTER SCRIPT R WITH RING +# https://github.com/unicode-org/utc-release-management/issues/148 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring Block Unicode_1_Name: +Propertywise [\x{AB4B} ꭋ \N{LATIN SMALL LETTER SCRIPT R} \x{AB4C} ꭌ \N{LATIN SMALL LETTER SCRIPT R WITH RING}] + : [\x{AB6C} \N{LATIN CAPITAL LETTER SCRIPT R} \x{AB6D} \N{LATIN CAPITAL LETTER SCRIPT R WITH RING}] +CorrespondTo [\x{019B} ƛ \N{LATIN SMALL LETTER LAMBDA WITH STROKE}] + : [\x{A7DC} Ƛ \N{LATIN CAPITAL LETTER LAMBDA WITH STROKE}] +end Ignoring; + +end Ignoring; + +end Ignoring; \ No newline at end of file