From 81dcf2ec0994edfa4bd4d18e2013d16cd171ecab Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 02:18:14 +0200 Subject: [PATCH 01/14] UnicodeData.txt lines from Kirk --- unicodetools/data/ucd/dev/UnicodeData.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 03e3d4e3f..306c25a4e 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,22 @@ +1DF3E;LATIN SMALL LETTER BARRED TURNED H;Ll;0;L;;;;;N;;;;; +1DF3F;LATIN SMALL LETTER BARRED W;Ll;0;L;;;;;N;;;;; +1DFD8;MODIFIER LETTER SMALL N WITH LONG RIGHT LEG;Lm;0;L; 019E;;;;N;;;;; +1DFD9;MODIFIER LETTER SMALL R WITH LONG LEG;Lm;0;L; 027C;;;;N;;;;; +1DFDA;MODIFIER LETTER SMALL BARRED TURNED H;Lm;0;L; 1DF3E;;;;N;;;;; +1DFDB;MODIFIER LETTER SMALL J WITH STROKE;Lm;0;L; 0249;;;;N;;;;; +1DFDC;MODIFIER LETTER SMALL UPSILON WITH STROKE;Lm;0;L; 1D7F;;;;N;;;;; +1DFDD;MODIFIER LETTER SMALL BARRED W;Lm;0;L; 1DF3F;;;;N;;;;; +1DFDE;MODIFIER LETTER SMALL DB DIGRAPH;Lm;0;L; 0238;;;;N;;;;; +1DFDF;MODIFIER LETTER SMALL QP DIGRAPH;Lm;0;L; 0239;;;;N;;;;; +1DFE0;MODIFIER LETTER SMALL DEZH DIGRAPH WITH CURL;Lm;0;L; 1DF2B;;;;N;;;;; +1DFE1;MODIFIER LETTER SMALL DEZH DIGRAPH WITH RETROFLEX HOOK;Lm;0;L; 1DF19;;;;N;;;;; +1DFE2;MODIFIER LETTER SMALL ESH WITH CURL;Lm;0;L; 0286;;;;N;;;;; +1DFE3;MODIFIER LETTER SMALL ESH WITH RETROFLEX HOOK;Lm;0;L; 1D98;;;;N;;;;; +1DFE4;MODIFIER LETTER SMALL TESH DIGRAPH WITH CURL;Lm;0;L; 1DF2C;;;;N;;;;; +1DFE5;MODIFIER LETTER SMALL TESH DIGRAPH WITH RETROFLEX HOOK;Lm;0;L; 1DF1C;;;;N;;;;; +1DFE6;MODIFIER LETTER SMALL EZH WITH CURL;Lm;0;L; 0293;;;;N;;;;; +1DFE7;MODIFIER LETTER SMALL EZH WITH RETROFLEX HOOK;Lm;0;L; 1D9A;;;;N;;;;; +1DFE8;MODIFIER LETTER SMALL CAPITAL D;Lm;0;L; 1D05;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 11eb2440c43ab75fad9a1a357ab859a580056f03 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 02:20:35 +0200 Subject: [PATCH 02/14] lb=AL --- unicodetools/data/ucd/dev/LineBreak.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 2674b3af0..bf6ca9252 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2024-06-04, 23:17:00 GMT +# Date: 2024-06-05, 00:19:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3382,6 +3382,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1DF00..1DF09 ; AL # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; AL # Ll [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; AL # Ll [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; AL # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; CM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI From da0798c4ae585791f409e2c79966702bcb4ffde7 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 02:22:49 +0200 Subject: [PATCH 03/14] Latin --- unicodetools/data/ucd/dev/Scripts.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index ca5678177..5536cd04f 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,5 @@ +1DF3E..1DF3F ; Latin +1DFD8..1DFE8 ; Latin # Scripts-16.0.0.txt # Date: 2024-06-04, 23:17:24 GMT # © 2024 Unicode®, Inc. From c0b22dd1efb254472c836792cba190a8ee96e683 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 02:23:27 +0200 Subject: [PATCH 04/14] Diacritic for the modifier letters --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 395aa448d..7f5af09fa 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -889,6 +889,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM # ================================================ +1DFD8..1DFE8 ; Diacritic 005E ; Diacritic # Sk CIRCUMFLEX ACCENT 0060 ; Diacritic # Sk GRAVE ACCENT 00A8 ; Diacritic # Sk DIAERESIS From d246c4c6ea75099142a97d0686b591bf43aeb800 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 02:25:08 +0200 Subject: [PATCH 05/14] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 35 +++++++++---- .../ucd/dev/DerivedNormalizationProps.txt | 49 ++++++++++++++++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +- .../data/ucd/dev/NormalizationTest.txt | 19 ++++++- unicodetools/data/ucd/dev/PropList.txt | 6 +-- unicodetools/data/ucd/dev/Scripts.txt | 8 +-- unicodetools/data/ucd/dev/UnicodeData.txt | 38 +++++++------- .../data/ucd/dev/VerticalOrientation.txt | 4 +- .../dev/auxiliary/SentenceBreakProperty.txt | 8 +-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++- .../dev/extracted/DerivedCombiningClass.txt | 6 ++- .../extracted/DerivedDecompositionType.txt | 5 +- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++- .../dev/extracted/DerivedGeneralCategory.txt | 14 ++++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 ++-- .../data/ucd/dev/extracted/DerivedName.txt | 23 ++++++++- 18 files changed, 182 insertions(+), 71 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index ca6915de6..cb7d32df7 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-06-04, 23:16:36 GMT +# Date: 2024-06-05, 00:24:09 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2046,6 +2046,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1DF1F..1DF24 ; 16.0 # [6] LATIN SMALL LETTER D-ETH DIGRAPH..LATIN SMALL LETTER T-THETA DIGRAPH 1DF2B..1DF2C ; 16.0 # [2] LATIN SMALL LETTER DEZH DIGRAPH WITH CURL..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; 16.0 # [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; 16.0 # [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2..1F8BB ; 16.0 # [10] RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR @@ -2059,6 +2061,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5193 +# Total code points: 5212 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 76b89cab2..b3eaa6393 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-06-04, 23:16:54 GMT +# Date: 2024-06-05, 00:24:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1370,6 +1370,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1DF00..1DF09 ; Alphabetic # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; Alphabetic # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; Alphabetic # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; Alphabetic # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -1440,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142767 +# Total code points: 142786 # ================================================ @@ -2134,10 +2136,11 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Lowercase # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; Lowercase # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2577 +# Total code points: 2579 # ================================================ @@ -2979,13 +2982,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Cased # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; Cased # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W 1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4586 +# Total code points: 4588 # ================================================ @@ -3480,6 +3484,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 1DA84 ; Case_Ignorable # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; Case_Ignorable # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; Case_Ignorable # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DFD8..1DFE8 ; Case_Ignorable # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Case_Ignorable # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -3502,7 +3507,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2766 # ================================================ @@ -6898,6 +6903,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF00..1DF09 ; ID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; ID_Start # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; ID_Start # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; ID_Start # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -6958,7 +6965,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141277 +# Total code points: 141296 # ================================================ @@ -8285,6 +8292,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DF00..1DF09 ; ID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; ID_Continue # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; ID_Continue # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; ID_Continue # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -8365,7 +8374,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144549 +# Total code points: 144568 # ================================================ @@ -9082,6 +9091,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1DF00..1DF09 ; XID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; XID_Start # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; XID_Start # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; XID_Start # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -9142,7 +9153,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141254 +# Total code points: 141273 # ================================================ @@ -10470,6 +10481,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1DF00..1DF09 ; XID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; XID_Continue # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; XID_Continue # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; XID_Continue # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -10550,7 +10563,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144530 +# Total code points: 144549 # ================================================ @@ -12684,6 +12697,8 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1DF00..1DF09 ; Grapheme_Base # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; Grapheme_Base # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; Grapheme_Base # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; Grapheme_Base # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -12803,7 +12818,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152746 +# Total code points: 152765 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index ce636abb5..04437fb0f 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# Date: 2024-06-05, 00:24:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1707,6 +1707,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKD_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKD_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKD_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DFD8..1DFE8 ; NFKD_QC; N # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; NFKD_QC; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1EE00..1EE03 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; NFKD_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -1753,7 +1754,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 17085 +# Total code points: 17102 # ================================================ @@ -2118,6 +2119,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKC_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKC_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKC_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DFD8..1DFE8 ; NFKC_QC; N # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; NFKC_QC; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1EE00..1EE03 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; NFKC_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -2164,7 +2166,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKC_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4964 +# Total code points: 4981 # ================================================ @@ -8225,6 +8227,23 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] ...... -# Total code points: 10554 +# Total code points: 10571 # ================================================ @@ -14359,6 +14378,23 @@ FFF0..FFF8 ; NFKC_SCF; # Cn [9] ...... -# Total code points: 10516 +# Total code points: 10533 # ================================================ @@ -16246,6 +16282,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] ...... -# Total code points: 10554 +# Total code points: 10571 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 9cccdf5e8..6bd3c51f8 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-06-04, 23:16:59 GMT +# Date: 2024-06-05, 00:24:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2467,6 +2467,8 @@ FFFD ; A # So REPLACEMENT CHARACTER 1DF00..1DF09 ; N # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; N # Ll [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; N # Ll [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; N # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 3aae8f72e..c4cc8216c 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2024-04-30, 21:48:23 GMT +# Date: 2024-06-05, 00:24:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -16284,6 +16284,23 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 1D7FD;1D7FD;1D7FD;0037;0037; # (𝟽; 𝟽; 𝟽; 7; 7; ) MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE;1D7FE;1D7FE;0038;0038; # (𝟾; 𝟾; 𝟾; 8; 8; ) MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF;1D7FF;1D7FF;0039;0039; # (𝟿; 𝟿; 𝟿; 9; 9; ) MATHEMATICAL MONOSPACE DIGIT NINE +1DFD8;1DFD8;1DFD8;019E;019E; # (𝿘; 𝿘; 𝿘; ƞ; ƞ; ) MODIFIER LETTER SMALL N WITH LONG RIGHT LEG +1DFD9;1DFD9;1DFD9;027C;027C; # (𝿙; 𝿙; 𝿙; ɼ; ɼ; ) MODIFIER LETTER SMALL R WITH LONG LEG +1DFDA;1DFDA;1DFDA;1DF3E;1DF3E; # (𝿚; 𝿚; 𝿚; 𝼾; 𝼾; ) MODIFIER LETTER SMALL BARRED TURNED H +1DFDB;1DFDB;1DFDB;0249;0249; # (𝿛; 𝿛; 𝿛; ɉ; ɉ; ) MODIFIER LETTER SMALL J WITH STROKE +1DFDC;1DFDC;1DFDC;1D7F;1D7F; # (𝿜; 𝿜; 𝿜; ᵿ; ᵿ; ) MODIFIER LETTER SMALL UPSILON WITH STROKE +1DFDD;1DFDD;1DFDD;1DF3F;1DF3F; # (𝿝; 𝿝; 𝿝; 𝼿; 𝼿; ) MODIFIER LETTER SMALL BARRED W +1DFDE;1DFDE;1DFDE;0238;0238; # (𝿞; 𝿞; 𝿞; ȸ; ȸ; ) MODIFIER LETTER SMALL DB DIGRAPH +1DFDF;1DFDF;1DFDF;0239;0239; # (𝿟; 𝿟; 𝿟; ȹ; ȹ; ) MODIFIER LETTER SMALL QP DIGRAPH +1DFE0;1DFE0;1DFE0;1DF2B;1DF2B; # (𝿠; 𝿠; 𝿠; 𝼫; 𝼫; ) MODIFIER LETTER SMALL DEZH DIGRAPH WITH CURL +1DFE1;1DFE1;1DFE1;1DF19;1DF19; # (𝿡; 𝿡; 𝿡; 𝼙; 𝼙; ) MODIFIER LETTER SMALL DEZH DIGRAPH WITH RETROFLEX HOOK +1DFE2;1DFE2;1DFE2;0286;0286; # (𝿢; 𝿢; 𝿢; ʆ; ʆ; ) MODIFIER LETTER SMALL ESH WITH CURL +1DFE3;1DFE3;1DFE3;1D98;1D98; # (𝿣; 𝿣; 𝿣; ᶘ; ᶘ; ) MODIFIER LETTER SMALL ESH WITH RETROFLEX HOOK +1DFE4;1DFE4;1DFE4;1DF2C;1DF2C; # (𝿤; 𝿤; 𝿤; 𝼬; 𝼬; ) MODIFIER LETTER SMALL TESH DIGRAPH WITH CURL +1DFE5;1DFE5;1DFE5;1DF1C;1DF1C; # (𝿥; 𝿥; 𝿥; 𝼜; 𝼜; ) MODIFIER LETTER SMALL TESH DIGRAPH WITH RETROFLEX HOOK +1DFE6;1DFE6;1DFE6;0293;0293; # (𝿦; 𝿦; 𝿦; ʓ; ʓ; ) MODIFIER LETTER SMALL EZH WITH CURL +1DFE7;1DFE7;1DFE7;1D9A;1D9A; # (𝿧; 𝿧; 𝿧; ᶚ; ᶚ; ) MODIFIER LETTER SMALL EZH WITH RETROFLEX HOOK +1DFE8;1DFE8;1DFE8;1D05;1D05; # (𝿨; 𝿨; 𝿨; ᴅ; ᴅ; ) MODIFIER LETTER SMALL CAPITAL D 1E030;1E030;1E030;0430;0430; # (𞀰; 𞀰; 𞀰; а; а; ) MODIFIER LETTER CYRILLIC SMALL A 1E031;1E031;1E031;0431;0431; # (𞀱; 𞀱; 𞀱; б; б; ) MODIFIER LETTER CYRILLIC SMALL BE 1E032;1E032;1E032;0432;0432; # (𞀲; 𞀲; 𞀲; в; в; ) MODIFIER LETTER CYRILLIC SMALL VE diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 7f5af09fa..ac48e5411 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2024-05-08, 03:40:06 GMT +# Date: 2024-06-05, 00:24:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -889,7 +889,6 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM # ================================================ -1DFD8..1DFE8 ; Diacritic 005E ; Diacritic # Sk CIRCUMFLEX ACCENT 0060 ; Diacritic # Sk GRAVE ACCENT 00A8 ; Diacritic # Sk DIAERESIS @@ -1142,6 +1141,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1DFD8..1DFE8 ; Diacritic # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Diacritic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE @@ -1151,7 +1151,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1178 +# Total code points: 1195 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 5536cd04f..ab219c4a8 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,7 +1,5 @@ -1DF3E..1DF3F ; Latin -1DFD8..1DFE8 ; Latin # Scripts-16.0.0.txt -# Date: 2024-06-04, 23:17:24 GMT +# Date: 2024-06-05, 00:24:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -702,8 +700,10 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Latin # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; Latin # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; Latin # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; Latin # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D -# Total code points: 1495 +# Total code points: 1514 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 306c25a4e..a26f9f1e8 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,22 +1,3 @@ -1DF3E;LATIN SMALL LETTER BARRED TURNED H;Ll;0;L;;;;;N;;;;; -1DF3F;LATIN SMALL LETTER BARRED W;Ll;0;L;;;;;N;;;;; -1DFD8;MODIFIER LETTER SMALL N WITH LONG RIGHT LEG;Lm;0;L; 019E;;;;N;;;;; -1DFD9;MODIFIER LETTER SMALL R WITH LONG LEG;Lm;0;L; 027C;;;;N;;;;; -1DFDA;MODIFIER LETTER SMALL BARRED TURNED H;Lm;0;L; 1DF3E;;;;N;;;;; -1DFDB;MODIFIER LETTER SMALL J WITH STROKE;Lm;0;L; 0249;;;;N;;;;; -1DFDC;MODIFIER LETTER SMALL UPSILON WITH STROKE;Lm;0;L; 1D7F;;;;N;;;;; -1DFDD;MODIFIER LETTER SMALL BARRED W;Lm;0;L; 1DF3F;;;;N;;;;; -1DFDE;MODIFIER LETTER SMALL DB DIGRAPH;Lm;0;L; 0238;;;;N;;;;; -1DFDF;MODIFIER LETTER SMALL QP DIGRAPH;Lm;0;L; 0239;;;;N;;;;; -1DFE0;MODIFIER LETTER SMALL DEZH DIGRAPH WITH CURL;Lm;0;L; 1DF2B;;;;N;;;;; -1DFE1;MODIFIER LETTER SMALL DEZH DIGRAPH WITH RETROFLEX HOOK;Lm;0;L; 1DF19;;;;N;;;;; -1DFE2;MODIFIER LETTER SMALL ESH WITH CURL;Lm;0;L; 0286;;;;N;;;;; -1DFE3;MODIFIER LETTER SMALL ESH WITH RETROFLEX HOOK;Lm;0;L; 1D98;;;;N;;;;; -1DFE4;MODIFIER LETTER SMALL TESH DIGRAPH WITH CURL;Lm;0;L; 1DF2C;;;;N;;;;; -1DFE5;MODIFIER LETTER SMALL TESH DIGRAPH WITH RETROFLEX HOOK;Lm;0;L; 1DF1C;;;;N;;;;; -1DFE6;MODIFIER LETTER SMALL EZH WITH CURL;Lm;0;L; 0293;;;;N;;;;; -1DFE7;MODIFIER LETTER SMALL EZH WITH RETROFLEX HOOK;Lm;0;L; 1D9A;;;;N;;;;; -1DFE8;MODIFIER LETTER SMALL CAPITAL D;Lm;0;L; 1D05;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -35683,6 +35664,25 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1DF2A;LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF2B;LATIN SMALL LETTER DEZH DIGRAPH WITH CURL;Ll;0;L;;;;;N;;;;; 1DF2C;LATIN SMALL LETTER TESH DIGRAPH WITH CURL;Ll;0;L;;;;;N;;;;; +1DF3E;LATIN SMALL LETTER BARRED TURNED H;Ll;0;L;;;;;N;;;;; +1DF3F;LATIN SMALL LETTER BARRED W;Ll;0;L;;;;;N;;;;; +1DFD8;MODIFIER LETTER SMALL N WITH LONG RIGHT LEG;Lm;0;L; 019E;;;;N;;;;; +1DFD9;MODIFIER LETTER SMALL R WITH LONG LEG;Lm;0;L; 027C;;;;N;;;;; +1DFDA;MODIFIER LETTER SMALL BARRED TURNED H;Lm;0;L; 1DF3E;;;;N;;;;; +1DFDB;MODIFIER LETTER SMALL J WITH STROKE;Lm;0;L; 0249;;;;N;;;;; +1DFDC;MODIFIER LETTER SMALL UPSILON WITH STROKE;Lm;0;L; 1D7F;;;;N;;;;; +1DFDD;MODIFIER LETTER SMALL BARRED W;Lm;0;L; 1DF3F;;;;N;;;;; +1DFDE;MODIFIER LETTER SMALL DB DIGRAPH;Lm;0;L; 0238;;;;N;;;;; +1DFDF;MODIFIER LETTER SMALL QP DIGRAPH;Lm;0;L; 0239;;;;N;;;;; +1DFE0;MODIFIER LETTER SMALL DEZH DIGRAPH WITH CURL;Lm;0;L; 1DF2B;;;;N;;;;; +1DFE1;MODIFIER LETTER SMALL DEZH DIGRAPH WITH RETROFLEX HOOK;Lm;0;L; 1DF19;;;;N;;;;; +1DFE2;MODIFIER LETTER SMALL ESH WITH CURL;Lm;0;L; 0286;;;;N;;;;; +1DFE3;MODIFIER LETTER SMALL ESH WITH RETROFLEX HOOK;Lm;0;L; 1D98;;;;N;;;;; +1DFE4;MODIFIER LETTER SMALL TESH DIGRAPH WITH CURL;Lm;0;L; 1DF2C;;;;N;;;;; +1DFE5;MODIFIER LETTER SMALL TESH DIGRAPH WITH RETROFLEX HOOK;Lm;0;L; 1DF1C;;;;N;;;;; +1DFE6;MODIFIER LETTER SMALL EZH WITH CURL;Lm;0;L; 0293;;;;N;;;;; +1DFE7;MODIFIER LETTER SMALL EZH WITH RETROFLEX HOOK;Lm;0;L; 1D9A;;;;N;;;;; +1DFE8;MODIFIER LETTER SMALL CAPITAL D;Lm;0;L; 1D05;;;;N;;;;; 1E000;COMBINING GLAGOLITIC LETTER AZU;Mn;230;NSM;;;;;N;;;;; 1E001;COMBINING GLAGOLITIC LETTER BUKY;Mn;230;NSM;;;;;N;;;;; 1E002;COMBINING GLAGOLITIC LETTER VEDE;Mn;230;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index b7b034580..3ca1533f1 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-06-04, 23:17:26 GMT +# Date: 2024-06-05, 00:25:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2319,6 +2319,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1DF00..1DF09 ; R # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; R # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; R # Ll [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; R # Ll [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; R # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; R # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; R # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; R # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 97b158b72..f15502572 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-06-04, 23:17:24 GMT +# Date: 2024-06-05, 00:24:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1316,10 +1316,11 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Lower # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Lower # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; Lower # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2530 +# Total code points: 2532 # ================================================ @@ -2526,6 +2527,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; OLetter # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1DF0A ; OLetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DFD8..1DFE8 ; OLetter # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E100..1E12C ; OLetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; OLetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; OLetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ @@ -2584,7 +2586,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136908 +# Total code points: 136925 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 432b4ec9e..d811dff0e 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-06-04, 23:17:26 GMT +# Date: 2024-06-05, 00:25:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1300,6 +1300,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF00..1DF09 ; ALetter # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ALetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; ALetter # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; ALetter # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; ALetter # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; ALetter # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ALetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ALetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1354,7 +1356,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33799 +# Total code points: 33818 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index f84f21ac2..b4d9f7f35 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2024-06-04, 23:16:52 GMT +# Date: 2024-06-05, 00:24:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1174,6 +1174,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1DF00..1DF09 ; L # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; L # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; L # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; L # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1213,7 +1215,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815343 code points not listed here. +# The above property value applies to 815324 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 2f0a4a5b3..59c85e7f4 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-06-04, 23:16:53 GMT +# Date: 2024-06-05, 00:24:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1935,6 +1935,8 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1DF00..1DF09 ; 0 # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; 0 # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; 0 # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; 0 # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; 0 # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; 0 # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; 0 # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; 0 # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -2059,7 +2061,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821573 code points not listed here. +# The above property value applies to 821554 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index a825479ac..6f24a8342 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ # DerivedDecompositionType-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-06-05, 00:24:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -880,11 +880,12 @@ AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W 10781..10785 ; Super # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Super # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Super # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1DFD8..1DFE8 ; Super # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E050 ; Super # Lm [33] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL PALOCHKA 1E06B..1E06D ; Super # Lm [3] MODIFIER LETTER CYRILLIC SMALL ES WITH DESCENDER..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1F16A..1F16C ; Super # So [3] RAISED MC SIGN..RAISED MR SIGN -# Total code points: 249 +# Total code points: 266 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 44989f9b8..7ee38666f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-06-04, 23:16:56 GMT +# Date: 2024-06-05, 00:24:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1964,6 +1964,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1DF00..1DF09 ; N # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; N # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; N # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; N # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -2102,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761091 code points not listed here. +# The above property value applies to 761072 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 5956fb1b6..cc6924f12 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2024-06-04, 23:16:56 GMT +# Date: 2024-06-05, 00:24:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -632,7 +632,9 @@ FFFE..FFFF ; Cn # [2] .. 1DA8C..1DA9A ; Cn # [15] .. 1DAA0 ; Cn # 1DAB0..1DEFF ; Cn # [1104] .. -1DF2D..1DFFF ; Cn # [211] .. +1DF2D..1DF3D ; Cn # [17] .. +1DF40..1DFD7 ; Cn # [152] .. +1DFE9..1DFFF ; Cn # [23] .. 1E007 ; Cn # 1E019..1E01A ; Cn # [2] .. 1E022 ; Cn # @@ -746,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819525 +# Total code points: 819506 # ================================================ @@ -2070,9 +2072,10 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Ll # [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Ll # [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; Ll # [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2266 +# Total code points: 2268 # ================================================ @@ -2166,12 +2169,13 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1AFF0..1AFF3 ; Lm # [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Lm # [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Lm # [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1DFD8..1DFE8 ; Lm # [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Lm # [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E137..1E13D ; Lm # [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 404 +# Total code points: 421 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 4d48be822..8071f65bb 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-06-04, 23:16:57 GMT +# Date: 2024-06-05, 00:24:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757645 code points not listed here. -# Total code points: 895113 +# The above property value applies to 757626 code points not listed here. +# Total code points: 895094 # ================================================ @@ -1521,6 +1521,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1DF00..1DF09 ; AL # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; AL # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E..1DF3F ; AL # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; AL # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; AL # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; AL # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; AL # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1614,7 +1616,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26698 +# Total code points: 26717 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 513929e0c..e8e3a0241 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-06-04, 23:16:57 GMT +# Date: 2024-06-05, 00:24:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -41469,6 +41469,25 @@ FFFD ; REPLACEMENT CHARACTER 1DF2A ; LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1DF2B ; LATIN SMALL LETTER DEZH DIGRAPH WITH CURL 1DF2C ; LATIN SMALL LETTER TESH DIGRAPH WITH CURL +1DF3E ; LATIN SMALL LETTER BARRED TURNED H +1DF3F ; LATIN SMALL LETTER BARRED W +1DFD8 ; MODIFIER LETTER SMALL N WITH LONG RIGHT LEG +1DFD9 ; MODIFIER LETTER SMALL R WITH LONG LEG +1DFDA ; MODIFIER LETTER SMALL BARRED TURNED H +1DFDB ; MODIFIER LETTER SMALL J WITH STROKE +1DFDC ; MODIFIER LETTER SMALL UPSILON WITH STROKE +1DFDD ; MODIFIER LETTER SMALL BARRED W +1DFDE ; MODIFIER LETTER SMALL DB DIGRAPH +1DFDF ; MODIFIER LETTER SMALL QP DIGRAPH +1DFE0 ; MODIFIER LETTER SMALL DEZH DIGRAPH WITH CURL +1DFE1 ; MODIFIER LETTER SMALL DEZH DIGRAPH WITH RETROFLEX HOOK +1DFE2 ; MODIFIER LETTER SMALL ESH WITH CURL +1DFE3 ; MODIFIER LETTER SMALL ESH WITH RETROFLEX HOOK +1DFE4 ; MODIFIER LETTER SMALL TESH DIGRAPH WITH CURL +1DFE5 ; MODIFIER LETTER SMALL TESH DIGRAPH WITH RETROFLEX HOOK +1DFE6 ; MODIFIER LETTER SMALL EZH WITH CURL +1DFE7 ; MODIFIER LETTER SMALL EZH WITH RETROFLEX HOOK +1DFE8 ; MODIFIER LETTER SMALL CAPITAL D 1E000 ; COMBINING GLAGOLITIC LETTER AZU 1E001 ; COMBINING GLAGOLITIC LETTER BUKY 1E002 ; COMBINING GLAGOLITIC LETTER VEDE @@ -45375,6 +45394,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155006 +# Total code points: 155025 # EOF From db75d4d38d1a908eaaf048c20fa0afee46d0b6f9 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 03:07:08 +0200 Subject: [PATCH 06/14] DoNotEmit.txt lines from Kirk --- unicodetools/data/ucd/dev/DoNotEmit.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unicodetools/data/ucd/dev/DoNotEmit.txt b/unicodetools/data/ucd/dev/DoNotEmit.txt index 168c127b8..36997352e 100644 --- a/unicodetools/data/ucd/dev/DoNotEmit.txt +++ b/unicodetools/data/ucd/dev/DoNotEmit.txt @@ -426,14 +426,18 @@ 02E1 0322; 1DA9; Precomposed_Form # MODIFIER LETTER SMALL L, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL L WITH RETROFLEX HOOK 1D48 0322; 1078B; Precomposed_Form # MODIFIER LETTER SMALL D, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL D WITH TAIL 1D57 0322; 107AF; Precomposed_Form # MODIFIER LETTER SMALL T, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL T WITH RETROFLEX HOOK +1DB4 0322; 1DFE3; Precomposed_Form # MODIFIER LETTER SMALL ESH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL ESH WITH RETROFLEX HOOK 1DBB 0322; 1DBC; Precomposed_Form # MODIFIER LETTER SMALL Z, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL Z WITH RETROFLEX HOOK +1DBE 0322; 1DFE7; Precomposed_Form # MODIFIER LETTER SMALL EZH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL EZH WITH RETROFLEX HOOK 207F 0322; 1DAF; Precomposed_Form # SUPERSCRIPT LATIN SMALL LETTER N, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL N WITH RETROFLEX HOOK 10787 0322; 10788; Precomposed_Form # MODIFIER LETTER SMALL DZ DIGRAPH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL DZ DIGRAPH WITH RETROFLEX HOOK +1078A 0322; 1DFE1; Precomposed_Form # MODIFIER LETTER SMALL DEZH DIGRAPH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL DEZH DIGRAPH WITH RETROFLEX HOOK 1078C 0322; 1078D; Precomposed_Form # MODIFIER LETTER SMALL D WITH HOOK, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL D WITH HOOK AND TAIL 1079B 0322; 1079D; Precomposed_Form # MODIFIER LETTER SMALL L WITH BELT, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL L WITH RETROFLEX HOOK AND BELT 1079E 0322; 1079F; Precomposed_Form # MODIFIER LETTER SMALL LEZH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL LEZH WITH RETROFLEX HOOK 107A6 0322; 107A7; Precomposed_Form # MODIFIER LETTER SMALL TURNED R WITH LONG LEG, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL TURNED R WITH LONG LEG AND RETROFLEX HOOK 107AC 0322; 107AD; Precomposed_Form # MODIFIER LETTER SMALL TS DIGRAPH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL TS DIGRAPH WITH RETROFLEX HOOK +107AE 0322; 1DFE5; Precomposed_Form # MODIFIER LETTER SMALL TESH DIGRAPH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL TESH DIGRAPH WITH RETROFLEX HOOK 1DF20 0322; 1DF21; Precomposed_Form # LATIN SMALL LETTER D-LEZH DIGRAPH, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER D-LEZH DIGRAPH WITH RETROFLEX HOOK 1DF22 0322; 1DF23; Precomposed_Form # LATIN SMALL LETTER TL DIGRAPH WITH BELT, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER TL DIGRAPH WITH RETROFLEX HOOK AND BELT From 2ae5fa884c2936068637ab411d078450fb4df1c5 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 06:45:33 +0200 Subject: [PATCH 07/14] Other_Lowercase --- unicodetools/data/ucd/dev/PropList.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index ac48e5411..ca553752e 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1201,7 +1201,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND # Total code points: 59 # ================================================ - +1DFD8..1DFE8 ; Other_Lowercase 00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR 00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR 02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y From 6fcff9d448e7c916ce39c46e2e8064967dfd04a5 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 06:47:34 +0200 Subject: [PATCH 08/14] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 8 +++++--- unicodetools/data/ucd/dev/PropList.txt | 7 ++++--- .../data/ucd/dev/auxiliary/SentenceBreakProperty.txt | 8 ++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index b3eaa6393..e25b5b0df 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-06-05, 00:24:28 GMT +# Date: 2024-06-05, 04:46:47 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2137,10 +2137,11 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Lowercase # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; Lowercase # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; Lowercase # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2579 +# Total code points: 2596 # ================================================ @@ -2983,13 +2984,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Cased # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; Cased # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; Cased # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4588 +# Total code points: 4605 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index ca553752e..3d496c727 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2024-06-05, 00:24:43 GMT +# Date: 2024-06-05, 04:47:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1201,7 +1201,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND # Total code points: 59 # ================================================ -1DFD8..1DFE8 ; Other_Lowercase + 00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR 00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR 02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y @@ -1229,9 +1229,10 @@ AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1DFD8..1DFE8 ; Other_Lowercase # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -# Total code points: 311 +# Total code points: 328 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index f15502572..672b3ad7d 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-06-05, 00:24:58 GMT +# Date: 2024-06-05, 04:47:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1317,10 +1317,11 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Lower # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Lower # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; Lower # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W +1DFD8..1DFE8 ; Lower # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2532 +# Total code points: 2549 # ================================================ @@ -2527,7 +2528,6 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; OLetter # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1DF0A ; OLetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK -1DFD8..1DFE8 ; OLetter # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D 1E100..1E12C ; OLetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; OLetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; OLetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ @@ -2586,7 +2586,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136925 +# Total code points: 136908 # ================================================ From b5bc73e9d7bd6b0e88f6ac471123d367884e0427 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 18:22:39 +0200 Subject: [PATCH 09/14] Millerian reshuffling --- unicodetools/data/ucd/dev/UnicodeData.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index a26f9f1e8..f4d592f0c 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -35666,12 +35666,12 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1DF2C;LATIN SMALL LETTER TESH DIGRAPH WITH CURL;Ll;0;L;;;;;N;;;;; 1DF3E;LATIN SMALL LETTER BARRED TURNED H;Ll;0;L;;;;;N;;;;; 1DF3F;LATIN SMALL LETTER BARRED W;Ll;0;L;;;;;N;;;;; -1DFD8;MODIFIER LETTER SMALL N WITH LONG RIGHT LEG;Lm;0;L; 019E;;;;N;;;;; -1DFD9;MODIFIER LETTER SMALL R WITH LONG LEG;Lm;0;L; 027C;;;;N;;;;; -1DFDA;MODIFIER LETTER SMALL BARRED TURNED H;Lm;0;L; 1DF3E;;;;N;;;;; -1DFDB;MODIFIER LETTER SMALL J WITH STROKE;Lm;0;L; 0249;;;;N;;;;; -1DFDC;MODIFIER LETTER SMALL UPSILON WITH STROKE;Lm;0;L; 1D7F;;;;N;;;;; -1DFDD;MODIFIER LETTER SMALL BARRED W;Lm;0;L; 1DF3F;;;;N;;;;; +1DFD8;MODIFIER LETTER SMALL BARRED TURNED H;Lm;0;L; 1DF3E;;;;N;;;;; +1DFD9;MODIFIER LETTER SMALL J WITH STROKE;Lm;0;L; 0249;;;;N;;;;; +1DFDA;MODIFIER LETTER SMALL UPSILON WITH STROKE;Lm;0;L; 1D7F;;;;N;;;;; +1DFDB;MODIFIER LETTER SMALL BARRED W;Lm;0;L; 1DF3F;;;;N;;;;; +1DFDC;MODIFIER LETTER SMALL N WITH LONG RIGHT LEG;Lm;0;L; 019E;;;;N;;;;; +1DFDD;MODIFIER LETTER SMALL R WITH LONG LEG;Lm;0;L; 027C;;;;N;;;;; 1DFDE;MODIFIER LETTER SMALL DB DIGRAPH;Lm;0;L; 0238;;;;N;;;;; 1DFDF;MODIFIER LETTER SMALL QP DIGRAPH;Lm;0;L; 0239;;;;N;;;;; 1DFE0;MODIFIER LETTER SMALL DEZH DIGRAPH WITH CURL;Lm;0;L; 1DF2B;;;;N;;;;; From f170bf0fb102173e2c79c739ee1688dec681a21b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 18:27:21 +0200 Subject: [PATCH 10/14] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 4 +-- .../data/ucd/dev/DerivedCoreProperties.txt | 20 ++++++------ .../ucd/dev/DerivedNormalizationProps.txt | 32 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 4 +-- .../data/ucd/dev/NormalizationTest.txt | 14 ++++---- unicodetools/data/ucd/dev/PropList.txt | 6 ++-- unicodetools/data/ucd/dev/Scripts.txt | 4 +-- .../data/ucd/dev/VerticalOrientation.txt | 4 +-- .../dev/auxiliary/SentenceBreakProperty.txt | 4 +-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 4 +-- .../ucd/dev/extracted/DerivedBidiClass.txt | 4 +-- .../dev/extracted/DerivedCombiningClass.txt | 4 +-- .../extracted/DerivedDecompositionType.txt | 4 +-- .../dev/extracted/DerivedEastAsianWidth.txt | 4 +-- .../dev/extracted/DerivedGeneralCategory.txt | 4 +-- .../ucd/dev/extracted/DerivedLineBreak.txt | 4 +-- .../data/ucd/dev/extracted/DerivedName.txt | 14 ++++---- 18 files changed, 69 insertions(+), 69 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index cb7d32df7..538b4adfb 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-06-05, 00:24:09 GMT +# Date: 2024-07-25, 16:23:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2047,7 +2047,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1DF1F..1DF24 ; 16.0 # [6] LATIN SMALL LETTER D-ETH DIGRAPH..LATIN SMALL LETTER T-THETA DIGRAPH 1DF2B..1DF2C ; 16.0 # [2] LATIN SMALL LETTER DEZH DIGRAPH WITH CURL..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; 16.0 # [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; 16.0 # [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; 16.0 # [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2..1F8BB ; 16.0 # [10] RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index e25b5b0df..7880fca2c 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-06-05, 04:46:47 GMT +# Date: 2024-07-25, 16:24:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1371,7 +1371,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; Alphabetic # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; Alphabetic # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; Alphabetic # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; Alphabetic # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -2137,7 +2137,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Lowercase # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; Lowercase # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; Lowercase # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; Lowercase # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA @@ -2984,7 +2984,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF2C ; Cased # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; Cased # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; Cased # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; Cased # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z @@ -3486,7 +3486,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 1DA84 ; Case_Ignorable # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; Case_Ignorable # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; Case_Ignorable # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 -1DFD8..1DFE8 ; Case_Ignorable # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; Case_Ignorable # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Case_Ignorable # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -6906,7 +6906,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; ID_Start # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; ID_Start # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; ID_Start # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; ID_Start # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -8295,7 +8295,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; ID_Continue # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; ID_Continue # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; ID_Continue # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; ID_Continue # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -9094,7 +9094,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; XID_Start # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; XID_Start # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; XID_Start # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; XID_Start # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -10484,7 +10484,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; XID_Continue # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; XID_Continue # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; XID_Continue # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; XID_Continue # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -12700,7 +12700,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF2C ; Grapheme_Base # L& [34] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER TESH DIGRAPH WITH CURL 1DF3E..1DF3F ; Grapheme_Base # L& [2] LATIN SMALL LETTER BARRED TURNED H..LATIN SMALL LETTER BARRED W -1DFD8..1DFE8 ; Grapheme_Base # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; Grapheme_Base # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index 04437fb0f..4dec3e019 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ # DerivedNormalizationProps-16.0.0.txt -# Date: 2024-06-05, 00:24:32 GMT +# Date: 2024-07-25, 16:24:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1707,7 +1707,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKD_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKD_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKD_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -1DFD8..1DFE8 ; NFKD_QC; N # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; NFKD_QC; N # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; NFKD_QC; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1EE00..1EE03 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; NFKD_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -2119,7 +2119,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKC_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKC_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKC_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -1DFD8..1DFE8 ; NFKC_QC; N # Lm [17] MODIFIER LETTER SMALL N WITH LONG RIGHT LEG..MODIFIER LETTER SMALL CAPITAL D +1DFD8..1DFE8 ; NFKC_QC; N # Lm [17] MODIFIER LETTER SMALL BARRED TURNED H..MODIFIER LETTER SMALL CAPITAL D 1E030..1E06D ; NFKC_QC; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1EE00..1EE03 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; NFKC_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -8227,12 +8227,12 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] ...... Date: Thu, 15 Aug 2024 17:55:27 +0200 Subject: [PATCH 11/14] A test that fails sensibly. --- .../unicode/text/UCD/AdditionComparisons.txt | 75 ++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt index 9db1fdf1d..db7da1b3f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt @@ -35,7 +35,80 @@ CorrespondTo [\N{OLD HUNGARIAN SMALL LETTER A}] ## Provisionally assigned. [placeholder for draft PRs] # Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. -Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status: +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping: + +Ignoring Block: +Propertywise [ \N{LATIN SMALL LETTER BARRED TURNED H} + \N{LATIN SMALL LETTER BARRED W} + ꬳ\N{LATIN SMALL LETTER BARRED E}] AreAlike +Ignoring Unicode_1_Name: # ʱ and ɦ have Unicode_1_Names. +Propertywise [ \N{MODIFIER LETTER SMALL N WITH LONG RIGHT LEG}] + : [ƞ\N{LATIN SMALL LETTER N WITH LONG RIGHT LEG}] + : [Ƞ\N{LATIN CAPITAL LETTER N WITH LONG RIGHT LEG}] +CorrespondTo [ʱ\N{MODIFIER LETTER SMALL H WITH HOOK}] + : [ɦ\N{LATIN SMALL LETTER H WITH HOOK}] + : [Ɦ\N{LATIN CAPITAL LETTER H WITH HOOK}] +end Ignoring; +Propertywise [ \N{MODIFIER LETTER SMALL DB DIGRAPH} + \N{MODIFIER LETTER SMALL QP DIGRAPH}] + : [ȸ\N{LATIN SMALL LETTER DB DIGRAPH} + ȹ\N{LATIN SMALL LETTER QP DIGRAPH}] +CorrespondTo [ʶ\N{MODIFIER LETTER SMALL CAPITAL INVERTED R}] + : [ʁ\N{LATIN LETTER SMALL CAPITAL INVERTED R}] +Ignoring Unicode_1_Name: # ʝ has a Unicode_1_Name. +# ᶨ is not Diacritic, but this seems to be an oversight. +# See https://github.com/unicode-org/properties/issues/315. +# TODO(egg): Remove the Ignoring clause if that gets accepted. +Ignoring Diacritic: +Propertywise [ \N{MODIFIER LETTER SMALL J WITH STROKE}] + : [ɉ\N{LATIN SMALL LETTER J WITH STROKE}] + : [Ɉ\N{LATIN CAPITAL LETTER J WITH STROKE}] +CorrespondTo [ᶨ\N{MODIFIER LETTER SMALL J WITH CROSSED-TAIL}] + : [ʝ\N{LATIN SMALL LETTER J WITH CROSSED-TAIL}] + : [Ʝ\N{LATIN CAPITAL LETTER J WITH CROSSED-TAIL}] +end Ignoring; +end Ignoring; +Ignoring Unicode_1_Name: # ʆ and ʓ have Unicode_1_Names. +Propertywise [ \N{MODIFIER LETTER SMALL R WITH LONG LEG} + \N{MODIFIER LETTER SMALL ESH WITH CURL} + \N{MODIFIER LETTER SMALL EZH WITH CURL} + \N{MODIFIER LETTER SMALL CAPITAL D}] + : [ɼ\N{LATIN SMALL LETTER R WITH LONG LEG} + ʆ\N{LATIN SMALL LETTER ESH WITH CURL} + ʓ\N{LATIN SMALL LETTER EZH WITH CURL} + ᴅ\N{LATIN LETTER SMALL CAPITAL D}] +CorrespondTo [ʶ\N{MODIFIER LETTER SMALL CAPITAL INVERTED R}] + : [ʁ\N{LATIN LETTER SMALL CAPITAL INVERTED R}] +end Ignoring; +Propertywise [ \N{MODIFIER LETTER SMALL UPSILON WITH STROKE} + \N{MODIFIER LETTER SMALL ESH WITH RETROFLEX HOOK} + \N{MODIFIER LETTER SMALL EZH WITH RETROFLEX HOOK}] + : [ᵿ\N{LATIN SMALL LETTER UPSILON WITH STROKE} + ᶘ\N{LATIN SMALL LETTER ESH WITH RETROFLEX HOOK} + ᶚ\N{LATIN SMALL LETTER EZH WITH RETROFLEX HOOK}] +CorrespondTo [ʶ\N{MODIFIER LETTER SMALL CAPITAL INVERTED R}] + : [ʁ\N{LATIN LETTER SMALL CAPITAL INVERTED R}] +Propertywise [ \N{MODIFIER LETTER SMALL DEZH DIGRAPH WITH RETROFLEX HOOK} + \N{MODIFIER LETTER SMALL TESH DIGRAPH WITH RETROFLEX HOOK}] + : [𝼙\N{LATIN SMALL LETTER DEZH DIGRAPH WITH RETROFLEX HOOK} + 𝼜\N{LATIN SMALL LETTER TESH DIGRAPH WITH RETROFLEX HOOK}] +CorrespondTo [ʶ\N{MODIFIER LETTER SMALL CAPITAL INVERTED R}] + : [ʁ\N{LATIN LETTER SMALL CAPITAL INVERTED R}] +Propertywise [ \N{MODIFIER LETTER SMALL DEZH DIGRAPH WITH CURL} + \N{MODIFIER LETTER SMALL TESH DIGRAPH WITH CURL}] + : # From L2/24-051: + [ \N{LATIN SMALL LETTER DEZH DIGRAPH WITH CURL} + \N{LATIN SMALL LETTER TESH DIGRAPH WITH CURL}] +CorrespondTo [ʶ\N{MODIFIER LETTER SMALL CAPITAL INVERTED R}] + : [ʁ\N{LATIN LETTER SMALL CAPITAL INVERTED R}] +Propertywise [ \N{MODIFIER LETTER SMALL BARRED TURNED H} + \N{MODIFIER LETTER SMALL BARRED W}] + : # From the same proposal: + [ \N{LATIN SMALL LETTER BARRED TURNED H} + \N{LATIN SMALL LETTER BARRED W}] +CorrespondTo [ʶ\N{MODIFIER LETTER SMALL CAPITAL INVERTED R}] + : [ʁ\N{LATIN LETTER SMALL CAPITAL INVERTED R}] +end Ignoring; end Ignoring; From d31d80b56fc3b4da8719f54128a3ddd8a742a359 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 15 Aug 2024 17:56:35 +0200 Subject: [PATCH 12/14] Soft_Dotted --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 0f7c34d27..126f21689 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +1DFD9; Soft_Dotted # PropList-16.0.0.txt # Date: 2024-08-15, 13:22:57 GMT # © 2024 Unicode®, Inc. From 854753f186bc43792533cdc5655af6bfb98e2f9d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 15 Aug 2024 17:58:47 +0200 Subject: [PATCH 13/14] Regenerate UCD --- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 126f21689..502aafcbb 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,6 +1,5 @@ -1DFD9; Soft_Dotted # PropList-16.0.0.txt -# Date: 2024-08-15, 13:22:57 GMT +# Date: 2024-08-15, 15:57:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1436,10 +1435,11 @@ E0001 ; Deprecated # Cf LANGUAGE TAG 1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J 1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J 1DF1A ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK +1DFD9 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH STROKE 1E04C..1E04D ; Soft_Dotted # Lm [2] MODIFIER LETTER CYRILLIC SMALL BYELORUSSIAN-UKRAINIAN I..MODIFIER LETTER CYRILLIC SMALL JE 1E068 ; Soft_Dotted # Lm CYRILLIC SUBSCRIPT SMALL LETTER BYELORUSSIAN-UKRAINIAN I -# Total code points: 50 +# Total code points: 51 # ================================================ From 69a376c10bff00dd683b6700d0f2e6dc0c7b1c38 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 15 Oct 2024 21:25:43 +0200 Subject: [PATCH 14/14] Python 3.8 reached eol (#948) --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 8317ccb34..22c9beb61 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8] + python-version: [3.12] steps: - uses: actions/checkout@v3