From 879a32b2d8e55176fe5736bb7732158cef7c3109 Mon Sep 17 00:00:00 2001
From: Markus Scherer If there are any errors, then the UCA implementation is not compliant. These files contain test cases that include ill-formed strings, with surrogate code points.
Implementations that do not weight surrogate code points the same way as reserved code points
- may filter out such lines lines in the test cases, before testing for conformance.Testing
© COPY_YEAR Unicode, Inc. All Rights Reserved. diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt index 96274a852..4de1b642a 100644 --- a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt +++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt @@ -1,5 +1,5 @@ # CollationTest_NON_IGNORABLE.txt -# Date: 2024-05-02, 01:46:26 GMT +# Date: 2024-06-05, 18:49:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -677,8 +677,10 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 |] 10D26 0334; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 004A 0033 | 0002 0002 |] 0334 10D27; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 |] 10D27 0334; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 |] -10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] -10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |] +0334 10D6B; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] +10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] +0334 10D6D; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |] +10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |] 0334 10F48; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] 10F48 0334; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] 0334 10F49; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 |] @@ -693,6 +695,7 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 |] 10F84 0334; # (𐾄) OLD UYGHUR COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 |] 0334 1E2AE; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 |] 1E2AE 0334; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 |] +0334 1E5EE; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 |] 1E5EE 0334; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 |] 0316 0334; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 |] 0334 0316; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 |] @@ -840,6 +843,7 @@ FE27 0334; # (︧) COMBINING LIGATURE LEFT HALF BELOW [| 004A 0034 | 0002 0002 | 10F83 0334; # (𐾃) OLD UYGHUR COMBINING DOT BELOW [| 004A 0034 | 0002 0002 |] 0334 10F85; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 |] 10F85 0334; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 |] +0334 1E5EF; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 |] 1E5EF 0334; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 |] 0334 3099; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 |] 3099 0334; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 |] @@ -1022,7 +1026,8 @@ FB1E 0334; # (ﬞ) HEBREW POINT JUDEO-SPANISH VARIKA [| 004A 0061 | 0002 0002 |] 089F 0334; # (࢟) ARABIC HALF MADDA OVER MADDA [| 004A 0082 | 0002 0002 |] 0334 10EAC; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 |] 10EAC 0334; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 |] -0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |] +0334 0897; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |] +0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |] 0334 0654; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 |] 0654 0334; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 |] 0334 10EAB; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 004A 0083 | 0002 0002 |] @@ -1167,7 +1172,8 @@ A6F1 0334; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 004A 00B7 | 0002 0002 |] 1C37 0334; # (᰷) LEPCHA SIGN NUKTA [| 004A 00C2 | 0002 0002 |] 0334 A9B3; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 |] A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 |] -10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |] +0334 10D6C; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |] +10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |] 0334 110BA; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 |] 110BA 0334; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 |] 0334 11173; # (𑅳) MAHAJANI SIGN NUKTA [| 004A 00C2 | 0002 0002 |] @@ -1218,7 +1224,8 @@ A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 |] 10A38 0334; # (𐨸) KHAROSHTHI SIGN BAR ABOVE [| 004A 00CD | 0002 0002 |] 0334 10A3A; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 |] 10A3A 0334; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 |] -10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |] +0334 10D6A; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |] +10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |] 0334 0E48; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 |] 0E48 0334; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 |] 0334 0E49; # (้) THAI CHARACTER MAI THO [| 004A 00D8 | 0002 0002 |] @@ -1594,11 +1601,11 @@ FE63 0062; # (﹣) SMALL HYPHEN-MINUS [020D 239A | 0020 0020 | 000F 0002 |] 1807 0061; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 2380 | 0020 0020 | 0002 0002 |] 1807 0041; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 2380 | 0020 0020 | 0002 0008 |] 1807 0062; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 239A | 0020 0020 | 0002 0002 |] -10D6E 0021; # () GARAY HYPHEN [0213 0269 | 0020 0020 | 0002 0002 |] -10D6E 003F; # () GARAY HYPHEN [0213 0270 | 0020 0020 | 0002 0002 |] -10D6E 0061; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0002 |] -10D6E 0041; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0008 |] -10D6E 0062; # () GARAY HYPHEN [0213 239A | 0020 0020 | 0002 0002 |] +10D6E 0021; # () GARAY HYPHEN [0213 0269 | 0020 0020 | 0002 0002 |] +10D6E 003F; # () GARAY HYPHEN [0213 0270 | 0020 0020 | 0002 0002 |] +10D6E 0061; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0002 |] +10D6E 0041; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0008 |] +10D6E 0062; # () GARAY HYPHEN [0213 239A | 0020 0020 | 0002 0002 |] 2010 0021; # (‐) HYPHEN [0214 0269 | 0020 0020 | 0002 0002 |] 2011 0021; # (‑) NON-BREAKING HYPHEN [0214 0269 | 0020 0020 | 001B 0002 |] 2010 003F; # (‐) HYPHEN [0214 0270 | 0020 0020 | 0002 0002 |] @@ -3184,8 +3191,8 @@ A67D 0021; # (꙽) COMBINING CYRILLIC PAYEROK [0269 | 0033 0020 | 0002 0002 |] 10D25 0021; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [0269 | 0033 0020 | 0002 0002 |] 10D26 0021; # (𐴦) HANIFI ROHINGYA SIGN TANA [0269 | 0033 0020 | 0002 0002 |] 10D27 0021; # (𐴧) HANIFI ROHINGYA SIGN TASSI [0269 | 0033 0020 | 0002 0002 |] -10D6B 0021; # () GARAY COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |] -10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [0269 | 0033 0020 | 0002 0002 |] +10D6B 0021; # () GARAY COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |] +10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [0269 | 0033 0020 | 0002 0002 |] 10F48 0021; # (𐽈) SOGDIAN COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |] 10F49 0021; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [0269 | 0033 0020 | 0002 0002 |] 10F4A 0021; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [0269 | 0033 0020 | 0002 0002 |] @@ -3410,7 +3417,7 @@ FE7E 0021; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0269 | 0081 0020 | 00 089F 0021; # (࢟) ARABIC HALF MADDA OVER MADDA [0269 | 0082 0020 | 0002 0002 |] 0AFC 0021; # (ૼ) GUJARATI SIGN MADDAH [0269 | 0082 0020 | 0002 0002 |] 10EAC 0021; # (𐺬) YEZIDI COMBINING MADDA MARK [0269 | 0082 0020 | 0002 0002 |] -0897 0021; # () ARABIC PEPET [0269 | 0082 0020 | 0004 0002 |] +0897 0021; # () ARABIC PEPET [0269 | 0082 0020 | 0004 0002 |] 0654 0021; # (ٔ) ARABIC HAMZA ABOVE [0269 | 0083 0020 | 0002 0002 |] 10EAB 0021; # (𐺫) YEZIDI COMBINING HAMZA MARK [0269 | 0083 0020 | 0002 0002 |] 0655 0021; # (ٕ) ARABIC HAMZA BELOW [0269 | 0084 0020 | 0002 0002 |] @@ -3434,7 +3441,7 @@ FE7E 0021; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0269 | 0081 0020 | 00 08F9 0021; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [0269 | 0096 0020 | 0002 0002 |] 08FA 0021; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [0269 | 0097 0020 | 0002 0002 |] 0670 0021; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [0269 | 0098 0020 | 0002 0002 |] -10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [0269 | 0098 0020 | 0002 0002 |] +10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [0269 | 0098 0020 | 0002 0002 |] 0711 0021; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [0269 | 0099 0020 | 0002 0002 |] 0730 0021; # (ܰ) SYRIAC PTHAHA ABOVE [0269 | 009A 0020 | 0002 0002 |] 0731 0021; # (ܱ) SYRIAC PTHAHA BELOW [0269 | 009B 0020 | 0002 0002 |] @@ -3492,7 +3499,7 @@ A6F1 0021; # (꛱) BAMUM COMBINING MARK TUKWENTIS [0269 | 00B7 0020 | 0002 0002 1BE6 0021; # (᯦) BATAK SIGN TOMPI [0269 | 00C2 0020 | 0002 0002 |] 1C37 0021; # (᰷) LEPCHA SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |] A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [0269 | 00C2 0020 | 0002 0002 |] -10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [0269 | 00C2 0020 | 0002 0002 |] +10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [0269 | 00C2 0020 | 0002 0002 |] 110BA 0021; # (𑂺) KAITHI SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |] 11173 0021; # (𑅳) MAHAJANI SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |] 111CA 0021; # (𑇊) SHARADA SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |] @@ -3647,7 +3654,7 @@ ABEC 0021; # (꯬) MEETEI MAYEK LUM IYEK [0269 | 00CC 0020 | 0002 0002 |] 111CB 0021; # (𑇋) SHARADA VOWEL MODIFIER MARK [0269 | 00D0 0020 | 0002 0002 |] 111CC 0021; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [0269 | 00D1 0020 | 0002 0002 |] 11A98 0021; # (𑪘) SOYOMBO GEMINATION MARK [0269 | 00D2 0020 | 0002 0002 |] -10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [0269 | 00D3 0020 | 0002 0002 |] +10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [0269 | 00D3 0020 | 0002 0002 |] 113D2 0021; # () TULU-TIGALARI GEMINATION MARK [0269 | 00D4 0020 | 0002 0002 |] 0E4E 0021; # (๎) THAI CHARACTER YAMAKKAN [0269 | 00D5 0020 | 0002 0002 |] 0E47 0021; # (็) THAI CHARACTER MAITAIKHU [0269 | 00D6 0020 | 0002 0002 |] @@ -4855,8 +4862,8 @@ A67D 003F; # (꙽) COMBINING CYRILLIC PAYEROK [0270 | 0033 0020 | 0002 0002 |] 10D25 003F; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [0270 | 0033 0020 | 0002 0002 |] 10D26 003F; # (𐴦) HANIFI ROHINGYA SIGN TANA [0270 | 0033 0020 | 0002 0002 |] 10D27 003F; # (𐴧) HANIFI ROHINGYA SIGN TASSI [0270 | 0033 0020 | 0002 0002 |] -10D6B 003F; # () GARAY COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |] -10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [0270 | 0033 0020 | 0002 0002 |] +10D6B 003F; # () GARAY COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |] +10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [0270 | 0033 0020 | 0002 0002 |] 10F48 003F; # (𐽈) SOGDIAN COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |] 10F49 003F; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [0270 | 0033 0020 | 0002 0002 |] 10F4A 003F; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [0270 | 0033 0020 | 0002 0002 |] @@ -5081,7 +5088,7 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0270 | 0081 0020 | 00 089F 003F; # (࢟) ARABIC HALF MADDA OVER MADDA [0270 | 0082 0020 | 0002 0002 |] 0AFC 003F; # (ૼ) GUJARATI SIGN MADDAH [0270 | 0082 0020 | 0002 0002 |] 10EAC 003F; # (𐺬) YEZIDI COMBINING MADDA MARK [0270 | 0082 0020 | 0002 0002 |] -0897 003F; # () ARABIC PEPET [0270 | 0082 0020 | 0004 0002 |] +0897 003F; # () ARABIC PEPET [0270 | 0082 0020 | 0004 0002 |] 0654 003F; # (ٔ) ARABIC HAMZA ABOVE [0270 | 0083 0020 | 0002 0002 |] 10EAB 003F; # (𐺫) YEZIDI COMBINING HAMZA MARK [0270 | 0083 0020 | 0002 0002 |] 0655 003F; # (ٕ) ARABIC HAMZA BELOW [0270 | 0084 0020 | 0002 0002 |] @@ -5105,7 +5112,7 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0270 | 0081 0020 | 00 08F9 003F; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [0270 | 0096 0020 | 0002 0002 |] 08FA 003F; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [0270 | 0097 0020 | 0002 0002 |] 0670 003F; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [0270 | 0098 0020 | 0002 0002 |] -10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [0270 | 0098 0020 | 0002 0002 |] +10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [0270 | 0098 0020 | 0002 0002 |] 0711 003F; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [0270 | 0099 0020 | 0002 0002 |] 0730 003F; # (ܰ) SYRIAC PTHAHA ABOVE [0270 | 009A 0020 | 0002 0002 |] 0731 003F; # (ܱ) SYRIAC PTHAHA BELOW [0270 | 009B 0020 | 0002 0002 |] @@ -5163,7 +5170,7 @@ A6F1 003F; # (꛱) BAMUM COMBINING MARK TUKWENTIS [0270 | 00B7 0020 | 0002 0002 1BE6 003F; # (᯦) BATAK SIGN TOMPI [0270 | 00C2 0020 | 0002 0002 |] 1C37 003F; # (᰷) LEPCHA SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |] A9B3 003F; # (꦳) JAVANESE SIGN CECAK TELU [0270 | 00C2 0020 | 0002 0002 |] -10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [0270 | 00C2 0020 | 0002 0002 |] +10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [0270 | 00C2 0020 | 0002 0002 |] 110BA 003F; # (𑂺) KAITHI SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |] 11173 003F; # (𑅳) MAHAJANI SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |] 111CA 003F; # (𑇊) SHARADA SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |] @@ -5318,7 +5325,7 @@ ABEC 003F; # (꯬) MEETEI MAYEK LUM IYEK [0270 | 00CC 0020 | 0002 0002 |] 111CB 003F; # (𑇋) SHARADA VOWEL MODIFIER MARK [0270 | 00D0 0020 | 0002 0002 |] 111CC 003F; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [0270 | 00D1 0020 | 0002 0002 |] 11A98 003F; # (𑪘) SOYOMBO GEMINATION MARK [0270 | 00D2 0020 | 0002 0002 |] -10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [0270 | 00D3 0020 | 0002 0002 |] +10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [0270 | 00D3 0020 | 0002 0002 |] 113D2 003F; # () TULU-TIGALARI GEMINATION MARK [0270 | 00D4 0020 | 0002 0002 |] 0E4E 003F; # (๎) THAI CHARACTER YAMAKKAN [0270 | 00D5 0020 | 0002 0002 |] 0E47 003F; # (็) THAI CHARACTER MAITAIKHU [0270 | 00D6 0020 | 0002 0002 |] @@ -58942,8 +58949,8 @@ A67D 0061; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0002 |] 10D25 0061; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0002 |] 10D26 0061; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0002 |] 10D27 0061; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0002 |] -10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |] -10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 |] +10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |] +10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 |] 10F48 0061; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |] 10F49 0061; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0002 |] 10F4A 0061; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0002 |] @@ -59029,8 +59036,8 @@ A67D 0041; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0008 |] 10D25 0041; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0008 |] 10D26 0041; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0008 |] 10D27 0041; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0008 |] -10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |] -10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 |] +10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |] +10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 |] 10F48 0041; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |] 10F49 0041; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0008 |] 10F4A 0041; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0008 |] @@ -59476,8 +59483,8 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00 089F 0041; # (࢟) ARABIC HALF MADDA OVER MADDA [2380 | 0082 0020 | 0002 0008 |] 0AFC 0041; # (ૼ) GUJARATI SIGN MADDAH [2380 | 0082 0020 | 0002 0008 |] 10EAC 0041; # (𐺬) YEZIDI COMBINING MADDA MARK [2380 | 0082 0020 | 0002 0008 |] -0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 |] -0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 |] +0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 |] +0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 |] 0654 0061; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0002 |] 10EAB 0061; # (𐺫) YEZIDI COMBINING HAMZA MARK [2380 | 0083 0020 | 0002 0002 |] 0654 0041; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0008 |] @@ -59523,9 +59530,9 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00 08FA 0061; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0002 |] 08FA 0041; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0008 |] 0670 0061; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0002 |] -10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 |] +10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 |] 0670 0041; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0008 |] -10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 |] +10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 |] 0711 0061; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0002 |] 0711 0041; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0008 |] 0730 0061; # (ܰ) SYRIAC PTHAHA ABOVE [2380 | 009A 0020 | 0002 0002 |] @@ -59626,7 +59633,7 @@ A6F1 0041; # (꛱) BAMUM COMBINING MARK TUKWENTIS [2380 | 00B7 0020 | 0002 0008 1BE6 0061; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0002 |] 1C37 0061; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |] A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 |] -10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 |] +10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 |] 110BA 0061; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |] 11173 0061; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |] 111CA 0061; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |] @@ -59657,7 +59664,7 @@ A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 |] 1BE6 0041; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0008 |] 1C37 0041; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |] A9B3 0041; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0008 |] -10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 |] +10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 |] 110BA 0041; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |] 11173 0041; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |] 111CA 0041; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |] @@ -59950,8 +59957,8 @@ ABEC 0041; # (꯬) MEETEI MAYEK LUM IYEK [2380 | 00CC 0020 | 0002 0008 |] 111CC 0041; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [2380 | 00D1 0020 | 0002 0008 |] 11A98 0061; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0002 |] 11A98 0041; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0008 |] -10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 |] -10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 |] +10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 |] +10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 |] 113D2 0061; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0002 |] 113D2 0041; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0008 |] 0E4E 0061; # (๎) THAI CHARACTER YAMAKKAN [2380 | 00D5 0020 | 0002 0002 |] @@ -62026,8 +62033,8 @@ A67D 0062; # (꙽) COMBINING CYRILLIC PAYEROK [239A | 0033 0020 | 0002 0002 |] 10D25 0062; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [239A | 0033 0020 | 0002 0002 |] 10D26 0062; # (𐴦) HANIFI ROHINGYA SIGN TANA [239A | 0033 0020 | 0002 0002 |] 10D27 0062; # (𐴧) HANIFI ROHINGYA SIGN TASSI [239A | 0033 0020 | 0002 0002 |] -10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |] -10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 |] +10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |] +10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 |] 10F48 0062; # (𐽈) SOGDIAN COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |] 10F49 0062; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [239A | 0033 0020 | 0002 0002 |] 10F4A 0062; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [239A | 0033 0020 | 0002 0002 |] @@ -62254,7 +62261,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00 089F 0062; # (࢟) ARABIC HALF MADDA OVER MADDA [239A | 0082 0020 | 0002 0002 |] 0AFC 0062; # (ૼ) GUJARATI SIGN MADDAH [239A | 0082 0020 | 0002 0002 |] 10EAC 0062; # (𐺬) YEZIDI COMBINING MADDA MARK [239A | 0082 0020 | 0002 0002 |] -0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 |] +0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 |] 0654 0062; # (ٔ) ARABIC HAMZA ABOVE [239A | 0083 0020 | 0002 0002 |] 10EAB 0062; # (𐺫) YEZIDI COMBINING HAMZA MARK [239A | 0083 0020 | 0002 0002 |] 0655 0062; # (ٕ) ARABIC HAMZA BELOW [239A | 0084 0020 | 0002 0002 |] @@ -62278,7 +62285,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00 08F9 0062; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [239A | 0096 0020 | 0002 0002 |] 08FA 0062; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [239A | 0097 0020 | 0002 0002 |] 0670 0062; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [239A | 0098 0020 | 0002 0002 |] -10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 |] +10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 |] 0711 0062; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [239A | 0099 0020 | 0002 0002 |] 0730 0062; # (ܰ) SYRIAC PTHAHA ABOVE [239A | 009A 0020 | 0002 0002 |] 0731 0062; # (ܱ) SYRIAC PTHAHA BELOW [239A | 009B 0020 | 0002 0002 |] @@ -62336,7 +62343,7 @@ A6F1 0062; # (꛱) BAMUM COMBINING MARK TUKWENTIS [239A | 00B7 0020 | 0002 0002 1BE6 0062; # (᯦) BATAK SIGN TOMPI [239A | 00C2 0020 | 0002 0002 |] 1C37 0062; # (᰷) LEPCHA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |] A9B3 0062; # (꦳) JAVANESE SIGN CECAK TELU [239A | 00C2 0020 | 0002 0002 |] -10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 |] +10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 |] 110BA 0062; # (𑂺) KAITHI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |] 11173 0062; # (𑅳) MAHAJANI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |] 111CA 0062; # (𑇊) SHARADA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |] @@ -62491,7 +62498,7 @@ ABEC 0062; # (꯬) MEETEI MAYEK LUM IYEK [239A | 00CC 0020 | 0002 0002 |] 111CB 0062; # (𑇋) SHARADA VOWEL MODIFIER MARK [239A | 00D0 0020 | 0002 0002 |] 111CC 0062; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [239A | 00D1 0020 | 0002 0002 |] 11A98 0062; # (𑪘) SOYOMBO GEMINATION MARK [239A | 00D2 0020 | 0002 0002 |] -10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 |] +10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 |] 113D2 0062; # () TULU-TIGALARI GEMINATION MARK [239A | 00D4 0020 | 0002 0002 |] 0E4E 0062; # (๎) THAI CHARACTER YAMAKKAN [239A | 00D5 0020 | 0002 0002 |] 0E47 0062; # (็) THAI CHARACTER MAITAIKHU [239A | 00D6 0020 | 0002 0002 |] @@ -102854,7 +102861,9 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 |] 113C8 0041; # () TULU-TIGALARI VOWEL SIGN AU [3329 2380 | 0020 0020 | 0002 0008 |] 113C2 113C9 0062; # () TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI AU LENGTH MARK [3329 239A | 0020 0020 | 0002 0002 |] 113C8 0062; # () TULU-TIGALARI VOWEL SIGN AU [3329 239A | 0020 0020 | 0002 0002 |] +0334 113CE; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 |] 113CE 0334; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 |] +0334 113CF; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 |] 113CF 0334; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 |] 113CE 0021; # () TULU-TIGALARI SIGN VIRAMA [332A 0269 | 0020 0020 | 0002 0002 |] 113CF 0021; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 0269 | 0020 0020 | 0004 0002 |] @@ -102866,6 +102875,7 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 |] 113CF 0041; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 2380 | 0020 0020 | 0004 0008 |] 113CE 0062; # () TULU-TIGALARI SIGN VIRAMA [332A 239A | 0020 0020 | 0002 0002 |] 113CF 0062; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 239A | 0020 0020 | 0004 0002 |] +0334 113D0; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 |] 113D0 0334; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 |] 113D0 0021; # () TULU-TIGALARI CONJOINER [332B 0269 | 0020 0020 | 0002 0002 |] 113D0 003F; # () TULU-TIGALARI CONJOINER [332B 0270 | 0020 0020 | 0002 0002 |] @@ -136655,12 +136665,13 @@ A6EF 0062; # (ꛯ) BAMUM LETTER KOGHOM [4304 239A | 0020 0020 | 0002 0002 |] 10D4F 0061; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0002 |] 10D4F 0041; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0008 |] 10D4F 0062; # () GARAY SUKUN [4646 239A | 0020 0020 | 0002 0002 |] -10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |] -10D69 0021; # () GARAY VOWEL SIGN E [4647 0269 | 0020 0020 | 0002 0002 |] -10D69 003F; # () GARAY VOWEL SIGN E [4647 0270 | 0020 0020 | 0002 0002 |] -10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 |] -10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 |] -10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 |] +0334 10D69; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |] +10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |] +10D69 0021; # () GARAY VOWEL SIGN E [4647 0269 | 0020 0020 | 0002 0002 |] +10D69 003F; # () GARAY VOWEL SIGN E [4647 0270 | 0020 0020 | 0002 0002 |] +10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 |] +10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 |] +10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 |] 10D70 0021; # () GARAY SMALL LETTER A [4648 0269 | 0020 0020 | 0002 0002 |] 10D50 0021; # () GARAY CAPITAL LETTER A [4648 0269 | 0020 0020 | 0008 0002 |] 10D70 003F; # () GARAY SMALL LETTER A [4648 0270 | 0020 0020 | 0002 0002 |] @@ -155814,6 +155825,8 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 |] 105C8 0041; # () TODHRI LETTER DHA [5236 2380 | 0020 0020 | 0002 0008 |] 105C8 0062; # () TODHRI LETTER DHA [5236 239A | 0020 0020 | 0002 0002 |] 105C9 0334; # () TODHRI LETTER EI [5237 | 0020 004A | 0002 0002 |] +105D2 0307 0334; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 004A | 0002 0002 |] +105D2 0334 0307; # (̴) TODHRI LETTER I, COMBINING TILDE OVERLAY [5237 | 0020 004A | 0002 0002 |] 105C9 0021; # () TODHRI LETTER EI [5237 0269 | 0020 0020 | 0002 0002 |] 105D2 0307 0021; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 0269 | 0020 0020 | 0002 0002 |] 105C9 003F; # () TODHRI LETTER EI [5237 0270 | 0020 0020 | 0002 0002 |] @@ -155958,6 +155971,8 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 |] 105E3 0061; # () TODHRI LETTER THA [5251 2380 | 0020 0020 | 0002 0002 |] 105E3 0041; # () TODHRI LETTER THA [5251 2380 | 0020 0020 | 0002 0008 |] 105E3 0062; # () TODHRI LETTER THA [5251 239A | 0020 0020 | 0002 0002 |] +105DA 0307 0334; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 004A | 0002 0002 |] +105DA 0334 0307; # (̴) TODHRI LETTER O, COMBINING TILDE OVERLAY [5252 | 0020 004A | 0002 0002 |] 105E4 0334; # () TODHRI LETTER U [5252 | 0020 004A | 0002 0002 |] 105DA 0307 0021; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 0269 | 0020 0020 | 0002 0002 |] 105E4 0021; # () TODHRI LETTER U [5252 0269 | 0020 0020 | 0002 0002 |] @@ -157213,6 +157228,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 |] 1612E 0061; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 2380 | 0020 0020 | 0002 0002 |] 1612E 0041; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 2380 | 0020 0020 | 0002 0008 |] 1612E 0062; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 239A | 0020 0020 | 0002 0002 |] +0334 1612F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 |] 1612F 0334; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 |] 1612F 0021; # () GURUNG KHEMA SIGN THOLHOMA [5338 0269 | 0020 0020 | 0002 0002 |] 1612F 003F; # () GURUNG KHEMA SIGN THOLHOMA [5338 0270 | 0020 0020 | 0002 0002 |] diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt index f236ee7be..d03e4ee81 100644 --- a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt +++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt @@ -1,5 +1,5 @@ # CollationTest_NON_IGNORABLE_SHORT.txt -# Date: 2024-05-02, 01:46:28 GMT +# Date: 2024-06-05, 18:49:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -677,7 +677,9 @@ A67D 0334 10D26 0334 0334 10D27 10D27 0334 +0334 10D6B 10D6B 0334 +0334 10D6D 10D6D 0334 0334 10F48 10F48 0334 @@ -693,6 +695,7 @@ A67D 0334 10F84 0334 0334 1E2AE 1E2AE 0334 +0334 1E5EE 1E5EE 0334 0316 0334 0334 0316 @@ -840,6 +843,7 @@ FE27 0334 10F83 0334 0334 10F85 10F85 0334 +0334 1E5EF 1E5EF 0334 0334 3099 3099 0334 @@ -1022,6 +1026,7 @@ FB1E 0334 089F 0334 0334 10EAC 10EAC 0334 +0334 0897 0897 0334 0334 0654 0654 0334 @@ -1167,6 +1172,7 @@ A6F1 0334 1C37 0334 0334 A9B3 A9B3 0334 +0334 10D6C 10D6C 0334 0334 110BA 110BA 0334 @@ -1218,6 +1224,7 @@ A9B3 0334 10A38 0334 0334 10A3A 10A3A 0334 +0334 10D6A 10D6A 0334 0334 0E48 0E48 0334 @@ -102854,7 +102861,9 @@ A8C4 0062 113C8 0041 113C2 113C9 0062 113C8 0062 +0334 113CE 113CE 0334 +0334 113CF 113CF 0334 113CE 0021 113CF 0021 @@ -102866,6 +102875,7 @@ A8C4 0062 113CF 0041 113CE 0062 113CF 0062 +0334 113D0 113D0 0334 113D0 0021 113D0 003F @@ -136655,6 +136665,7 @@ A6EF 0062 10D4F 0061 10D4F 0041 10D4F 0062 +0334 10D69 10D69 0334 10D69 0021 10D69 003F @@ -155814,6 +155825,8 @@ A4F7 0062 105C8 0041 105C8 0062 105C9 0334 +105D2 0307 0334 +105D2 0334 0307 105C9 0021 105D2 0307 0021 105C9 003F @@ -155958,6 +155971,8 @@ A4F7 0062 105E3 0061 105E3 0041 105E3 0062 +105DA 0307 0334 +105DA 0334 0307 105E4 0334 105DA 0307 0021 105E4 0021 @@ -157213,6 +157228,7 @@ A4F7 0062 1612E 0061 1612E 0041 1612E 0062 +0334 1612F 1612F 0334 1612F 0021 1612F 003F diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt index 8aeee7c00..5ba2ce5ae 100644 --- a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt +++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt @@ -1,5 +1,5 @@ # CollationTest_SHIFTED.txt -# Date: 2024-05-02, 01:46:28 GMT +# Date: 2024-06-05, 18:49:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -99,9 +99,9 @@ FF0D 003F; # (-) FULLWIDTH HYPHEN-MINUS [| | | 020D 0270 |] 1806 003F; # (᠆) MONGOLIAN TODO SOFT HYPHEN [| | | 0211 0270 |] 1807 0021; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [| | | 0212 0269 |] 1807 003F; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [| | | 0212 0270 |] -10D6E 0334; # () GARAY HYPHEN [| | | 0213 |] -10D6E 0021; # () GARAY HYPHEN [| | | 0213 0269 |] -10D6E 003F; # () GARAY HYPHEN [| | | 0213 0270 |] +10D6E 0334; # () GARAY HYPHEN [| | | 0213 |] +10D6E 0021; # () GARAY HYPHEN [| | | 0213 0269 |] +10D6E 003F; # () GARAY HYPHEN [| | | 0213 0270 |] 2010 0021; # (‐) HYPHEN [| | | 0214 0269 |] 2011 0021; # (‑) NON-BREAKING HYPHEN [| | | 0214 0269 |] 2010 003F; # (‐) HYPHEN [| | | 0214 0270 |] @@ -23365,8 +23365,8 @@ A67D 0021; # (꙽) COMBINING CYRILLIC PAYEROK [| 0033 | 0002 | FFFF 0269 |] 10D25 0021; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [| 0033 | 0002 | FFFF 0269 |] 10D26 0021; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 0033 | 0002 | FFFF 0269 |] 10D27 0021; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 0033 | 0002 | FFFF 0269 |] -10D6B 0021; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |] -10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0269 |] +10D6B 0021; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |] +10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0269 |] 10F48 0021; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |] 10F49 0021; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 0033 | 0002 | FFFF 0269 |] 10F4A 0021; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [| 0033 | 0002 | FFFF 0269 |] @@ -23452,8 +23452,8 @@ A67D 003F; # (꙽) COMBINING CYRILLIC PAYEROK [| 0033 | 0002 | FFFF 0270 |] 10D25 003F; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [| 0033 | 0002 | FFFF 0270 |] 10D26 003F; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 0033 | 0002 | FFFF 0270 |] 10D27 003F; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 0033 | 0002 | FFFF 0270 |] -10D6B 003F; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |] -10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0270 |] +10D6B 003F; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |] +10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0270 |] 10F48 003F; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |] 10F49 003F; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 0033 | 0002 | FFFF 0270 |] 10F4A 003F; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [| 0033 | 0002 | FFFF 0270 |] @@ -24941,8 +24941,10 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 | FFFF FF 10D26 0334; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0334 10D27; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 | FFFF FFFF |] 10D27 0334; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 | FFFF FFFF |] -10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] -10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |] +0334 10D6B; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] +10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] +0334 10D6D; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |] +10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0334 10F48; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] 10F48 0334; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0334 10F49; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] @@ -24957,6 +24959,7 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 | FFFF FF 10F84 0334; # (𐾄) OLD UYGHUR COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0334 1E2AE; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 | FFFF FFFF |] 1E2AE 0334; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 | FFFF FFFF |] +0334 1E5EE; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 | FFFF FFFF |] 1E5EE 0334; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0316 0334; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] 0334 0316; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] @@ -25104,6 +25107,7 @@ FE27 0334; # (︧) COMBINING LIGATURE LEFT HALF BELOW [| 004A 0034 | 0002 0002 | 10F83 0334; # (𐾃) OLD UYGHUR COMBINING DOT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] 0334 10F85; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] 10F85 0334; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] +0334 1E5EF; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 | FFFF FFFF |] 1E5EF 0334; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 | FFFF FFFF |] 0334 3099; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 | FFFF FFFF |] 3099 0334; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 | FFFF FFFF |] @@ -25286,7 +25290,8 @@ FB1E 0334; # (ﬞ) HEBREW POINT JUDEO-SPANISH VARIKA [| 004A 0061 | 0002 0002 | 089F 0334; # (࢟) ARABIC HALF MADDA OVER MADDA [| 004A 0082 | 0002 0002 | FFFF FFFF |] 0334 10EAC; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 | FFFF FFFF |] 10EAC 0334; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 | FFFF FFFF |] -0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |] +0334 0897; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |] +0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |] 0334 0654; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 | FFFF FFFF |] 0654 0334; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 | FFFF FFFF |] 0334 10EAB; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 004A 0083 | 0002 0002 | FFFF FFFF |] @@ -25431,7 +25436,8 @@ A6F1 0334; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 004A 00B7 | 0002 0002 | FFF 1C37 0334; # (᰷) LEPCHA SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |] 0334 A9B3; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF |] A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF |] -10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |] +0334 10D6C; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |] +10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |] 0334 110BA; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |] 110BA 0334; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |] 0334 11173; # (𑅳) MAHAJANI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |] @@ -25482,7 +25488,8 @@ A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF 10A38 0334; # (𐨸) KHAROSHTHI SIGN BAR ABOVE [| 004A 00CD | 0002 0002 | FFFF FFFF |] 0334 10A3A; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 | FFFF FFFF |] 10A3A 0334; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 | FFFF FFFF |] -10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |] +0334 10D6A; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |] +10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |] 0334 0E48; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 | FFFF FFFF |] 0E48 0334; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 | FFFF FFFF |] 0334 0E49; # (้) THAI CHARACTER MAI THO [| 004A 00D8 | 0002 0002 | FFFF FFFF |] @@ -25825,8 +25832,8 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [| 0081 | 001A | FFFF 089F 003F; # (࢟) ARABIC HALF MADDA OVER MADDA [| 0082 | 0002 | FFFF 0270 |] 0AFC 003F; # (ૼ) GUJARATI SIGN MADDAH [| 0082 | 0002 | FFFF 0270 |] 10EAC 003F; # (𐺬) YEZIDI COMBINING MADDA MARK [| 0082 | 0002 | FFFF 0270 |] -0897 0021; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0269 |] -0897 003F; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0270 |] +0897 0021; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0269 |] +0897 003F; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0270 |] 0654 0021; # (ٔ) ARABIC HAMZA ABOVE [| 0083 | 0002 | FFFF 0269 |] 10EAB 0021; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 0083 | 0002 | FFFF 0269 |] 0654 003F; # (ٔ) ARABIC HAMZA ABOVE [| 0083 | 0002 | FFFF 0270 |] @@ -25872,10 +25879,10 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [| 0081 | 001A | FFFF 08FA 0021; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [| 0097 | 0002 | FFFF 0269 |] 08FA 003F; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [| 0097 | 0002 | FFFF 0270 |] 0670 0021; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [| 0098 | 0002 | FFFF 0269 |] -10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0269 |] +10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0269 |] 0670 003F; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [| 0098 | 0002 | FFFF 0270 |] -10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0270 |] -10EFC 0334; # () ARABIC COMBINING ALEF OVERLAY [| 0098 004A | 0002 0002 | FFFF FFFF |] +10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0270 |] +10EFC 0334; # () ARABIC COMBINING ALEF OVERLAY [| 0098 004A | 0002 0002 | FFFF FFFF |] 0711 0021; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [| 0099 | 0002 | FFFF 0269 |] 0711 003F; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [| 0099 | 0002 | FFFF 0270 |] 0730 0021; # (ܰ) SYRIAC PTHAHA ABOVE [| 009A | 0002 | FFFF 0269 |] @@ -25981,7 +25988,7 @@ A6F1 003F; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 00B7 | 0002 | FFFF 0270 |] 1BE6 0021; # (᯦) BATAK SIGN TOMPI [| 00C2 | 0002 | FFFF 0269 |] 1C37 0021; # (᰷) LEPCHA SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |] A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0269 |] -10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0269 |] +10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0269 |] 110BA 0021; # (𑂺) KAITHI SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |] 11173 0021; # (𑅳) MAHAJANI SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |] 111CA 0021; # (𑇊) SHARADA SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |] @@ -26012,7 +26019,7 @@ A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0269 |] 1BE6 003F; # (᯦) BATAK SIGN TOMPI [| 00C2 | 0002 | FFFF 0270 |] 1C37 003F; # (᰷) LEPCHA SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |] A9B3 003F; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0270 |] -10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0270 |] +10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0270 |] 110BA 003F; # (𑂺) KAITHI SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |] 11173 003F; # (𑅳) MAHAJANI SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |] 111CA 003F; # (𑇊) SHARADA SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |] @@ -26373,8 +26380,8 @@ ABEC 003F; # (꯬) MEETEI MAYEK LUM IYEK [| 00CC | 0002 | FFFF 0270 |] 11A98 0021; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 | 0002 | FFFF 0269 |] 11A98 003F; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 | 0002 | FFFF 0270 |] 11A98 0334; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 004A | 0002 0002 | FFFF FFFF |] -10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0269 |] -10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0270 |] +10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0269 |] +10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0270 |] 113D2 0021; # () TULU-TIGALARI GEMINATION MARK [| 00D4 | 0002 | FFFF 0269 |] 113D2 003F; # () TULU-TIGALARI GEMINATION MARK [| 00D4 | 0002 | FFFF 0270 |] 113D2 0334; # () TULU-TIGALARI GEMINATION MARK [| 00D4 004A | 0002 0002 | FFFF FFFF |] @@ -37850,7 +37857,7 @@ FF0D 0061; # (-) FULLWIDTH HYPHEN-MINUS [2380 | 0020 | 0002 | 020D FFFF |] 1B60 0061; # (᭠) BALINESE PAMENENG [2380 | 0020 | 0002 | 0210 FFFF |] 1806 0061; # (᠆) MONGOLIAN TODO SOFT HYPHEN [2380 | 0020 | 0002 | 0211 FFFF |] 1807 0061; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [2380 | 0020 | 0002 | 0212 FFFF |] -10D6E 0061; # () GARAY HYPHEN [2380 | 0020 | 0002 | 0213 FFFF |] +10D6E 0061; # () GARAY HYPHEN [2380 | 0020 | 0002 | 0213 FFFF |] 2010 0061; # (‐) HYPHEN [2380 | 0020 | 0002 | 0214 FFFF |] 2011 0061; # (‑) NON-BREAKING HYPHEN [2380 | 0020 | 0002 | 0214 FFFF |] 2012 0061; # (‒) FIGURE DASH [2380 | 0020 | 0002 | 0215 FFFF |] @@ -47089,7 +47096,7 @@ FF0D 0041; # (-) FULLWIDTH HYPHEN-MINUS [2380 | 0020 | 0008 | 020D FFFF |] 1B60 0041; # (᭠) BALINESE PAMENENG [2380 | 0020 | 0008 | 0210 FFFF |] 1806 0041; # (᠆) MONGOLIAN TODO SOFT HYPHEN [2380 | 0020 | 0008 | 0211 FFFF |] 1807 0041; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [2380 | 0020 | 0008 | 0212 FFFF |] -10D6E 0041; # () GARAY HYPHEN [2380 | 0020 | 0008 | 0213 FFFF |] +10D6E 0041; # () GARAY HYPHEN [2380 | 0020 | 0008 | 0213 FFFF |] 2010 0041; # (‐) HYPHEN [2380 | 0020 | 0008 | 0214 FFFF |] 2011 0041; # (‑) NON-BREAKING HYPHEN [2380 | 0020 | 0008 | 0214 FFFF |] 2012 0041; # (‒) FIGURE DASH [2380 | 0020 | 0008 | 0215 FFFF |] @@ -56794,8 +56801,8 @@ A67D 0061; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0002 | FF 10D25 0061; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10D26 0061; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10D27 0061; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] -10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] -10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] +10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] +10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F48 0061; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F49 0061; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F4A 0061; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] @@ -56881,8 +56888,8 @@ A67D 0041; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0008 | FF 10D25 0041; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10D26 0041; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10D27 0041; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] -10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] -10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] +10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] +10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10F48 0041; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10F49 0041; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10F4A 0041; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] @@ -57328,8 +57335,8 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00 089F 0041; # (࢟) ARABIC HALF MADDA OVER MADDA [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |] 0AFC 0041; # (ૼ) GUJARATI SIGN MADDAH [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |] 10EAC 0041; # (𐺬) YEZIDI COMBINING MADDA MARK [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |] -0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 | FFFF FFFF |] -0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 | FFFF FFFF |] +0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 | FFFF FFFF |] +0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 | FFFF FFFF |] 0654 0061; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0002 | FFFF FFFF |] 10EAB 0061; # (𐺫) YEZIDI COMBINING HAMZA MARK [2380 | 0083 0020 | 0002 0002 | FFFF FFFF |] 0654 0041; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0008 | FFFF FFFF |] @@ -57375,9 +57382,9 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00 08FA 0061; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0002 | FFFF FFFF |] 08FA 0041; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0008 | FFFF FFFF |] 0670 0061; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |] -10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |] +10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |] 0670 0041; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |] -10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |] +10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |] 0711 0061; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0002 | FFFF FFFF |] 0711 0041; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0008 | FFFF FFFF |] 0730 0061; # (ܰ) SYRIAC PTHAHA ABOVE [2380 | 009A 0020 | 0002 0002 | FFFF FFFF |] @@ -57478,7 +57485,7 @@ A6F1 0041; # (꛱) BAMUM COMBINING MARK TUKWENTIS [2380 | 00B7 0020 | 0002 0008 1BE6 0061; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] 1C37 0061; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] -10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] +10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] 110BA 0061; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] 11173 0061; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] 111CA 0061; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] @@ -57509,7 +57516,7 @@ A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 | FFFF 1BE6 0041; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] 1C37 0041; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] A9B3 0041; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] -10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] +10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] 110BA 0041; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] 11173 0041; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] 111CA 0041; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] @@ -57802,8 +57809,8 @@ ABEC 0041; # (꯬) MEETEI MAYEK LUM IYEK [2380 | 00CC 0020 | 0002 0008 | FFFF FF 111CC 0041; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [2380 | 00D1 0020 | 0002 0008 | FFFF FFFF |] 11A98 0061; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0002 | FFFF FFFF |] 11A98 0041; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0008 | FFFF FFFF |] -10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 | FFFF FFFF |] -10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 | FFFF FFFF |] +10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 | FFFF FFFF |] +10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 | FFFF FFFF |] 113D2 0061; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0002 | FFFF FFFF |] 113D2 0041; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0008 | FFFF FFFF |] 0E4E 0061; # (๎) THAI CHARACTER YAMAKKAN [2380 | 00D5 0020 | 0002 0002 | FFFF FFFF |] @@ -58638,7 +58645,7 @@ FF0D 0062; # (-) FULLWIDTH HYPHEN-MINUS [239A | 0020 | 0002 | 020D FFFF |] 1B60 0062; # (᭠) BALINESE PAMENENG [239A | 0020 | 0002 | 0210 FFFF |] 1806 0062; # (᠆) MONGOLIAN TODO SOFT HYPHEN [239A | 0020 | 0002 | 0211 FFFF |] 1807 0062; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [239A | 0020 | 0002 | 0212 FFFF |] -10D6E 0062; # () GARAY HYPHEN [239A | 0020 | 0002 | 0213 FFFF |] +10D6E 0062; # () GARAY HYPHEN [239A | 0020 | 0002 | 0213 FFFF |] 2010 0062; # (‐) HYPHEN [239A | 0020 | 0002 | 0214 FFFF |] 2011 0062; # (‑) NON-BREAKING HYPHEN [239A | 0020 | 0002 | 0214 FFFF |] 2012 0062; # (‒) FIGURE DASH [239A | 0020 | 0002 | 0215 FFFF |] @@ -68041,8 +68048,8 @@ A67D 0062; # (꙽) COMBINING CYRILLIC PAYEROK [239A | 0033 0020 | 0002 0002 | FF 10D25 0062; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10D26 0062; # (𐴦) HANIFI ROHINGYA SIGN TANA [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10D27 0062; # (𐴧) HANIFI ROHINGYA SIGN TASSI [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] -10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] -10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] +10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] +10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F48 0062; # (𐽈) SOGDIAN COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F49 0062; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F4A 0062; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] @@ -68269,7 +68276,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00 089F 0062; # (࢟) ARABIC HALF MADDA OVER MADDA [239A | 0082 0020 | 0002 0002 | FFFF FFFF |] 0AFC 0062; # (ૼ) GUJARATI SIGN MADDAH [239A | 0082 0020 | 0002 0002 | FFFF FFFF |] 10EAC 0062; # (𐺬) YEZIDI COMBINING MADDA MARK [239A | 0082 0020 | 0002 0002 | FFFF FFFF |] -0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 | FFFF FFFF |] +0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 | FFFF FFFF |] 0654 0062; # (ٔ) ARABIC HAMZA ABOVE [239A | 0083 0020 | 0002 0002 | FFFF FFFF |] 10EAB 0062; # (𐺫) YEZIDI COMBINING HAMZA MARK [239A | 0083 0020 | 0002 0002 | FFFF FFFF |] 0655 0062; # (ٕ) ARABIC HAMZA BELOW [239A | 0084 0020 | 0002 0002 | FFFF FFFF |] @@ -68293,7 +68300,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00 08F9 0062; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [239A | 0096 0020 | 0002 0002 | FFFF FFFF |] 08FA 0062; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [239A | 0097 0020 | 0002 0002 | FFFF FFFF |] 0670 0062; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [239A | 0098 0020 | 0002 0002 | FFFF FFFF |] -10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 | FFFF FFFF |] +10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 | FFFF FFFF |] 0711 0062; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [239A | 0099 0020 | 0002 0002 | FFFF FFFF |] 0730 0062; # (ܰ) SYRIAC PTHAHA ABOVE [239A | 009A 0020 | 0002 0002 | FFFF FFFF |] 0731 0062; # (ܱ) SYRIAC PTHAHA BELOW [239A | 009B 0020 | 0002 0002 | FFFF FFFF |] @@ -68351,7 +68358,7 @@ A6F1 0062; # (꛱) BAMUM COMBINING MARK TUKWENTIS [239A | 00B7 0020 | 0002 0002 1BE6 0062; # (᯦) BATAK SIGN TOMPI [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] 1C37 0062; # (᰷) LEPCHA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] A9B3 0062; # (꦳) JAVANESE SIGN CECAK TELU [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] -10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] +10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] 110BA 0062; # (𑂺) KAITHI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] 11173 0062; # (𑅳) MAHAJANI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] 111CA 0062; # (𑇊) SHARADA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] @@ -68506,7 +68513,7 @@ ABEC 0062; # (꯬) MEETEI MAYEK LUM IYEK [239A | 00CC 0020 | 0002 0002 | FFFF FF 111CB 0062; # (𑇋) SHARADA VOWEL MODIFIER MARK [239A | 00D0 0020 | 0002 0002 | FFFF FFFF |] 111CC 0062; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [239A | 00D1 0020 | 0002 0002 | FFFF FFFF |] 11A98 0062; # (𑪘) SOYOMBO GEMINATION MARK [239A | 00D2 0020 | 0002 0002 | FFFF FFFF |] -10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 | FFFF FFFF |] +10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 | FFFF FFFF |] 113D2 0062; # () TULU-TIGALARI GEMINATION MARK [239A | 00D4 0020 | 0002 0002 | FFFF FFFF |] 0E4E 0062; # (๎) THAI CHARACTER YAMAKKAN [239A | 00D5 0020 | 0002 0002 | FFFF FFFF |] 0E47 0062; # (็) THAI CHARACTER MAITAIKHU [239A | 00D6 0020 | 0002 0002 | FFFF FFFF |] @@ -110915,7 +110922,9 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 | F 113CE 003F; # () TULU-TIGALARI SIGN VIRAMA [332A | 0020 | 0002 | FFFF 0270 |] 113CF 0021; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 0020 | 0004 | FFFF 0269 |] 113CF 003F; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 0020 | 0004 | FFFF 0270 |] +0334 113CE; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 | FFFF FFFF |] 113CE 0334; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 | FFFF FFFF |] +0334 113CF; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 | FFFF FFFF |] 113CF 0334; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 | FFFF FFFF |] 113CE 0061; # () TULU-TIGALARI SIGN VIRAMA [332A 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 113CE 0041; # () TULU-TIGALARI SIGN VIRAMA [332A 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] @@ -110925,6 +110934,7 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 | F 113CF 0062; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 239A | 0020 0020 | 0004 0002 | FFFF FFFF |] 113D0 0021; # () TULU-TIGALARI CONJOINER [332B | 0020 | 0002 | FFFF 0269 |] 113D0 003F; # () TULU-TIGALARI CONJOINER [332B | 0020 | 0002 | FFFF 0270 |] +0334 113D0; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 | FFFF FFFF |] 113D0 0334; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 | FFFF FFFF |] 113D0 0061; # () TULU-TIGALARI CONJOINER [332B 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 113D0 0041; # () TULU-TIGALARI CONJOINER [332B 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] @@ -147061,12 +147071,13 @@ A6EF 0062; # (ꛯ) BAMUM LETTER KOGHOM [4304 239A | 0020 0020 | 0002 0002 | FFFF 10D4F 0061; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 10D4F 0041; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] 10D4F 0062; # () GARAY SUKUN [4646 239A | 0020 0020 | 0002 0002 | FFFF FFFF |] -10D69 0021; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0269 |] -10D69 003F; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0270 |] -10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |] -10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] -10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] -10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 | FFFF FFFF |] +10D69 0021; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0269 |] +10D69 003F; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0270 |] +0334 10D69; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |] +10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |] +10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] +10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] +10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 | FFFF FFFF |] 10D70 0021; # () GARAY SMALL LETTER A [4648 | 0020 | 0002 | FFFF 0269 |] 10D70 003F; # () GARAY SMALL LETTER A [4648 | 0020 | 0002 | FFFF 0270 |] 10D50 0021; # () GARAY CAPITAL LETTER A [4648 | 0020 | 0008 | FFFF 0269 |] @@ -167946,6 +167957,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF 105D2 0307 003F; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 | 0002 | FFFF 0270 |] 105C9 0334; # () TODHRI LETTER EI [5237 | 0020 004A | 0002 0002 | FFFF FFFF |] 105D2 0307 0334; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 004A | 0002 0002 | FFFF FFFF |] +105D2 0334 0307; # (̴) TODHRI LETTER I, COMBINING TILDE OVERLAY [5237 | 0020 004A | 0002 0002 | FFFF FFFF |] 105C9 0061; # () TODHRI LETTER EI [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 105D2 0307 0061; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 105D2 0591 0307 0061; # (֑) TODHRI LETTER I, HEBREW ACCENT ETNAHTA, COMBINING DOT ABOVE [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] @@ -168117,6 +168129,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF 105DA 0307 003F; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 | 0002 | FFFF 0270 |] 105E4 003F; # () TODHRI LETTER U [5252 | 0020 | 0002 | FFFF 0270 |] 105DA 0307 0334; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 004A | 0002 0002 | FFFF FFFF |] +105DA 0334 0307; # (̴) TODHRI LETTER O, COMBINING TILDE OVERLAY [5252 | 0020 004A | 0002 0002 | FFFF FFFF |] 105E4 0334; # () TODHRI LETTER U [5252 | 0020 004A | 0002 0002 | FFFF FFFF |] 105DA 0307 0061; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 105E4 0061; # () TODHRI LETTER U [5252 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] @@ -169610,6 +169623,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF 1612E 0062; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 239A | 0020 0020 | 0002 0002 | FFFF FFFF |] 1612F 0021; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 0020 | 0002 | FFFF 0269 |] 1612F 003F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 0020 | 0002 | FFFF 0270 |] +0334 1612F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 | FFFF FFFF |] 1612F 0334; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 | FFFF FFFF |] 1612F 0061; # () GURUNG KHEMA SIGN THOLHOMA [5338 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 1612F 0041; # () GURUNG KHEMA SIGN THOLHOMA [5338 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt index be9ccae62..4d1117edc 100644 --- a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt +++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt @@ -1,5 +1,5 @@ # CollationTest_SHIFTED_SHORT.txt -# Date: 2024-05-02, 01:46:29 GMT +# Date: 2024-06-05, 18:49:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -24941,7 +24941,9 @@ A67D 0334 10D26 0334 0334 10D27 10D27 0334 +0334 10D6B 10D6B 0334 +0334 10D6D 10D6D 0334 0334 10F48 10F48 0334 @@ -24957,6 +24959,7 @@ A67D 0334 10F84 0334 0334 1E2AE 1E2AE 0334 +0334 1E5EE 1E5EE 0334 0316 0334 0334 0316 @@ -25104,6 +25107,7 @@ FE27 0334 10F83 0334 0334 10F85 10F85 0334 +0334 1E5EF 1E5EF 0334 0334 3099 3099 0334 @@ -25286,6 +25290,7 @@ FB1E 0334 089F 0334 0334 10EAC 10EAC 0334 +0334 0897 0897 0334 0334 0654 0654 0334 @@ -25431,6 +25436,7 @@ A6F1 0334 1C37 0334 0334 A9B3 A9B3 0334 +0334 10D6C 10D6C 0334 0334 110BA 110BA 0334 @@ -25482,6 +25488,7 @@ A9B3 0334 10A38 0334 0334 10A3A 10A3A 0334 +0334 10D6A 10D6A 0334 0334 0E48 0E48 0334 @@ -110915,7 +110922,9 @@ A8C4 0062 113CE 003F 113CF 0021 113CF 003F +0334 113CE 113CE 0334 +0334 113CF 113CF 0334 113CE 0061 113CE 0041 @@ -110925,6 +110934,7 @@ A8C4 0062 113CF 0062 113D0 0021 113D0 003F +0334 113D0 113D0 0334 113D0 0061 113D0 0041 @@ -147063,6 +147073,7 @@ A6EF 0062 10D4F 0062 10D69 0021 10D69 003F +0334 10D69 10D69 0334 10D69 0061 10D69 0041 @@ -167946,6 +167957,7 @@ A4F7 0062 105D2 0307 003F 105C9 0334 105D2 0307 0334 +105D2 0334 0307 105C9 0061 105D2 0307 0061 105D2 0591 0307 0061 @@ -168117,6 +168129,7 @@ A4F7 0062 105DA 0307 003F 105E4 003F 105DA 0307 0334 +105DA 0334 0307 105E4 0334 105DA 0307 0061 105E4 0061 @@ -169610,6 +169623,7 @@ A4F7 0062 1612E 0062 1612F 0021 1612F 003F +0334 1612F 1612F 0334 1612F 0061 1612F 0041