diff --git a/.github/workflows/cache_retain.yml b/.github/workflows/cache_retain.yml index 471d25a21..8cfc236d1 100644 --- a/.github/workflows/cache_retain.yml +++ b/.github/workflows/cache_retain.yml @@ -30,6 +30,10 @@ jobs: retain-maven-cache: name: Run all tests with Maven runs-on: ubuntu-latest + # Only run this on the upstream repo. Otherwise, running in a personal fork will cause + # Github to disable the personal fork copy of the workflow + # (Github complains about running a scheduled workflow on a repo with > 60 days of inactivity) + if: github.ref == 'refs/heads/main' && github.repository == 'unicode-org/unicodetools' steps: - name: Checkout and setup uses: actions/checkout@v2 diff --git a/unicodetools/data/ucd/dev/ArabicShaping.txt b/unicodetools/data/ucd/dev/ArabicShaping.txt index 3c9e0ca80..6f71f9214 100644 --- a/unicodetools/data/ucd/dev/ArabicShaping.txt +++ b/unicodetools/data/ucd/dev/ArabicShaping.txt @@ -482,6 +482,7 @@ 088C; TAH WITH 3 DOTS BELOW; D; TAH 088D; KEHEH WITH VERTICAL 2 DOTS BELOW; D; GAF 088E; VERTICAL TAIL; R; VERTICAL TAIL +088F; DOTLESS NOON WITH SEPARATE RING ABOVE; D; NOON 0890; ARABIC POUND MARK ABOVE; U; No_Joining_Group 0891; ARABIC PIASTRE MARK ABOVE; U; No_Joining_Group @@ -850,6 +851,8 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group 10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL 10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH 10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF +10EC6; THIN NOON; D; THIN NOON +10EC7; DOTLESS YEH WITH 4 DOTS BELOW; D; YEH # Sogdian Characters diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index be2a535d2..13a98f8d0 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -228,6 +228,7 @@ FFF0..FFFF; Specials 108E0..108FF; Hatran 10900..1091F; Phoenician 10920..1093F; Lydian +10940..1095C; Sidetic 10980..1099F; Meroitic Hieroglyphs 109A0..109FF; Meroitic Cursive 10A00..10A5F; Kharoshthi diff --git 
a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 1b7a9c156..533abab63 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ -# CaseFolding-16.0.0.txt -# Date: 2024-04-30, 21:48:11 GMT +# CaseFolding-17.0.0.txt +# Date: 2024-11-13, 22:03:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1243,7 +1243,10 @@ A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE; C; A7CF; # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2; C; A7D3; # LATIN CAPITAL LETTER DOUBLE THORN +A7D4; C; A7D5; # LATIN CAPITAL LETTER DOUBLE WYNN A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 93332092c..6eba838ff 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-10-16, 14:21:33 GMT +# Date: 2024-11-14, 18:37:36 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2065,8 +2065,27 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Newly assigned in Unicode 17.0.0 (September, 2025) +088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE +09FF ; 17.0 # BENGALI LETTER SANSKRIT BA +0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE +0C5C ; 17.0 # TELUGU ARCHAIC SHRII +0CDC ; 17.0 # KANNADA ARCHAIC SHRII +1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE +2B96 ; 17.0 # EQUALS SIGN WITH INFINITY ABOVE +A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE +A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; 17.0 # LATIN CAPITAL LETTER DOUBLE WYNN +A7F1 ; 17.0 # MODIFIER LETTER CAPITAL S +FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH +FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH +10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 +10EC5..10EC7 ; 17.0 # [3] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0..10ED8 ; 17.0 # [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFB ; 17.0 # [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON 11B60..11B67 ; 17.0 # [8] SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O -# Total code points: 8 +# Total code points: 115 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 48f7433c8..c46405c3a 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ 
b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-10-16, 14:22:01 GMT +# Date: 2024-11-14, 18:38:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -273,8 +273,8 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 01BC..01BF ; Alphabetic # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; Alphabetic # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; Alphabetic # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; Alphabetic # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; Alphabetic # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; Alphabetic # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; Alphabetic # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; Alphabetic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; Alphabetic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; Alphabetic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -344,7 +344,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Alphabetic # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL 
V..ARABIC LETTER NOON WITH RING ABOVE 0897 ; Alphabetic # Mn ARABIC PEPET 08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH @@ -386,6 +386,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 09E2..09E3 ; Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL 09F0..09F1 ; Alphabetic # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; Alphabetic # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; Alphabetic # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; Alphabetic # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; Alphabetic # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -477,7 +478,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0C4A..0C4C ; Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU 0C55..0C56 ; Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; Alphabetic # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Alphabetic # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Alphabetic # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Alphabetic # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C80 ; Alphabetic # Lo KANNADA SIGN SPACING CANDRABINDU @@ -497,7 +498,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0CCA..0CCB ; Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC ; Alphabetic # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; Alphabetic # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Alphabetic # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Alphabetic # Lo [2] KANNADA LETTER 
VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CF1..0CF2 ; Alphabetic # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -833,11 +834,8 @@ A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; Alphabetic # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Alphabetic # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Alphabetic # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Alphabetic # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; Alphabetic # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; Alphabetic # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Alphabetic # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Alphabetic # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; Alphabetic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1020,6 +1018,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 108F4..108F5 ; Alphabetic # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; Alphabetic # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Alphabetic # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; Alphabetic # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; Alphabetic # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; 
Alphabetic # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Alphabetic # Lo KHAROSHTHI LETTER A @@ -1053,7 +1052,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY +10EC5 ; Alphabetic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; Alphabetic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10EFA..10EFC ; Alphabetic # Mn [3] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1447,7 +1448,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142767 +# Total code points: 142810 # ================================================ @@ -1601,7 +1602,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 024B ; Lowercase # L& LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Lowercase # L& LATIN SMALL LETTER R WITH STROKE 024F..0293 ; Lowercase # L& [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Lowercase # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Lowercase # L& 
[26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -2079,13 +2080,14 @@ A7C3 ; Lowercase # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Lowercase # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Lowercase # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Lowercase # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Lowercase # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Lowercase # L& LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Lowercase # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lowercase # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lowercase # L& LATIN SMALL LETTER SIGMOID S A7DB ; Lowercase # L& LATIN SMALL LETTER LAMBDA -A7F2..A7F4 ; Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lowercase # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F6 ; Lowercase # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -2145,7 +2147,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2569 +# Total code points: 2570 # ================================================ @@ -2756,7 +2758,10 @@ A7C2 ; Uppercase # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Uppercase # L& [4] LATIN CAPITAL LETTER C WITH PALATAL 
HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Uppercase # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Uppercase # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Uppercase # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Uppercase # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Uppercase # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Uppercase # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA @@ -2809,7 +2814,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1978 +# Total code points: 1981 # ================================================ @@ -2827,7 +2832,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 00F8..01BA ; Cased # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL 01BC..01BF ; Cased # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C4..0293 ; Cased # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Cased # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Cased # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Cased # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Cased # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Cased # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -2917,11 +2922,8 @@ A722..A76F ; 
Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A7CD ; Cased # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Cased # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Cased # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; Cased # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; Cased # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -2994,7 +2996,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4578 +# Total code points: 4582 # ================================================ @@ -3109,7 +3111,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0B3F ; Case_Ignorable # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; Case_Ignorable # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Case_Ignorable # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Case_Ignorable # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Case_Ignorable 
# Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; Case_Ignorable # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Case_Ignorable # Mn TAMIL SIGN ANUSVARA 0BC0 ; Case_Ignorable # Mn TAMIL VOWEL SIGN II @@ -3200,7 +3202,8 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Case_Ignorable # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Case_Ignorable # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Case_Ignorable # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3280,7 +3283,7 @@ A720..A721 ; Case_Ignorable # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE.. 
A770 ; Case_Ignorable # Lm MODIFIER LETTER US A788 ; Case_Ignorable # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Case_Ignorable # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN -A7F2..A7F4 ; Case_Ignorable # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Case_Ignorable # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Case_Ignorable # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A802 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN HASANTA @@ -3356,7 +3359,8 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; Case_Ignorable # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EFA..10EFF ; Case_Ignorable # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA @@ -3514,7 +3518,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2754 +# Total code points: 2787 # ================================================ @@ -4119,7 +4123,10 @@ A7C2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ANGLICAN A7C4..A7C7 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH 
SHORT STROKE OVERLAY A7C9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA @@ -4138,7 +4145,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1460 +# Total code points: 1463 # ================================================ @@ -4756,7 +4763,10 @@ A7C3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER ANGLICANA A7C8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Changes_When_Uppercased # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S A7DB ; Changes_When_Uppercased # L& LATIN SMALL LETTER LAMBDA @@ -4778,7 +4788,7 @@ FF41..FF5A 
; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1552 +# Total code points: 1555 # ================================================ @@ -5395,7 +5405,10 @@ A7C3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER ANGLICANA A7C8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Changes_When_Titlecased # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S A7DB ; Changes_When_Titlecased # L& LATIN SMALL LETTER LAMBDA @@ -5417,7 +5430,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1479 +# Total code points: 1482 # ================================================ @@ -6031,7 +6044,10 @@ A7C2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ANGLICAN A7C4..A7C7 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL 
LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA @@ -6053,7 +6069,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1533 +# Total code points: 1536 # ================================================ @@ -6165,9 +6181,7 @@ A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H A790..A794 ; Changes_When_Casemapped # L& [5] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH PALATAL HOOK A796..A7AE ; Changes_When_Casemapped # L& [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I -A7B0..A7CD ; Changes_When_Casemapped # L& [30] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D6..A7DC ; Changes_When_Casemapped # L& [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7B0..A7DC ; Changes_When_Casemapped # L& [45] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED 
HALF H AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Casemapped # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -6194,7 +6208,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2981 +# Total code points: 2987 # ================================================ @@ -6219,8 +6233,8 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 01BC..01BF ; ID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ID_Start # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ID_Start # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ID_Start # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; ID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; ID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -6268,7 +6282,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; ID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 
-0889..088E ; ID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ID_Start # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; ID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ID_Start # Lm ARABIC SMALL FARSI YEH 0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -6289,6 +6303,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 09DF..09E1 ; ID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; ID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; ID_Start # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; ID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; ID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; ID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -6336,7 +6351,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0C2A..0C39 ; ID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; ID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; ID_Start # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; ID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -6345,7 +6360,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0CAA..0CB3 ; ID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; ID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; ID_Start # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; ID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE 
; ID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; ID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; ID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -6570,11 +6585,8 @@ A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER I A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; ID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; ID_Start # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; ID_Start # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -6711,6 +6723,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; ID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ID_Start # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ID_Start # Lo [56] MEROITIC 
HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Start # Lo KHAROSHTHI LETTER A @@ -6738,6 +6751,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; ID_Start # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6971,7 +6986,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141310 # ================================================ @@ -7000,8 +7015,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 01BC..01BF ; ID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ID_Continue # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ID_Continue # Lo [2] LATIN 
LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ID_Continue # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; ID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; ID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -7077,7 +7092,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ID_Continue # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0897..089F ; ID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH @@ -7125,6 +7140,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 09F0..09F1 ; ID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA 09FE ; ID_Continue # Mn BENGALI SANDHI MARK +09FF ; ID_Continue # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; ID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; ID_Continue # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; ID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -7185,7 +7201,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0B47..0B48 ; ID_Continue # Mc [2] ORIYA 
VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; ID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; ID_Continue # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; ID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; ID_Continue # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; ID_Continue # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; ID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; ID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -7227,7 +7243,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0C4A..0C4D ; ID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; ID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; ID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; ID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; ID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -7249,7 +7265,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0CCA..0CCB ; ID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; ID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; ID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; ID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; ID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; ID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -7466,7 +7482,8 @@ FFDA..FFDC ; ID_Start # Lo [3] 
HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; ID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; ID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; ID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -7655,11 +7672,8 @@ A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTE A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; ID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; ID_Continue # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; ID_Continue # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; 
ID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -7866,6 +7880,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 108F4..108F5 ; ID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ID_Continue # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Continue # Lo KHAROSHTHI LETTER A @@ -7904,7 +7919,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; ID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; ID_Continue # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10EFA..10EFF ; ID_Continue # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -8385,7 +8402,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; 
ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144549 +# Total code points: 144621 # ================================================ @@ -8408,8 +8425,8 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 01BC..01BF ; XID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; XID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; XID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; XID_Start # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; XID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; XID_Start # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; XID_Start # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; XID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; XID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; XID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -8456,7 +8473,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; XID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; XID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; XID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; XID_Start # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; XID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; XID_Start # Lm ARABIC SMALL FARSI YEH 0904..0939 ; XID_Start # Lo [54] 
DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -8477,6 +8494,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 09DF..09E1 ; XID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; XID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; XID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; XID_Start # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; XID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; XID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; XID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -8524,7 +8542,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0C2A..0C39 ; XID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; XID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; XID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; XID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; XID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; XID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; XID_Start # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; XID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -8533,7 +8551,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0CAA..0CB3 ; XID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; XID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; XID_Start # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; XID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; XID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; XID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; XID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; XID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -8757,11 
+8775,8 @@ A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; XID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; XID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; XID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; XID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; XID_Start # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; XID_Start # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -8903,6 +8918,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 108F4..108F5 ; XID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; XID_Start # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; XID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Start # Lo KHAROSHTHI LETTER A @@ -8930,6 +8946,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10E80..10EA9 ; XID_Start # Lo 
[42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; XID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; XID_Start # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9163,7 +9181,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141287 # ================================================ @@ -9189,8 +9207,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 01BC..01BF ; XID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; XID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; XID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; XID_Continue # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; XID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; XID_Continue # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; XID_Continue # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; XID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; 
XID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; XID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -9265,7 +9283,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; XID_Continue # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0897..089F ; XID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH @@ -9313,6 +9331,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 09F0..09F1 ; XID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; XID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA 09FE ; XID_Continue # Mn BENGALI SANDHI MARK +09FF ; XID_Continue # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; XID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; XID_Continue # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; XID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -9373,7 +9392,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0B47..0B48 ; XID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; XID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; XID_Continue # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; XID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; XID_Continue # Mn [4] ORIYA SIGN DOT 
ABOVE..ORIYA AI LENGTH MARK 0B57 ; XID_Continue # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; XID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; XID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -9415,7 +9434,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0C4A..0C4D ; XID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; XID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; XID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; XID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; XID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; XID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; XID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; XID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -9437,7 +9456,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0CCA..0CCB ; XID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; XID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; XID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; XID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; XID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; XID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -9654,7 +9673,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX 
ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; XID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; XID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; XID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -9842,11 +9862,8 @@ A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETT A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; XID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; XID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; XID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; XID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; XID_Continue # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; XID_Continue # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -10059,6 +10076,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 108F4..108F5 ; XID_Continue # Lo [2] 
HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; XID_Continue # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A @@ -10097,7 +10115,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; XID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; XID_Continue # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10EFA..10EFF ; XID_Continue # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -10578,7 +10598,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144530 +# Total code points: 144602 # ================================================ @@ -10701,7 +10721,7 @@ 
E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] ........ -# Total code points: 10554 +# Total code points: 10558 # ================================================ @@ -11582,11 +11587,15 @@ A7C7 ; NFKC_SCF; A7C8 # L& LATIN CAPITAL LETTER D WITH A7C9 ; NFKC_SCF; A7CA # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB ; NFKC_SCF; 0264 # L& LATIN CAPITAL LETTER RAMS HORN A7CC ; NFKC_SCF; A7CD # L& LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; NFKC_SCF; A7CF # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; NFKC_SCF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; NFKC_SCF; A7D3 # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; NFKC_SCF; A7D5 # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; NFKC_SCF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_SCF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; NFKC_SCF; A7DB # L& LATIN CAPITAL LETTER LAMBDA A7DC ; NFKC_SCF; 019B # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1 ; NFKC_SCF; 0073 # Lm MODIFIER LETTER CAPITAL S A7F2 ; NFKC_SCF; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; NFKC_SCF; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; NFKC_SCF; 0071 # Lm MODIFIER LETTER CAPITAL Q @@ -15282,7 +15291,7 @@ E0080..E00FF ; NFKC_SCF; # Cn [128] .... 
-# Total code points: 10516 +# Total code points: 10520 # ================================================ @@ -16052,12 +16061,15 @@ A7C2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER ANG A7C4..A7C7 ; Changes_When_NFKC_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_NFKC_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA A7DC ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Changes_When_NFKC_Casefolded # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Changes_When_NFKC_Casefolded # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Changes_When_NFKC_Casefolded # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK @@ -16300,6 +16312,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] .... 
-# Total code points: 10554 +# Total code points: 10558 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index dd739bbdb..d3582c914 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-16.0.0.txt -# Date: 2024-06-06, 10:00:03 GMT +# EastAsianWidth-17.0.0.txt +# Date: 2024-11-14, 18:38:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -180,8 +180,8 @@ 0252..0260 ; N # Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK 0261 ; A # Ll LATIN SMALL LETTER SCRIPT G 0262..0293 ; N # Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL -0294 ; N # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; N # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; N # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; N # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C3 ; N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD 02C4 ; A # Sk MODIFIER LETTER UP ARROWHEAD @@ -332,7 +332,7 @@ 0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; N # Sk ARABIC RAISED ROUND DOT -0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; N # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; N # Mn [9] 
ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -391,6 +391,7 @@ 09FC ; N # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; N # Po BENGALI ABBREVIATION SIGN 09FE ; N # Mn BENGALI SANDHI MARK +09FF ; N # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; N # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -454,7 +455,7 @@ 0B47..0B48 ; N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; N # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; N # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; N # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -502,7 +503,7 @@ 0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -528,7 +529,7 @@ 0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA 
VOWEL SIGN VOCALIC LL 0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -806,7 +807,8 @@ 1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; N # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; N # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -1351,8 +1353,7 @@ 2B55 ; W # So HEAVY LARGE CIRCLE 2B56..2B59 ; A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; N # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C5F ; N # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI 2C60..2C7B ; N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V @@ -1548,11 +1549,8 @@ A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; N # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; N # L& [2] LATIN CAPITAL 
LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; N # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; N # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; N # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1709,13 +1707,15 @@ FB43..FB44 ; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; N # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; N # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; N # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; N # Pe ORNATE LEFT PARENTHESIS FD3F ; N # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; N # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; N # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; N # Lo [12] ARABIC 
LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; N # Sc RIAL SIGN FDFD..FDFF ; N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -1904,6 +1904,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1091F ; N # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; N # Po LYDIAN TRIANGULAR MARK +10940..1095C ; N # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; N # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; N # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF @@ -1964,7 +1965,11 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; N # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; N # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index 47ee96fd4..b71569272 100644 --- 
a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ -# IndicPositionalCategory-16.0.0.txt -# Date: 2024-06-06, 10:00:04 GMT +# IndicPositionalCategory-17.0.0.txt +# Date: 2024-11-14, 18:38:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -426,7 +426,7 @@ AABB..AABC ; Visual_Order_Left # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL 0AFA..0AFF ; Top # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B01 ; Top # Mn ORIYA SIGN CANDRABINDU 0B3F ; Top # Mn ORIYA VOWEL SIGN I -0B55..0B56 ; Top # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Top # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B82 ; Top # Mn TAMIL SIGN ANUSVARA 0BC0 ; Top # Mn TAMIL VOWEL SIGN II 0BCD ; Top # Mn TAMIL SIGN VIRAMA diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index b5ceef710..6c068793d 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ -# IndicSyllabicCategory-16.0.0.txt -# Date: 2024-06-06, 10:00:04 GMT +# IndicSyllabicCategory-17.0.0.txt +# Date: 2024-11-14, 18:38:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -525,7 +525,7 @@ ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 0B41..0B44 ; Vowel_Dependent # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B47..0B48 ; Vowel_Dependent # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Vowel_Dependent # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU -0B55..0B56 ; Vowel_Dependent # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Vowel_Dependent # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Vowel_Dependent # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Vowel_Dependent # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0BBE..0BBF ; Vowel_Dependent # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I @@ -820,6 +820,7 @@ AA74..AA76 ; Consonant_Placeholder # Lo [3] MYANMAR LOGOGRAM KHAMTI OAY..MY 09DC..09DD ; Consonant # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF ; Consonant # Lo BENGALI LETTER YYA 09F0..09F1 ; Consonant # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FF ; Consonant # Lo BENGALI LETTER SANSKRIT BA 0A15..0A28 ; Consonant # Lo [20] GURMUKHI LETTER KA..GURMUKHI LETTER NA 0A2A..0A30 ; Consonant # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA 0A32..0A33 ; Consonant # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 1acb714fd..6cedc4b29 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-10-16, 14:22:09 GMT +# Date: 2024-11-14, 18:38:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -132,8 +132,8 @@ 01C0..01C3 ; AL # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..024F ; AL # L& [140] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER Y WITH STROKE 0250..0293 ; AL # Ll [68] LATIN SMALL LETTER TURNED A..LATIN SMALL LETTER EZH WITH CURL -0294 ; AL # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; AL # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; AL # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; AL # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; AL # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; AL # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6 ; AL # Lm MODIFIER LETTER CIRCUMFLEX ACCENT @@ -278,7 +278,7 @@ 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; NU # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -338,6 +338,7 @@ 09FC ; AL # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; AL # Po BENGALI ABBREVIATION SIGN 09FE ; CM # Mn BENGALI SANDHI MARK +09FF ; AL # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; CM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; CM # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; AL # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -401,7 +402,7 @@ 0B47..0B48 ; 
CM # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; CM # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; CM # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; CM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; CM # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; CM # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; AL # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; AL # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -449,7 +450,7 @@ 0C4A..0C4D ; CM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; CM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; CM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; NU # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -475,7 +476,7 @@ 0CCA..0CCB ; CM # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; CM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; CM # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; CM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; NU # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -776,7 +777,9 @@ 1AA8..1AAD ; SA # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; CM # Mn [31] COMBINING LATIN SMALL LETTER W 
BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEA ; CM # Mn [11] COMBINING LEFT TACK ABOVE..COMBINING UPWARDS ARROW ABOVE +1AEB ; GL # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B05..1B33 ; AK # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -1317,8 +1320,7 @@ 2B4D..2B54 ; AL # So [8] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..WHITE RIGHT-POINTING PENTAGON 2B55..2B59 ; AI # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; AL # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; AL # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; AL # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C5F ; AL # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI 2C60..2C7B ; AL # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; AL # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V @@ -1574,11 +1576,8 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; AL # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; AL # L& [77] LATIN CAPITAL LETTER N WITH 
DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; AL # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2537,13 +2536,15 @@ FB43..FB44 ; HL # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; HL # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; AL # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; CL # Pe ORNATE LEFT PARENTHESIS FD3F ; OP # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; AL # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; AL # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; AL # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; AL # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; PO # Sc RIAL SIGN FDFD..FDFF ; AL # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -2758,6 +2759,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1091F ; BA # Po PHOENICIAN WORD SEPARATOR 
10920..10939 ; AL # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; AL # Po LYDIAN TRIANGULAR MARK +10940..1095C ; AL # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; AL # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; AL # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; AL # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF @@ -2820,7 +2822,11 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; BA # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; CM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 3aae8f72e..c5e7583c7 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ -# NormalizationTest-16.0.0.txt -# Date: 2024-04-30, 21:48:23 GMT +# NormalizationTest-17.0.0.txt +# Date: 2024-11-14, 16:08:28 GMT # © 2024 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2429,6 +2429,7 @@ FEFA 0334;FEFA 0334;FEFA 0334;0644 0625 0334;0644 0627 0334 0655; # (ﻺ◌̴; A69C;A69C;A69C;044A;044A; # (ꚜ; ꚜ; ꚜ; ъ; ъ; ) MODIFIER LETTER CYRILLIC HARD SIGN A69D;A69D;A69D;044C;044C; # (ꚝ; ꚝ; ꚝ; ь; ь; ) MODIFIER LETTER CYRILLIC SOFT SIGN A770;A770;A770;A76F;A76F; # (ꝰ; ꝰ; ꝰ; ꝯ; ꝯ; ) MODIFIER LETTER US +A7F1;A7F1;A7F1;0053;0053; # (꟱; ꟱; ꟱; S; S; ) MODIFIER LETTER CAPITAL S A7F2;A7F2;A7F2;0043;0043; # (ꟲ; ꟲ; ꟲ; C; C; ) MODIFIER LETTER CAPITAL C A7F3;A7F3;A7F3;0046;0046; # (ꟳ; ꟳ; ꟳ; F; F; ) MODIFIER LETTER CAPITAL F A7F4;A7F4;A7F4;0051;0051; # (ꟴ; ꟴ; ꟴ; Q; Q; ) MODIFIER LETTER CAPITAL Q @@ -18098,6 +18099,60 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 1ACD 0315 0300 05AE 0062;0061 05AE 1ACD 0300 0315 0062;0061 05AE 1ACD 0300 0315 0062;0061 05AE 1ACD 0300 0315 0062;0061 05AE 1ACD 0300 0315 0062; # (a◌ᫍ◌̕◌̀◌֮b; a◌֮◌ᫍ◌̀◌̕b; a◌֮◌ᫍ◌̀◌̕b; a◌֮◌ᫍ◌̀◌̕b; a◌֮◌ᫍ◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER INSULAR R, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1ACE 0062;00E0 05AE 1ACE 0315 0062;0061 05AE 0300 1ACE 0315 0062;00E0 05AE 1ACE 0315 0062;0061 05AE 0300 1ACE 0315 0062; # (a◌̕◌̀◌֮◌ᫎb; à◌֮◌ᫎ◌̕b; a◌֮◌̀◌ᫎ◌̕b; à◌֮◌ᫎ◌̕b; a◌֮◌̀◌ᫎ◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LATIN SMALL LETTER INSULAR T, LATIN SMALL LETTER B 0061 1ACE 0315 0300 05AE 0062;0061 05AE 1ACE 0300 0315 0062;0061 05AE 1ACE 0300 0315 0062;0061 05AE 1ACE 0300 0315 0062;0061 05AE 1ACE 0300 0315 0062; # (a◌ᫎ◌̕◌̀◌֮b; a◌֮◌ᫎ◌̀◌̕b; a◌֮◌ᫎ◌̀◌̕b; a◌֮◌ᫎ◌̀◌̕b; a◌֮◌ᫎ◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER INSULAR T, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 
+0061 0315 0300 05AE 1ACF 0062;00E0 05AE 1ACF 0315 0062;0061 05AE 0300 1ACF 0315 0062;00E0 05AE 1ACF 0315 0062;0061 05AE 0300 1ACF 0315 0062; # (a◌̕◌̀◌֮◌᫏b; à◌֮◌᫏◌̕b; a◌֮◌̀◌᫏◌̕b; à◌֮◌᫏◌̕b; a◌֮◌̀◌᫏◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DOUBLE CARON, LATIN SMALL LETTER B +0061 1ACF 0315 0300 05AE 0062;0061 05AE 1ACF 0300 0315 0062;0061 05AE 1ACF 0300 0315 0062;0061 05AE 1ACF 0300 0315 0062;0061 05AE 1ACF 0300 0315 0062; # (a◌᫏◌̕◌̀◌֮b; a◌֮◌᫏◌̀◌̕b; a◌֮◌᫏◌̀◌̕b; a◌֮◌᫏◌̀◌̕b; a◌֮◌᫏◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOUBLE CARON, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD0 0062;00E0 05AE 1AD0 0315 0062;0061 05AE 0300 1AD0 0315 0062;00E0 05AE 1AD0 0315 0062;0061 05AE 0300 1AD0 0315 0062; # (a◌̕◌̀◌֮◌᫐b; à◌֮◌᫐◌̕b; a◌֮◌̀◌᫐◌̕b; à◌֮◌᫐◌̕b; a◌֮◌̀◌᫐◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-ACUTE, LATIN SMALL LETTER B +0061 1AD0 0315 0300 05AE 0062;0061 05AE 1AD0 0300 0315 0062;0061 05AE 1AD0 0300 0315 0062;0061 05AE 1AD0 0300 0315 0062;0061 05AE 1AD0 0300 0315 0062; # (a◌᫐◌̕◌̀◌֮b; a◌֮◌᫐◌̀◌̕b; a◌֮◌᫐◌̀◌̕b; a◌֮◌᫐◌̀◌̕b; a◌֮◌᫐◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-ACUTE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD1 0062;00E0 05AE 1AD1 0315 0062;0061 05AE 0300 1AD1 0315 0062;00E0 05AE 1AD1 0315 0062;0061 05AE 0300 1AD1 0315 0062; # (a◌̕◌̀◌֮◌᫑b; à◌֮◌᫑◌̕b; a◌֮◌̀◌᫑◌̕b; à◌֮◌᫑◌̕b; a◌֮◌̀◌᫑◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING GRAVE-VERTICAL-LINE, LATIN SMALL LETTER B +0061 1AD1 0315 0300 05AE 0062;0061 05AE 1AD1 0300 0315 0062;0061 05AE 1AD1 0300 0315 0062;0061 05AE 1AD1 0300 0315 0062;0061 05AE 1AD1 0300 0315 0062; # (a◌᫑◌̕◌̀◌֮b; a◌֮◌᫑◌̀◌̕b; a◌֮◌᫑◌̀◌̕b; a◌֮◌᫑◌̀◌̕b; a◌֮◌᫑◌̀◌̕b; ) LATIN SMALL 
LETTER A, COMBINING GRAVE-VERTICAL-LINE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD2 0062;00E0 05AE 1AD2 0315 0062;0061 05AE 0300 1AD2 0315 0062;00E0 05AE 1AD2 0315 0062;0061 05AE 0300 1AD2 0315 0062; # (a◌̕◌̀◌֮◌᫒b; à◌֮◌᫒◌̕b; a◌֮◌̀◌᫒◌̕b; à◌֮◌᫒◌̕b; a◌֮◌̀◌᫒◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-GRAVE, LATIN SMALL LETTER B +0061 1AD2 0315 0300 05AE 0062;0061 05AE 1AD2 0300 0315 0062;0061 05AE 1AD2 0300 0315 0062;0061 05AE 1AD2 0300 0315 0062;0061 05AE 1AD2 0300 0315 0062; # (a◌᫒◌̕◌̀◌֮b; a◌֮◌᫒◌̀◌̕b; a◌֮◌᫒◌̀◌̕b; a◌֮◌᫒◌̀◌̕b; a◌֮◌᫒◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-GRAVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD3 0062;00E0 05AE 1AD3 0315 0062;0061 05AE 0300 1AD3 0315 0062;00E0 05AE 1AD3 0315 0062;0061 05AE 0300 1AD3 0315 0062; # (a◌̕◌̀◌֮◌᫓b; à◌֮◌᫓◌̕b; a◌֮◌̀◌᫓◌̕b; à◌֮◌᫓◌̕b; a◌֮◌̀◌᫓◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING ACUTE-VERTICAL-LINE, LATIN SMALL LETTER B +0061 1AD3 0315 0300 05AE 0062;0061 05AE 1AD3 0300 0315 0062;0061 05AE 1AD3 0300 0315 0062;0061 05AE 1AD3 0300 0315 0062;0061 05AE 1AD3 0300 0315 0062; # (a◌᫓◌̕◌̀◌֮b; a◌֮◌᫓◌̀◌̕b; a◌֮◌᫓◌̀◌̕b; a◌֮◌᫓◌̀◌̕b; a◌֮◌᫓◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING ACUTE-VERTICAL-LINE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD4 0062;00E0 05AE 1AD4 0315 0062;0061 05AE 0300 1AD4 0315 0062;00E0 05AE 1AD4 0315 0062;0061 05AE 0300 1AD4 0315 0062; # (a◌̕◌̀◌֮◌᫔b; à◌֮◌᫔◌̕b; a◌֮◌̀◌᫔◌̕b; à◌֮◌᫔◌̕b; a◌֮◌̀◌᫔◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-MACRON, LATIN SMALL LETTER B +0061 1AD4 0315 0300 05AE 0062;0061 05AE 1AD4 0300 0315 0062;0061 05AE 1AD4 
0300 0315 0062;0061 05AE 1AD4 0300 0315 0062;0061 05AE 1AD4 0300 0315 0062; # (a◌᫔◌̕◌̀◌֮b; a◌֮◌᫔◌̀◌̕b; a◌֮◌᫔◌̀◌̕b; a◌֮◌᫔◌̀◌̕b; a◌֮◌᫔◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-MACRON, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD5 0062;00E0 05AE 1AD5 0315 0062;0061 05AE 0300 1AD5 0315 0062;00E0 05AE 1AD5 0315 0062;0061 05AE 0300 1AD5 0315 0062; # (a◌̕◌̀◌֮◌᫕b; à◌֮◌᫕◌̕b; a◌֮◌̀◌᫕◌̕b; à◌֮◌᫕◌̕b; a◌֮◌̀◌᫕◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING MACRON-VERTICAL-LINE, LATIN SMALL LETTER B +0061 1AD5 0315 0300 05AE 0062;0061 05AE 1AD5 0300 0315 0062;0061 05AE 1AD5 0300 0315 0062;0061 05AE 1AD5 0300 0315 0062;0061 05AE 1AD5 0300 0315 0062; # (a◌᫕◌̕◌̀◌֮b; a◌֮◌᫕◌̀◌̕b; a◌֮◌᫕◌̀◌̕b; a◌֮◌᫕◌̀◌̕b; a◌֮◌᫕◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING MACRON-VERTICAL-LINE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD6 0062;00E0 05AE 1AD6 0315 0062;0061 05AE 0300 1AD6 0315 0062;00E0 05AE 1AD6 0315 0062;0061 05AE 0300 1AD6 0315 0062; # (a◌̕◌̀◌֮◌᫖b; à◌֮◌᫖◌̕b; a◌֮◌̀◌᫖◌̕b; à◌֮◌᫖◌̕b; a◌֮◌̀◌᫖◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-ACUTE-GRAVE, LATIN SMALL LETTER B +0061 1AD6 0315 0300 05AE 0062;0061 05AE 1AD6 0300 0315 0062;0061 05AE 1AD6 0300 0315 0062;0061 05AE 1AD6 0300 0315 0062;0061 05AE 1AD6 0300 0315 0062; # (a◌᫖◌̕◌̀◌֮b; a◌֮◌᫖◌̀◌̕b; a◌֮◌᫖◌̀◌̕b; a◌֮◌᫖◌̀◌̕b; a◌֮◌᫖◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-ACUTE-GRAVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD7 0062;00E0 05AE 1AD7 0315 0062;0061 05AE 0300 1AD7 0315 0062;00E0 05AE 1AD7 0315 0062;0061 05AE 0300 1AD7 0315 0062; # (a◌̕◌̀◌֮◌᫗b; à◌֮◌᫗◌̕b; a◌֮◌̀◌᫗◌̕b; à◌֮◌᫗◌̕b; a◌֮◌̀◌᫗◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING 
GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-GRAVE-ACUTE, LATIN SMALL LETTER B +0061 1AD7 0315 0300 05AE 0062;0061 05AE 1AD7 0300 0315 0062;0061 05AE 1AD7 0300 0315 0062;0061 05AE 1AD7 0300 0315 0062;0061 05AE 1AD7 0300 0315 0062; # (a◌᫗◌̕◌̀◌֮b; a◌֮◌᫗◌̀◌̕b; a◌֮◌᫗◌̀◌̕b; a◌֮◌᫗◌̀◌̕b; a◌֮◌᫗◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-GRAVE-ACUTE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD8 0062;00E0 05AE 1AD8 0315 0062;0061 05AE 0300 1AD8 0315 0062;00E0 05AE 1AD8 0315 0062;0061 05AE 0300 1AD8 0315 0062; # (a◌̕◌̀◌֮◌᫘b; à◌֮◌᫘◌̕b; a◌֮◌̀◌᫘◌̕b; à◌֮◌᫘◌̕b; a◌֮◌̀◌᫘◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING MACRON-ACUTE-GRAVE, LATIN SMALL LETTER B +0061 1AD8 0315 0300 05AE 0062;0061 05AE 1AD8 0300 0315 0062;0061 05AE 1AD8 0300 0315 0062;0061 05AE 1AD8 0300 0315 0062;0061 05AE 1AD8 0300 0315 0062; # (a◌᫘◌̕◌̀◌֮b; a◌֮◌᫘◌̀◌̕b; a◌֮◌᫘◌̀◌̕b; a◌֮◌᫘◌̀◌̕b; a◌֮◌᫘◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING MACRON-ACUTE-GRAVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD9 0062;00E0 05AE 1AD9 0315 0062;0061 05AE 0300 1AD9 0315 0062;00E0 05AE 1AD9 0315 0062;0061 05AE 0300 1AD9 0315 0062; # (a◌̕◌̀◌֮◌᫙b; à◌֮◌᫙◌̕b; a◌֮◌̀◌᫙◌̕b; à◌֮◌᫙◌̕b; a◌֮◌̀◌᫙◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING SHARP SIGN, LATIN SMALL LETTER B +0061 1AD9 0315 0300 05AE 0062;0061 05AE 1AD9 0300 0315 0062;0061 05AE 1AD9 0300 0315 0062;0061 05AE 1AD9 0300 0315 0062;0061 05AE 1AD9 0300 0315 0062; # (a◌᫙◌̕◌̀◌֮b; a◌֮◌᫙◌̀◌̕b; a◌֮◌᫙◌̀◌̕b; a◌֮◌᫙◌̀◌̕b; a◌֮◌᫙◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING SHARP SIGN, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1ADA 0062;00E0 05AE 1ADA 0315 0062;0061 05AE 0300 1ADA 0315 0062;00E0 05AE 1ADA 0315 0062;0061 05AE 
0300 1ADA 0315 0062; # (a◌̕◌̀◌֮◌᫚b; à◌֮◌᫚◌̕b; a◌֮◌̀◌᫚◌̕b; à◌֮◌᫚◌̕b; a◌֮◌̀◌᫚◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING FLAT SIGN, LATIN SMALL LETTER B +0061 1ADA 0315 0300 05AE 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062; # (a◌᫚◌̕◌̀◌֮b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING FLAT SIGN, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1ADB 0062;00E0 05AE 1ADB 0315 0062;0061 05AE 0300 1ADB 0315 0062;00E0 05AE 1ADB 0315 0062;0061 05AE 0300 1ADB 0315 0062; # (a◌̕◌̀◌֮◌᫛b; à◌֮◌᫛◌̕b; a◌֮◌̀◌᫛◌̕b; à◌֮◌᫛◌̕b; a◌֮◌̀◌᫛◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DOWN TACK ABOVE, LATIN SMALL LETTER B +0061 1ADB 0315 0300 05AE 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062; # (a◌᫛◌̕◌̀◌֮b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOWN TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1ADC 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062; # (a◌̕◌̀◌֮◌᫜b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DIAERESIS WITH RAISED LEFT DOT, LATIN SMALL LETTER B +0061 1ADC 0315 0300 05AE 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062; # (a◌᫜◌̕◌̀◌֮b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DIAERESIS WITH RAISED LEFT DOT, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, 
LATIN SMALL LETTER B +0061 059A 0316 1DFA 1ADD 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062; # (a◌֚◌̖◌᷺◌᫝b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, COMBINING DOT-AND-RING BELOW, LATIN SMALL LETTER B +0061 1ADD 059A 0316 1DFA 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062; # (a◌᫝◌֚◌̖◌᷺b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; ) LATIN SMALL LETTER A, COMBINING DOT-AND-RING BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE0 0062;00E0 05AE 1AE0 0315 0062;0061 05AE 0300 1AE0 0315 0062;00E0 05AE 1AE0 0315 0062;0061 05AE 0300 1AE0 0315 0062; # (a◌̕◌̀◌֮◌᫠b; à◌֮◌᫠◌̕b; a◌֮◌̀◌᫠◌̕b; à◌֮◌᫠◌̕b; a◌֮◌̀◌᫠◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LEFT TACK ABOVE, LATIN SMALL LETTER B +0061 1AE0 0315 0300 05AE 0062;0061 05AE 1AE0 0300 0315 0062;0061 05AE 1AE0 0300 0315 0062;0061 05AE 1AE0 0300 0315 0062;0061 05AE 1AE0 0300 0315 0062; # (a◌᫠◌̕◌̀◌֮b; a◌֮◌᫠◌̀◌̕b; a◌֮◌᫠◌̀◌̕b; a◌֮◌᫠◌̀◌̕b; a◌֮◌᫠◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LEFT TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE1 0062;00E0 05AE 1AE1 0315 0062;0061 05AE 0300 1AE1 0315 0062;00E0 05AE 1AE1 0315 0062;0061 05AE 0300 1AE1 0315 0062; # (a◌̕◌̀◌֮◌᫡b; à◌֮◌᫡◌̕b; a◌֮◌̀◌᫡◌̕b; à◌֮◌᫡◌̕b; a◌֮◌̀◌᫡◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING RIGHT TACK ABOVE, LATIN SMALL LETTER B +0061 1AE1 0315 0300 05AE 0062;0061 05AE 1AE1 0300 0315 0062;0061 05AE 1AE1 0300 0315 0062;0061 05AE 1AE1 0300 0315 0062;0061 05AE 1AE1 0300 0315 0062; # (a◌᫡◌̕◌̀◌֮b; a◌֮◌᫡◌̀◌̕b; 
a◌֮◌᫡◌̀◌̕b; a◌֮◌᫡◌̀◌̕b; a◌֮◌᫡◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING RIGHT TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE2 0062;00E0 05AE 1AE2 0315 0062;0061 05AE 0300 1AE2 0315 0062;00E0 05AE 1AE2 0315 0062;0061 05AE 0300 1AE2 0315 0062; # (a◌̕◌̀◌֮◌᫢b; à◌֮◌᫢◌̕b; a◌֮◌̀◌᫢◌̕b; à◌֮◌᫢◌̕b; a◌֮◌̀◌᫢◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING MINUS SIGN ABOVE, LATIN SMALL LETTER B +0061 1AE2 0315 0300 05AE 0062;0061 05AE 1AE2 0300 0315 0062;0061 05AE 1AE2 0300 0315 0062;0061 05AE 1AE2 0300 0315 0062;0061 05AE 1AE2 0300 0315 0062; # (a◌᫢◌̕◌̀◌֮b; a◌֮◌᫢◌̀◌̕b; a◌֮◌᫢◌̀◌̕b; a◌֮◌᫢◌̀◌̕b; a◌֮◌᫢◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING MINUS SIGN ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE3 0062;00E0 05AE 1AE3 0315 0062;0061 05AE 0300 1AE3 0315 0062;00E0 05AE 1AE3 0315 0062;0061 05AE 0300 1AE3 0315 0062; # (a◌̕◌̀◌֮◌᫣b; à◌֮◌᫣◌̕b; a◌֮◌̀◌᫣◌̕b; à◌֮◌᫣◌̕b; a◌֮◌̀◌᫣◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING INVERTED BRIDGE ABOVE, LATIN SMALL LETTER B +0061 1AE3 0315 0300 05AE 0062;0061 05AE 1AE3 0300 0315 0062;0061 05AE 1AE3 0300 0315 0062;0061 05AE 1AE3 0300 0315 0062;0061 05AE 1AE3 0300 0315 0062; # (a◌᫣◌̕◌̀◌֮b; a◌֮◌᫣◌̀◌̕b; a◌֮◌᫣◌̀◌̕b; a◌֮◌᫣◌̀◌̕b; a◌֮◌᫣◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING INVERTED BRIDGE ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE4 0062;00E0 05AE 1AE4 0315 0062;0061 05AE 0300 1AE4 0315 0062;00E0 05AE 1AE4 0315 0062;0061 05AE 0300 1AE4 0315 0062; # (a◌̕◌̀◌֮◌᫤b; à◌֮◌᫤◌̕b; a◌֮◌̀◌᫤◌̕b; à◌֮◌᫤◌̕b; a◌֮◌̀◌᫤◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING SQUARE ABOVE, LATIN SMALL LETTER B +0061 1AE4 0315 0300 05AE 0062;0061 05AE 
1AE4 0300 0315 0062;0061 05AE 1AE4 0300 0315 0062;0061 05AE 1AE4 0300 0315 0062;0061 05AE 1AE4 0300 0315 0062; # (a◌᫤◌̕◌̀◌֮b; a◌֮◌᫤◌̀◌̕b; a◌֮◌᫤◌̀◌̕b; a◌֮◌᫤◌̀◌̕b; a◌֮◌᫤◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING SQUARE ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE5 0062;00E0 05AE 1AE5 0315 0062;0061 05AE 0300 1AE5 0315 0062;00E0 05AE 1AE5 0315 0062;0061 05AE 0300 1AE5 0315 0062; # (a◌̕◌̀◌֮◌᫥b; à◌֮◌᫥◌̕b; a◌֮◌̀◌᫥◌̕b; à◌֮◌᫥◌̕b; a◌֮◌̀◌᫥◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING SEAGULL ABOVE, LATIN SMALL LETTER B +0061 1AE5 0315 0300 05AE 0062;0061 05AE 1AE5 0300 0315 0062;0061 05AE 1AE5 0300 0315 0062;0061 05AE 1AE5 0300 0315 0062;0061 05AE 1AE5 0300 0315 0062; # (a◌᫥◌̕◌̀◌֮b; a◌֮◌᫥◌̀◌̕b; a◌֮◌᫥◌̀◌̕b; a◌֮◌᫥◌̀◌̕b; a◌֮◌᫥◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING SEAGULL ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 1AE6 0062;0061 1DFA 0316 1AE6 059A 0062;0061 1DFA 0316 1AE6 059A 0062;0061 1DFA 0316 1AE6 059A 0062;0061 1DFA 0316 1AE6 059A 0062; # (a◌֚◌̖◌᷺◌᫦b; a◌᷺◌̖◌᫦◌֚b; a◌᷺◌̖◌᫦◌֚b; a◌᷺◌̖◌᫦◌֚b; a◌᷺◌̖◌᫦◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, COMBINING DOUBLE ARCH BELOW, LATIN SMALL LETTER B +0061 1AE6 059A 0316 1DFA 0062;0061 1DFA 1AE6 0316 059A 0062;0061 1DFA 1AE6 0316 059A 0062;0061 1DFA 1AE6 0316 059A 0062;0061 1DFA 1AE6 0316 059A 0062; # (a◌᫦◌֚◌̖◌᷺b; a◌᷺◌᫦◌̖◌֚b; a◌᷺◌᫦◌̖◌֚b; a◌᷺◌᫦◌̖◌֚b; a◌᷺◌᫦◌̖◌֚b; ) LATIN SMALL LETTER A, COMBINING DOUBLE ARCH BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE7 0062;00E0 05AE 1AE7 0315 0062;0061 05AE 0300 1AE7 0315 0062;00E0 05AE 1AE7 0315 0062;0061 05AE 0300 1AE7 0315 0062; # (a◌̕◌̀◌֮◌᫧b; à◌֮◌᫧◌̕b; a◌֮◌̀◌᫧◌̕b; à◌֮◌᫧◌̕b; a◌֮◌̀◌᫧◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE 
RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DOUBLE ARCH ABOVE, LATIN SMALL LETTER B +0061 1AE7 0315 0300 05AE 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062; # (a◌᫧◌̕◌̀◌֮b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOUBLE ARCH ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE8 0062;00E0 05AE 1AE8 0315 0062;0061 05AE 0300 1AE8 0315 0062;00E0 05AE 1AE8 0315 0062;0061 05AE 0300 1AE8 0315 0062; # (a◌̕◌̀◌֮◌᫨b; à◌֮◌᫨◌̕b; a◌֮◌̀◌᫨◌̕b; à◌֮◌᫨◌̕b; a◌֮◌̀◌᫨◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING EQUALS SIGN ABOVE, LATIN SMALL LETTER B +0061 1AE8 0315 0300 05AE 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062; # (a◌᫨◌̕◌̀◌֮b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING EQUALS SIGN ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE9 0062;00E0 05AE 1AE9 0315 0062;0061 05AE 0300 1AE9 0315 0062;00E0 05AE 1AE9 0315 0062;0061 05AE 0300 1AE9 0315 0062; # (a◌̕◌̀◌֮◌᫩b; à◌֮◌᫩◌̕b; a◌֮◌̀◌᫩◌̕b; à◌֮◌᫩◌̕b; a◌֮◌̀◌᫩◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LEFT ANGLE CENTRED ABOVE, LATIN SMALL LETTER B +0061 1AE9 0315 0300 05AE 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062; # (a◌᫩◌̕◌̀◌֮b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LEFT ANGLE CENTRED ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AEA 0062;00E0 05AE 1AEA 0315 0062;0061 05AE 0300 1AEA 0315 0062;00E0 05AE 
1AEA 0315 0062;0061 05AE 0300 1AEA 0315 0062; # (a◌̕◌̀◌֮◌᫪b; à◌֮◌᫪◌̕b; a◌֮◌̀◌᫪◌̕b; à◌֮◌᫪◌̕b; a◌֮◌̀◌᫪◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING UPWARDS ARROW ABOVE, LATIN SMALL LETTER B +0061 1AEA 0315 0300 05AE 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062; # (a◌᫪◌̕◌̀◌֮b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING UPWARDS ARROW ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0345 035D 035C 1AEB 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062; # (a◌ͅ◌͝◌͜◌᫫b; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; ) LATIN SMALL LETTER A, COMBINING GREEK YPOGEGRAMMENI, COMBINING DOUBLE BREVE, COMBINING DOUBLE BREVE BELOW, COMBINING DOUBLE RIGHTWARDS ARROW ABOVE, LATIN SMALL LETTER B +0061 1AEB 0345 035D 035C 0062;0061 035C 1AEB 035D 0345 0062;0061 035C 1AEB 035D 0345 0062;0061 035C 1AEB 035D 0345 0062;0061 035C 1AEB 035D 0345 0062; # (a◌᫫◌ͅ◌͝◌͜b; a◌͜◌᫫◌͝◌ͅb; a◌͜◌᫫◌͝◌ͅb; a◌͜◌᫫◌͝◌ͅb; a◌͜◌᫫◌͝◌ͅb; ) LATIN SMALL LETTER A, COMBINING DOUBLE RIGHTWARDS ARROW ABOVE, COMBINING GREEK YPOGEGRAMMENI, COMBINING DOUBLE BREVE, COMBINING DOUBLE BREVE BELOW, LATIN SMALL LETTER B 0061 3099 093C 16FF0 1B34 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062; # (a◌゙◌𖿰़◌᬴b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; ) LATIN SMALL LETTER A, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, BALINESE SIGN REREKAN, LATIN SMALL LETTER B 0061 1B34 3099 093C 16FF0 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062; # (a◌᬴◌゙◌𖿰़b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; ) 
LATIN SMALL LETTER A, BALINESE SIGN REREKAN, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, LATIN SMALL LETTER B 0061 05B0 094D 3099 1B44 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062; # (a◌ְ◌्◌゙᭄b; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, BALINESE ADEG ADEG, LATIN SMALL LETTER B @@ -18646,6 +18701,10 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 10EAB 0315 0300 05AE 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062; # (a◌𐺫◌̕◌̀◌֮b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING HAMZA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 10EAC 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062; # (a◌̕◌̀◌֮◌𐺬b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, YEZIDI COMBINING MADDA MARK, LATIN SMALL LETTER B 0061 10EAC 0315 0300 05AE 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062; # (a◌𐺬◌̕◌̀◌֮b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING MADDA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EFA 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062; # (a◌֚◌̖◌᷺◌𐻺b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING 
GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC DOUBLE VERTICAL BAR BELOW, LATIN SMALL LETTER B +0061 10EFA 059A 0316 1DFA 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062; # (a◌𐻺◌֚◌̖◌᷺b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC DOUBLE VERTICAL BAR BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EFB 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062; # (a◌֚◌̖◌᷺◌𐻻b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW NOON, LATIN SMALL LETTER B +0061 10EFB 059A 0316 1DFA 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062; # (a◌𐻻◌֚◌̖◌᷺b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW NOON, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFD 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062; # (a◌֚◌̖◌᷺◌𐻽b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD SAKTA, LATIN SMALL LETTER B 0061 10EFD 059A 0316 1DFA 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062; # (a◌𐻽◌֚◌̖◌᷺b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW WORD SAKTA, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFE 0062;0061 1DFA 0316 10EFE 
059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062; # (a◌֚◌̖◌᷺◌𐻾b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD QASR, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 992698ee3..d41c02c99 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-10-16, 14:22:22 GMT +# Date: 2024-11-14, 18:38:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -702,7 +702,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E 10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY +10EFA..10EFC ; Other_Alphabetic # Mn [3] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC COMBINING ALEF OVERLAY 11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA @@ -864,7 +864,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1503 +# Total code points: 1505 # ================================================ @@ -953,7 +953,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK 
COMPATIBILITY IDEOGRAPH-FA70..CJK COM 0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B3C ; Diacritic # Mn ORIYA SIGN NUKTA 0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA -0B55 ; Diacritic # Mn ORIYA SIGN OVERLINE +0B53..0B55 ; Diacritic # Mn [3] ORIYA SIGN DOT ABOVE..ORIYA SIGN OVERLINE 0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA 0C3C ; Diacritic # Mn TELUGU SIGN NUKTA 0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA @@ -996,6 +996,8 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Diacritic # Me COMBINING PARENTHESES OVERLAY 1AC1..1ACB ; Diacritic # Mn [11] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING TRIPLE ACUTE ACCENT +1ACF..1ADD ; Diacritic # Mn [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Diacritic # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B34 ; Diacritic # Mn BALINESE SIGN REREKAN 1B44 ; Diacritic # Mc BALINESE ADEG ADEG 1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG @@ -1040,6 +1042,7 @@ A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F1 ; Diacritic # Lm MODIFIER LETTER CAPITAL S A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A806 ; Diacritic # Mn SYLOTI NAGRI SIGN HASANTA A82C ; Diacritic # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA @@ -1083,6 +1086,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D4E ; Diacritic # Lm GARAY VOWEL 
LENGTH MARK 10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EFA ; Diacritic # Mn ARABIC DOUBLE VERTICAL BAR BELOW 10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -1156,7 +1160,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1178 +# Total code points: 1209 # ================================================ @@ -1166,7 +1170,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 07FA ; Extender # Lm NKO LAJANYALAN 0A71 ; Extender # Mn GURMUKHI ADDAK 0AFB ; Extender # Mn GUJARATI SIGN SHADDA -0B55 ; Extender # Mn ORIYA SIGN OVERLINE +0B54..0B55 ; Extender # Mn [2] ORIYA SIGN DOUBLE DOT ABOVE..ORIYA SIGN OVERLINE 0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK 0EC6 ; Extender # Lm LAO KO LA 180A ; Extender # Po MONGOLIAN NIRUGU @@ -1203,7 +1207,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 1E5EF ; Extender # Mn OL ONAL SIGN IKIR 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 59 +# Total code points: 60 # ================================================ @@ -1226,7 +1230,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; Other_Lowercase # Lm MODIFIER LETTER US -A7F2..A7F4 ; Other_Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Other_Lowercase # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER 
CAPITAL Q A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W @@ -1236,7 +1240,7 @@ AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -# Total code points: 311 +# Total code points: 312 # ================================================ @@ -1815,9 +1819,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR 2B74..2B75 ; Pattern_Syntax # Cn [2] .. 
-2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B96 ; Pattern_Syntax # Cn -2B97..2BFF ; Pattern_Syntax # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; Pattern_Syntax # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER 2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET 2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index db30068ea..aee447d3e 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-10-16, 14:22:25 GMT +# Date: 2024-11-14, 18:38:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -424,6 +424,7 @@ blk; Sharada_Sup ; Sharada_Supplement blk; Shavian ; Shavian blk; Shorthand_Format_Controls ; Shorthand_Format_Controls blk; Siddham ; Siddham +blk; Sidetic ; Sidetic blk; Sinhala ; Sinhala blk; Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers blk; Small_Forms ; Small_Form_Variants @@ -1090,6 +1091,7 @@ jg ; Taw ; Taw jg ; Teh_Marbuta ; Teh_Marbuta jg ; Teh_Marbuta_Goal ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal jg ; Teth ; Teth +jg ; Thin_Noon ; Thin_Noon jg ; Thin_Yeh ; Thin_Yeh jg ; Vertical_Tail ; Vertical_Tail jg ; Waw ; Waw @@ -1440,6 +1442,7 @@ sc ; Sgnw ; SignWriting sc ; Shaw ; Shavian sc ; Shrd ; Sharada sc ; Sidd ; Siddham +sc ; Sidt ; Sidetic sc ; Sind ; Khudawadi sc ; Sinh ; Sinhala sc ; Sogd ; Sogdian diff --git a/unicodetools/data/ucd/dev/ScriptExtensions.txt b/unicodetools/data/ucd/dev/ScriptExtensions.txt index 140901a87..27e16cbe4 100644 --- a/unicodetools/data/ucd/dev/ScriptExtensions.txt +++ b/unicodetools/data/ucd/dev/ScriptExtensions.txt @@ -1,5 +1,5 @@ -# ScriptExtensions-16.0.0.txt -# Date: 2024-07-30, 19:38:00 GMT +# ScriptExtensions-17.0.0.txt +# Date: 2024-10-16, 17:28:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 756559f21..90997ecbf 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ -# Scripts-16.0.0.txt -# Date: 2024-06-06, 10:00:28 GMT +# Scripts-17.0.0.txt +# Date: 2024-11-14, 18:38:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -306,8 +306,7 @@ 2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; Common # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; Common # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER 2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET 2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET @@ -633,7 +632,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9053 +# Total code points: 9054 # ================================================ @@ -648,8 +647,8 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG 01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; Latin # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; Latin # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02E0..02E4 ; Latin # Lm [5] 
MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN @@ -676,11 +675,8 @@ A770 ; Latin # Lm MODIFIER LETTER US A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; Latin # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Latin # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Latin # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Latin # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; Latin # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; Latin # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -702,7 +698,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1487 +# Total code points: 1492 # ================================================ @@ -869,7 +865,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE 0870..0887 
; Arabic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; Arabic # Sk ARABIC RAISED ROUND DOT -0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Arabic # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; Arabic # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -878,11 +874,13 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; Arabic # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; Arabic # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD40..FD4F ; Arabic # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; Arabic # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; Arabic # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; Arabic # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; Arabic # Sc RIAL SIGN FDFD..FDFF ; Arabic # So [3] ARABIC 
LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -890,7 +888,11 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; Arabic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; Arabic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; Arabic # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; Arabic # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; Arabic # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM @@ -926,7 +928,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1373 +# Total code points: 1413 # ================================================ @@ -1014,8 +1016,9 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 09FC ; Bengali # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; Bengali # Po BENGALI ABBREVIATION SIGN 09FE ; Bengali # Mn BENGALI SANDHI MARK +09FF ; Bengali # Lo BENGALI LETTER SANSKRIT BA -# Total code points: 96 +# Total code points: 97 
# ================================================ @@ -1092,7 +1095,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; Oriya # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Oriya # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Oriya # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Oriya # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -1102,7 +1105,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0B71 ; Oriya # Lo ORIYA LETTER WA 0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS -# Total code points: 91 +# Total code points: 93 # ================================================ @@ -1155,7 +1158,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; Telugu # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Telugu # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Telugu # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -1163,7 +1166,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F ; Telugu # So TELUGU SIGN TUUMU -# Total code points: 100 +# Total code points: 101 # ================================================ @@ -1186,14 +1189,14 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL 
SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; Kannada # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Kannada # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0CF3 ; Kannada # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT -# Total code points: 91 +# Total code points: 92 # ================================================ @@ -1647,7 +1650,8 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE 0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT 1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Inherited # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Inherited # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Inherited # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL @@ -1676,7 +1680,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON 1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO E0100..E01EF ; Inherited # Mn [240] VARIATION 
SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 657 +# Total code points: 684 # ================================================ @@ -3131,4 +3135,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # Total code points: 80 +# ================================================ + +10940..1095C ; Sidetic # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 + +# Total code points: 29 + # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 5a9a7897e..fcf03d716 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -659,7 +659,7 @@ 0292;LATIN SMALL LETTER EZH;Ll;0;L;;;;;N;LATIN SMALL LETTER YOGH;;01B7;;01B7 0293;LATIN SMALL LETTER EZH WITH CURL;Ll;0;L;;;;;N;LATIN SMALL LETTER YOGH CURL;;;; 0294;LATIN LETTER GLOTTAL STOP;Lo;0;L;;;;;N;;;;; -0295;LATIN LETTER PHARYNGEAL VOICED FRICATIVE;Ll;0;L;;;;;N;LATIN LETTER REVERSED GLOTTAL STOP;;;; +0295;LATIN LETTER PHARYNGEAL VOICED FRICATIVE;Lo;0;L;;;;;N;LATIN LETTER REVERSED GLOTTAL STOP;;;; 0296;LATIN LETTER INVERTED GLOTTAL STOP;Ll;0;L;;;;;N;;;;; 0297;LATIN LETTER STRETCHED C;Ll;0;L;;;;;N;;;;; 0298;LATIN LETTER BILABIAL CLICK;Ll;0;L;;;;;N;LATIN LETTER BULLSEYE;;;; @@ -2121,6 +2121,7 @@ 088C;ARABIC LETTER TAH WITH THREE DOTS BELOW;Lo;0;AL;;;;;N;;;;; 088D;ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 088E;ARABIC VERTICAL TAIL;Lo;0;AL;;;;;N;;;;; +088F;ARABIC LETTER NOON WITH RING ABOVE;Lo;0;AL;;;;;N;;;;; 0890;ARABIC POUND MARK ABOVE;Cf;0;AN;;;;;N;;;;; 0891;ARABIC PIASTRE MARK ABOVE;Cf;0;AN;;;;;N;;;;; 0897;ARABIC PEPET;Mn;230;NSM;;;;;N;;;;; @@ -2452,6 +2453,7 @@ 09FC;BENGALI LETTER VEDIC ANUSVARA;Lo;0;L;;;;;N;;;;; 09FD;BENGALI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; 09FE;BENGALI SANDHI MARK;Mn;230;NSM;;;;;N;;;;; +09FF;BENGALI LETTER SANSKRIT BA;Lo;0;L;;;;;N;;;;; 0A01;GURMUKHI SIGN ADAK BINDI;Mn;0;NSM;;;;;N;;;;; 0A02;GURMUKHI SIGN BINDI;Mn;0;NSM;;;;;N;;;;; 0A03;GURMUKHI SIGN 
VISARGA;Mc;0;L;;;;;N;;;;; @@ -2686,6 +2688,8 @@ 0B4B;ORIYA VOWEL SIGN O;Mc;0;L;0B47 0B3E;;;;N;;;;; 0B4C;ORIYA VOWEL SIGN AU;Mc;0;L;0B47 0B57;;;;N;;;;; 0B4D;ORIYA SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; +0B53;ORIYA SIGN DOT ABOVE;Mn;0;NSM;;;;;N;;;;; +0B54;ORIYA SIGN DOUBLE DOT ABOVE;Mn;0;NSM;;;;;N;;;;; 0B55;ORIYA SIGN OVERLINE;Mn;0;NSM;;;;;N;;;;; 0B56;ORIYA AI LENGTH MARK;Mn;0;NSM;;;;;N;;;;; 0B57;ORIYA AU LENGTH MARK;Mc;0;L;;;;;N;;;;; @@ -2862,6 +2866,7 @@ 0C58;TELUGU LETTER TSA;Lo;0;L;;;;;N;;;;; 0C59;TELUGU LETTER DZA;Lo;0;L;;;;;N;;;;; 0C5A;TELUGU LETTER RRRA;Lo;0;L;;;;;N;;;;; +0C5C;TELUGU ARCHAIC SHRII;Lo;0;L;;;;;N;;;;; 0C5D;TELUGU LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;; 0C60;TELUGU LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; 0C61;TELUGU LETTER VOCALIC LL;Lo;0;L;;;;;N;;;;; @@ -2958,6 +2963,7 @@ 0CCD;KANNADA SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; 0CD5;KANNADA LENGTH MARK;Mc;0;L;;;;;N;;;;; 0CD6;KANNADA AI LENGTH MARK;Mc;0;L;;;;;N;;;;; +0CDC;KANNADA ARCHAIC SHRII;Lo;0;L;;;;;N;;;;; 0CDD;KANNADA LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;; 0CDE;KANNADA LETTER FA;Lo;0;L;;;;;N;;;;; 0CE0;KANNADA LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; @@ -6137,6 +6143,33 @@ 1ACC;COMBINING LATIN SMALL LETTER INSULAR G;Mn;230;NSM;;;;;N;;;;; 1ACD;COMBINING LATIN SMALL LETTER INSULAR R;Mn;230;NSM;;;;;N;;;;; 1ACE;COMBINING LATIN SMALL LETTER INSULAR T;Mn;230;NSM;;;;;N;;;;; +1ACF;COMBINING DOUBLE CARON;Mn;230;NSM;;;;;N;;;;; +1AD0;COMBINING VERTICAL-LINE-ACUTE;Mn;230;NSM;;;;;N;;;;; +1AD1;COMBINING GRAVE-VERTICAL-LINE;Mn;230;NSM;;;;;N;;;;; +1AD2;COMBINING VERTICAL-LINE-GRAVE;Mn;230;NSM;;;;;N;;;;; +1AD3;COMBINING ACUTE-VERTICAL-LINE;Mn;230;NSM;;;;;N;;;;; +1AD4;COMBINING VERTICAL-LINE-MACRON;Mn;230;NSM;;;;;N;;;;; +1AD5;COMBINING MACRON-VERTICAL-LINE;Mn;230;NSM;;;;;N;;;;; +1AD6;COMBINING VERTICAL-LINE-ACUTE-GRAVE;Mn;230;NSM;;;;;N;;;;; +1AD7;COMBINING VERTICAL-LINE-GRAVE-ACUTE;Mn;230;NSM;;;;;N;;;;; +1AD8;COMBINING MACRON-ACUTE-GRAVE;Mn;230;NSM;;;;;N;;;;; +1AD9;COMBINING SHARP SIGN;Mn;230;NSM;;;;;N;;;;; +1ADA;COMBINING FLAT 
SIGN;Mn;230;NSM;;;;;N;;;;; +1ADB;COMBINING DOWN TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1ADC;COMBINING DIAERESIS WITH RAISED LEFT DOT;Mn;230;NSM;;;;;N;;;;; +1ADD;COMBINING DOT-AND-RING BELOW;Mn;220;NSM;;;;;N;;;;; +1AE0;COMBINING LEFT TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE1;COMBINING RIGHT TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE2;COMBINING MINUS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE3;COMBINING INVERTED BRIDGE ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE4;COMBINING SQUARE ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE5;COMBINING SEAGULL ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE6;COMBINING DOUBLE ARCH BELOW;Mn;220;NSM;;;;;N;;;;; +1AE7;COMBINING DOUBLE ARCH ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE8;COMBINING EQUALS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE9;COMBINING LEFT ANGLE CENTRED ABOVE;Mn;230;NSM;;;;;N;;;;; +1AEA;COMBINING UPWARDS ARROW ABOVE;Mn;230;NSM;;;;;N;;;;; +1AEB;COMBINING DOUBLE RIGHTWARDS ARROW ABOVE;Mn;234;NSM;;;;;N;;;;; 1B00;BALINESE SIGN ULU RICEM;Mn;0;NSM;;;;;N;;;;; 1B01;BALINESE SIGN ULU CANDRA;Mn;0;NSM;;;;;N;;;;; 1B02;BALINESE SIGN CECEK;Mn;0;NSM;;;;;N;;;;; @@ -10239,6 +10272,7 @@ 2B93;NEWLINE RIGHT;So;0;ON;;;;;N;;;;; 2B94;FOUR CORNER ARROWS CIRCLING ANTICLOCKWISE;So;0;ON;;;;;N;;;;; 2B95;RIGHTWARDS BLACK ARROW;So;0;ON;;;;;N;;;;; +2B96;EQUALS SIGN WITH INFINITY ABOVE;So;0;ON;;;;;N;;;;; 2B97;SYMBOL FOR TYPE A ELECTRONICS;So;0;ON;;;;;N;;;;; 2B98;THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD;So;0;ON;;;;;N;;;;; 2B99;THREE-D RIGHT-LIGHTED UPWARDS EQUILATERAL ARROWHEAD;So;0;ON;;;;;N;;;;; @@ -14274,10 +14308,14 @@ A7CA;LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY;Ll;0;L;;;;;N;;;A7C9;;A7C9 A7CB;LATIN CAPITAL LETTER RAMS HORN;Lu;0;L;;;;;N;;;;0264; A7CC;LATIN CAPITAL LETTER S WITH DIAGONAL STROKE;Lu;0;L;;;;;N;;;;A7CD; A7CD;LATIN SMALL LETTER S WITH DIAGONAL STROKE;Ll;0;L;;;;;N;;;A7CC;;A7CC +A7CE;LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE;Lu;0;L;;;;;N;;;;A7CF; +A7CF;LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE;Ll;0;L;;;;;N;;;A7CE;;A7CE A7D0;LATIN CAPITAL LETTER CLOSED INSULAR G;Lu;0;L;;;;;N;;;;A7D1; 
A7D1;LATIN SMALL LETTER CLOSED INSULAR G;Ll;0;L;;;;;N;;;A7D0;;A7D0 -A7D3;LATIN SMALL LETTER DOUBLE THORN;Ll;0;L;;;;;N;;;;; -A7D5;LATIN SMALL LETTER DOUBLE WYNN;Ll;0;L;;;;;N;;;;; +A7D2;LATIN CAPITAL LETTER DOUBLE THORN;Lu;0;L;;;;;N;;;;A7D3; +A7D3;LATIN SMALL LETTER DOUBLE THORN;Ll;0;L;;;;;N;;;A7D2;;A7D2 +A7D4;LATIN CAPITAL LETTER DOUBLE WYNN;Lu;0;L;;;;;N;;;;A7D5; +A7D5;LATIN SMALL LETTER DOUBLE WYNN;Ll;0;L;;;;;N;;;A7D4;;A7D4 A7D6;LATIN CAPITAL LETTER MIDDLE SCOTS S;Lu;0;L;;;;;N;;;;A7D7; A7D7;LATIN SMALL LETTER MIDDLE SCOTS S;Ll;0;L;;;;;N;;;A7D6;;A7D6 A7D8;LATIN CAPITAL LETTER SIGMOID S;Lu;0;L;;;;;N;;;;A7D9; @@ -14285,6 +14323,7 @@ A7D9;LATIN SMALL LETTER SIGMOID S;Ll;0;L;;;;;N;;;A7D8;;A7D8 A7DA;LATIN CAPITAL LETTER LAMBDA;Lu;0;L;;;;;N;;;;A7DB; A7DB;LATIN SMALL LETTER LAMBDA;Ll;0;L;;;;;N;;;A7DA;;A7DA A7DC;LATIN CAPITAL LETTER LAMBDA WITH STROKE;Lu;0;L;;;;;N;;;;019B; +A7F1;MODIFIER LETTER CAPITAL S;Lm;0;L; 0053;;;;N;;;;; A7F2;MODIFIER LETTER CAPITAL C;Lm;0;L; 0043;;;;N;;;;; A7F3;MODIFIER LETTER CAPITAL F;Lm;0;L; 0046;;;;N;;;;; A7F4;MODIFIER LETTER CAPITAL Q;Lm;0;L; 0051;;;;N;;;;; @@ -15925,6 +15964,22 @@ FBBF;ARABIC SYMBOL RING;Sk;0;AL;;;;;N;;;;; FBC0;ARABIC SYMBOL SMALL TAH ABOVE;Sk;0;AL;;;;;N;;;;; FBC1;ARABIC SYMBOL SMALL TAH BELOW;Sk;0;AL;;;;;N;;;;; FBC2;ARABIC SYMBOL WASLA ABOVE;Sk;0;AL;;;;;N;;;;; +FBC3;ARABIC LIGATURE JALLA WA-ALAA;So;0;ON;;;;;N;;;;; +FBC4;ARABIC LIGATURE DAAMAT BARAKAATUHUM;So;0;ON;;;;;N;;;;; +FBC5;ARABIC LIGATURE RAHMATU ALLAAHI TAAALAA ALAYH;So;0;ON;;;;;N;;;;; +FBC6;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIM;So;0;ON;;;;;N;;;;; +FBC7;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIMAA;So;0;ON;;;;;N;;;;; +FBC8;ARABIC LIGATURE RAHIMAHUM ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBC9;ARABIC LIGATURE RAHIMAHUMAA ALLAAH;So;0;ON;;;;;N;;;;; +FBCA;ARABIC LIGATURE RAHIMAHUMAA ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBCB;ARABIC LIGATURE RADI ALLAHU TAAALAA ANHUM;So;0;ON;;;;;N;;;;; +FBCC;ARABIC LIGATURE HAFIZAHU ALLAAH;So;0;ON;;;;;N;;;;; +FBCD;ARABIC LIGATURE HAFIZAHU ALLAAHU 
TAAALAA;So;0;ON;;;;;N;;;;; +FBCE;ARABIC LIGATURE HAFIZAHUM ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBCF;ARABIC LIGATURE HAFIZAHUMAA ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBD0;ARABIC LIGATURE SALLALLAAHU TAAALAA ALAYHI WA-SALLAM;So;0;ON;;;;;N;;;;; +FBD1;ARABIC LIGATURE AJJAL ALLAAHU FARAJAHU ASH-SHAREEF;So;0;ON;;;;;N;;;;; +FBD2;ARABIC LIGATURE ALAYHI AR-RAHMAH;So;0;ON;;;;;N;;;;; FBD3;ARABIC LETTER NG ISOLATED FORM;Lo;0;AL; 06AD;;;;N;;;;; FBD4;ARABIC LETTER NG FINAL FORM;Lo;0;AL; 06AD;;;;N;;;;; FBD5;ARABIC LETTER NG INITIAL FORM;Lo;0;AL; 06AD;;;;N;;;;; @@ -16370,6 +16425,8 @@ FD8C;ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM;Lo;0;AL; 0645 FD8D;ARABIC LIGATURE MEEM WITH JEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0645 062C 0645;;;;N;;;;; FD8E;ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM;Lo;0;AL; 0645 062E 062C;;;;N;;;;; FD8F;ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM;Lo;0;AL; 0645 062E 0645;;;;N;;;;; +FD90;ARABIC LIGATURE RAHMATU ALLAAHI ALAYH;So;0;ON;;;;;N;;;;; +FD91;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA;So;0;ON;;;;;N;;;;; FD92;ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM;Lo;0;AL; 0645 062C 062E;;;;N;;;;; FD93;ARABIC LIGATURE HEH WITH MEEM WITH JEEM INITIAL FORM;Lo;0;AL; 0647 0645 062C;;;;N;;;;; FD94;ARABIC LIGATURE HEH WITH MEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0647 0645 0645;;;;N;;;;; @@ -16424,6 +16481,13 @@ FDC4;ARABIC LIGATURE AIN WITH JEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0639 FDC5;ARABIC LIGATURE SAD WITH MEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0635 0645 0645;;;;N;;;;; FDC6;ARABIC LIGATURE SEEN WITH KHAH WITH YEH FINAL FORM;Lo;0;AL; 0633 062E 064A;;;;N;;;;; FDC7;ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM;Lo;0;AL; 0646 062C 064A;;;;N;;;;; +FDC8;ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA;So;0;ON;;;;;N;;;;; +FDC9;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANH;So;0;ON;;;;;N;;;;; +FDCA;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHAA;So;0;ON;;;;;N;;;;; +FDCB;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHUMAA;So;0;ON;;;;;N;;;;; +FDCC;ARABIC LIGATURE 
SALLALLAHU ALAYHI WA-ALAA AALIHEE WA-SALLAM;So;0;ON;;;;;N;;;;; +FDCD;ARABIC LIGATURE AJJAL ALLAAHU TAAALAA FARAJAHU ASH-SHAREEF;So;0;ON;;;;;N;;;;; +FDCE;ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH;So;0;ON;;;;;N;;;;; FDCF;ARABIC LIGATURE SALAAMUHU ALAYNAA;So;0;ON;;;;;N;;;;; FDF0;ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM;Lo;0;AL; 0635 0644 06D2;;;;N;;;;; FDF1;ARABIC LIGATURE QALA USED AS KORANIC STOP SIGN ISOLATED FORM;Lo;0;AL; 0642 0644 06D2;;;;N;;;;; @@ -18708,6 +18772,35 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10938;LYDIAN LETTER NN;Lo;0;R;;;;;N;;;;; 10939;LYDIAN LETTER C;Lo;0;R;;;;;N;;;;; 1093F;LYDIAN TRIANGULAR MARK;Po;0;R;;;;;N;;;;; +10940;SIDETIC LETTER N01;Lo;0;R;;;;;N;;;;; +10941;SIDETIC LETTER N02;Lo;0;R;;;;;N;;;;; +10942;SIDETIC LETTER N03;Lo;0;R;;;;;N;;;;; +10943;SIDETIC LETTER N04;Lo;0;R;;;;;N;;;;; +10944;SIDETIC LETTER N05;Lo;0;R;;;;;N;;;;; +10945;SIDETIC LETTER N06;Lo;0;R;;;;;N;;;;; +10946;SIDETIC LETTER N07;Lo;0;R;;;;;N;;;;; +10947;SIDETIC LETTER N08;Lo;0;R;;;;;N;;;;; +10948;SIDETIC LETTER N09;Lo;0;R;;;;;N;;;;; +10949;SIDETIC LETTER N10;Lo;0;R;;;;;N;;;;; +1094A;SIDETIC LETTER N11;Lo;0;R;;;;;N;;;;; +1094B;SIDETIC LETTER N12;Lo;0;R;;;;;N;;;;; +1094C;SIDETIC LETTER N13;Lo;0;R;;;;;N;;;;; +1094D;SIDETIC LETTER N14;Lo;0;R;;;;;N;;;;; +1094E;SIDETIC LETTER N15;Lo;0;R;;;;;N;;;;; +1094F;SIDETIC LETTER N16;Lo;0;R;;;;;N;;;;; +10950;SIDETIC LETTER N17;Lo;0;R;;;;;N;;;;; +10951;SIDETIC LETTER N18;Lo;0;R;;;;;N;;;;; +10952;SIDETIC LETTER N19;Lo;0;R;;;;;N;;;;; +10953;SIDETIC LETTER N20;Lo;0;R;;;;;N;;;;; +10954;SIDETIC LETTER N21;Lo;0;R;;;;;N;;;;; +10955;SIDETIC LETTER N22;Lo;0;R;;;;;N;;;;; +10956;SIDETIC LETTER N23;Lo;0;R;;;;;N;;;;; +10957;SIDETIC LETTER N24;Lo;0;R;;;;;N;;;;; +10958;SIDETIC LETTER N25;Lo;0;R;;;;;N;;;;; +10959;SIDETIC LETTER N26;Lo;0;R;;;;;N;;;;; +1095A;SIDETIC LETTER N27;Lo;0;R;;;;;N;;;;; +1095B;SIDETIC LETTER N28;Lo;0;R;;;;;N;;;;; +1095C;SIDETIC LETTER N29;Lo;0;R;;;;;N;;;;; 10980;MEROITIC HIEROGLYPHIC LETTER A;Lo;0;R;;;;;N;;;;; 
10981;MEROITIC HIEROGLYPHIC LETTER E;Lo;0;R;;;;;N;;;;; 10982;MEROITIC HIEROGLYPHIC LETTER I;Lo;0;R;;;;;N;;;;; @@ -19541,6 +19634,20 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC2;ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10EC5;ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW;Lm;0;AL;;;;;N;;;;; +10EC6;ARABIC LETTER THIN NOON;Lo;0;AL;;;;;N;;;;; +10EC7;ARABIC LETTER YEH WITH FOUR DOTS BELOW;Lo;0;AL;;;;;N;;;;; +10ED0;ARABIC BIBLICAL END OF VERSE;Po;0;ON;;;;;N;;;;; +10ED1;ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED2;ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED3;ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED4;ARABIC LIGATURE QADDASA ALLAAHU SIRRAH;So;0;ON;;;;;N;;;;; +10ED5;ARABIC LIGATURE QUDDISA SIRRAHUM;So;0;ON;;;;;N;;;;; +10ED6;ARABIC LIGATURE QUDDISA SIRRAHUMAA;So;0;ON;;;;;N;;;;; +10ED7;ARABIC LIGATURE QUDDISAT ASRAARUHUM;So;0;ON;;;;;N;;;;; +10ED8;ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH;So;0;ON;;;;;N;;;;; +10EFA;ARABIC DOUBLE VERTICAL BAR BELOW;Mn;220;NSM;;;;;N;;;;; +10EFB;ARABIC SMALL LOW NOON;Mn;220;NSM;;;;;N;;;;; 10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;; 10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;; 10EFE;ARABIC SMALL LOW WORD QASR;Mn;220;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index c2dd08ceb..cc5f22a4e 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-16.0.0.txt -# Date: 2024-06-06, 10:00:30 GMT +# VerticalOrientation-17.0.0.txt +# Date: 2024-11-14, 18:38:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -140,8 +140,8 @@ 01C0..01C3 ; R # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..024F ; R # L& [140] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER Y WITH STROKE 0250..0293 ; R # Ll [68] LATIN SMALL LETTER TURNED A..LATIN SMALL LETTER EZH WITH CURL -0294 ; R # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; R # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; R # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; R # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; R # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; R # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; R # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -266,7 +266,7 @@ 0860..086A ; R # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; R # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; R # Sk ARABIC RAISED ROUND DOT -0889..088E ; R # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; R # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; R # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; R # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; R # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -325,6 +325,7 @@ 09FC ; R # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; R # Po BENGALI ABBREVIATION SIGN 09FE ; R # Mn BENGALI SANDHI MARK +09FF ; R # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; R # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; R # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; R # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ 
-388,7 +389,7 @@ 0B47..0B48 ; R # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; R # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; R # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; R # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; R # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; R # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; R # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; R # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -436,7 +437,7 @@ 0C4A..0C4D ; R # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; R # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; R # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; R # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; R # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; R # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; R # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; R # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -462,7 +463,7 @@ 0CCA..0CCB ; R # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; R # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; R # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; R # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; R # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; R # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; R # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; R # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -740,7 +741,8 @@ 1AA8..1AAD ; R # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; R # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; R # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; R # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; R # Mn [31] COMBINING LATIN SMALL LETTER W 
BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; R # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; R # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; R # Mc BALINESE SIGN BISAH 1B05..1B33 ; R # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -1092,7 +1094,7 @@ 2B4D..2B4F ; R # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW 2B50..2B59 ; U # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; R # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; R # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B76..2B96 ; R # So [33] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..EQUALS SIGN WITH INFINITY ABOVE 2B97 ; U # So SYMBOL FOR TYPE A ELECTRONICS 2B98..2BB7 ; R # So [32] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..RIBBON ARROW RIGHT DOWN 2BB8..2BD1 ; U # So [26] UPWARDS WHITE ARROW FROM BAR WITH HORIZONTAL BAR..UNCERTAINTY SIGN @@ -1356,11 +1358,8 @@ A788 ; R # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; R # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; R # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; R # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; R # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; R # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; R # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; R # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; R # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; R # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; R # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; R # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN 
SMALL LETTER REVERSED HALF H A7F7 ; R # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; R # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1521,13 +1520,15 @@ FB43..FB44 ; R # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; R # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; R # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; R # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; R # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; R # Pe ORNATE LEFT PARENTHESIS FD3F ; R # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; R # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; R # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; R # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; R # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; R # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; R # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; R # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; R # Sc RIAL SIGN FDFD..FDFF ; R # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -1724,6 +1725,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1091F ; R # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; R # Po LYDIAN TRIANGULAR MARK +10940..1095C ; R # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; U # Lo 
[32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; R # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF @@ -1784,7 +1786,11 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; R # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; R # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; R # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; R # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; R # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index a38742220..38d8320b2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-17.0.0.txt -# Date: 2024-10-16, 14:22:09 GMT +# Date: 2024-11-14, 18:38:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -145,7 +145,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0B3F ; Extend # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -243,7 +243,8 @@ E01F0..E0FFF ; Control # Cn [3600] .. 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -339,7 +340,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR 
COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA @@ -498,7 +499,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2203 +# Total code points: 2234 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.html index 405d0078c..619182f0b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.html @@ -7,7 +7,7 @@

Grapheme_Cluster_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-09-12, 14:11:44 GMT

+

Date: 2024-10-30, 21:25:11 GMT

This page illustrates the application of the Grapheme_Cluster_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

After the heavy blue line in the table are additional rows, either with different sample characters or for sequences. Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of LVT and T shows ×, with the rule 8.0. Checking below the table, rule 8.0 is “( LVT | T) × T”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -39,7 +39,7 @@

Table

Other÷÷÷÷×÷÷×÷÷÷÷÷×÷÷÷÷××××

Rules

-

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule GB9a is given the number 9.1.
  4. Any “treat as” or “ignore” rules are handled as discussed in UAX #29, and thus reflected in a transformation of the rules usually not visible here. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. In some cases, the numbering and form of a rule is changed due to “treat as” rules.

For the original rules, see UAX #29.

+

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule GB9a is given the number 9.1.
  4. Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.

For the original rules, see UAX #29.

@@ -294,6 +294,14 @@

Sample Strings

◌्     + +
0.2sot ÷
0.3÷ eot
36 +     +◌ૻ   +◌્   +   +◌ૻ   +

diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.txt index d10c174b6..4e55634d7 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.txt @@ -1,5 +1,5 @@ -# GraphemeBreakTest-16.0.0.txt -# Date: 2024-05-02, 15:02:48 GMT +# GraphemeBreakTest-17.0.0.txt +# Date: 2024-10-30, 21:25:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1115,7 +1115,8 @@ ÷ 0061 × 094D ÷ 0924 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 003F × 094D ÷ 0924 ÷ # ÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 0915 × 094D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0AB8 × 0AFB × 0ACD × 0AB8 × 0AFB ÷ # ÷ [0.2] GUJARATI LETTER SA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] GUJARATI SIGN SHADDA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] GUJARATI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] GUJARATI LETTER SA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] GUJARATI SIGN SHADDA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] # -# Lines: 1093 +# Lines: 1094 # # EOF diff --git 
a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html index 4cfb8f6d9..745622907 100644 --- a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html @@ -7,7 +7,7 @@

Line_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-09-12, 14:11:46 GMT

+

Date: 2024-10-30, 21:25:12 GMT

This page illustrates the application of the Line_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of H3 and JT shows ×, with the rule 26.03. Checking below the table, rule 26.03 is “JT | H3 × JT”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -67,9 +67,9 @@

Table

BA_NotEastAsian_NonEastAsianBA_Hyphen××××÷÷÷÷×÷×÷÷÷÷÷××××××÷÷×××÷××÷×÷×÷÷÷÷÷÷÷÷×÷××÷÷÷÷×××÷×××××××××× CP_NotEastAsian_CP30×××××÷÷÷×÷×÷÷÷÷÷××××××÷÷××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××÷×××××××××× OP_NotEastAsian_OP30×××××××××××××××××××××××××××××××××××××××××××××××××××××××××××××××× -CM1_NotEastAsian_CM×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× +CM1_NotEastAsian_CM×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× ZWJ_O_ZWJ_NotEastAsian_CM×××××××××××××××××××××××××××××××××××××××××××××××××××××××××××××××× -CM1_CM×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× +CM1_CM×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× AL_NotEastAsian_AL×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× AI_NotEastAsian_AL×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× XX_NotEastAsian_AL×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× @@ -79,7 +79,7 @@

Table

CJ_NS÷××××÷÷÷×÷×÷÷÷÷÷××××××÷÷×××÷××××÷×÷÷÷÷÷÷÷÷×÷××÷÷÷÷×××÷×××÷÷÷÷÷××

Rules

-

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ×”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule LB21a is given the number 21.1.
  4. Any “treat as” or “ignore” rules are handled as discussed in UAX #14, and thus reflected in a transformation of the rules usually not visible here. Where it does show up, an extra variable like CM+ may appear, and the rule may be recast. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. Where a rule has multiple parts (lines), each one is numbered using hundredths, such as 21.01) × BA, 21.02) × HY, ... In some cases, the numbering and form of a rule is changed due to “treat as” rules.

For the original rules, see UAX #14.

+

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ×”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule LB21a is given the number 21.1.
  4. Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. Where a rule has multiple parts (lines), each one is numbered using hundredths, such as 21.01) × BA, 21.02) × HY, ...

For the original rules, see UAX #14.

@@ -93,13 +93,12 @@

Rules

- + + - - - + @@ -229,7 +228,7 @@

Sample Strings

diff --git a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt index 472c419c5..cc03bb2fb 100644 --- a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt +++ b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt @@ -1,5 +1,5 @@ -# LineBreakTest-16.0.0.txt -# Date: 2024-07-05, 00:45:20 GMT +# LineBreakTest-17.0.0.txt +# Date: 2024-10-11, 18:57:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -40,7 +40,7 @@ × 23E9 × 0308 × 0020 × FE15 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 23E9 × 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 23E9 × 0020 ÷ 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 23E9 × 0308 × 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 23E9 × 0308 × 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 23E9 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 23E9 ÷ AC00 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 23E9 × 0020 ÷ AC00 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -144,11 
+144,11 @@ × 23E9 × 0308 × 0020 × 0085 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 23E9 × 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 23E9 × 0020 ÷ 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 23E9 × 0308 × 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 23E9 × 0308 × 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 23E9 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 23E9 × 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 23E9 × 0020 ÷ 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 23E9 × 0308 × 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 23E9 × 0308 × 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 23E9 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE 
(SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 23E9 ÷ 00B4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 23E9 × 0020 ÷ 00B4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -404,7 +404,7 @@ × 3000 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3000 × 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3000 × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 3000 × 0308 × 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 3000 × 0308 × 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3000 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3000 ÷ 00B4 ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 3000 × 0020 ÷ 00B4 ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -552,7 +552,7 @@ × 232A × 0308 × 0020 × FE15 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL 
EXCLAMATION MARK (EX) ÷ [0.3] × 232A × 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 232A × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 232A × 0308 × 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 232A × 0308 × 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 232A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 232A ÷ AC00 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 232A × 0020 ÷ AC00 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -656,11 +656,11 @@ × 232A × 0308 × 0020 × 0085 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 232A × 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 232A × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 232A × 0308 × 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 232A × 0308 × 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 232A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 232A × 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 232A × 0020 ÷ 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 232A × 0308 × 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 232A × 0308 × 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 232A × 0308 × 0020 ÷ 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 232A ÷ 00B4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 232A × 0020 ÷ 00B4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -808,7 +808,7 @@ × FE15 × 0308 × 0020 × FE15 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × FE15 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE15 × 0020 ÷ 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× FE15 × 0308 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL 
EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× FE15 × 0308 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE15 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE15 ÷ AC00 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × FE15 × 0020 ÷ AC00 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -912,11 +912,11 @@ × FE15 × 0308 × 0020 × 0085 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × FE15 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE15 × 0020 ÷ 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× FE15 × 0308 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× FE15 × 0308 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE15 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE15 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR 
VERTICAL EXCLAMATION MARK (EX) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE15 × 0020 ÷ 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× FE15 × 0308 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× FE15 × 0308 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE15 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE15 ÷ 00B4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × FE15 × 0020 ÷ 00B4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -1320,7 +1320,7 @@ × AC00 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × AC00 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC00 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× AC00 × 0308 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× AC00 × 0308 × 
16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC00 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC00 ÷ AC00 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × AC00 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -1424,11 +1424,11 @@ × AC00 × 0308 × 0020 × 0085 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × AC00 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC00 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× AC00 × 0308 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× AC00 × 0308 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC00 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC00 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC00 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× AC00 × 0308 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× AC00 × 0308 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC00 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC00 ÷ 00B4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × AC00 × 0020 ÷ 00B4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -1576,7 +1576,7 @@ × AC01 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × AC01 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC01 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× AC01 × 0308 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× AC01 × 0308 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC01 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC01 ÷ AC00 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × AC01 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -1680,11 +1680,11 @@ × AC01 × 0308 × 0020 × 0085 ÷ # × [0.3] 
HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × AC01 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC01 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× AC01 × 0308 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× AC01 × 0308 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC01 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC01 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC01 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× AC01 × 0308 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× AC01 × 0308 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC01 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC01 ÷ 00B4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × AC01 × 0020 ÷ 00B4 ÷ # × [0.3] HANGUL 
SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -1832,7 +1832,7 @@ × 231A × 0308 × 0020 × FE15 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 231A × 16FE4 ÷ # × [0.3] WATCH (ID) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 231A × 0020 ÷ 16FE4 ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 231A × 0308 × 16FE4 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 231A × 0308 × 16FE4 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 231A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 231A ÷ AC00 ÷ # × [0.3] WATCH (ID) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 231A × 0020 ÷ AC00 ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -1936,11 +1936,11 @@ × 231A × 0308 × 0020 × 0085 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 231A × 00A0 ÷ # × [0.3] WATCH (ID) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 231A × 0020 ÷ 00A0 ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 231A × 0308 × 00A0 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 231A × 0308 × 00A0 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 231A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 231A × 00AB ÷ # × [0.3] WATCH (ID) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 231A × 0020 ÷ 00AB ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 231A × 0308 × 00AB ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 231A × 0308 × 00AB ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 231A × 0308 × 0020 ÷ 00AB ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 231A ÷ 00B4 ÷ # × [0.3] WATCH (ID) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 231A × 0020 ÷ 00B4 ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -2088,7 +2088,7 @@ × FE19 × 0308 × 0020 × FE15 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × FE19 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE19 × 0020 ÷ 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× FE19 × 0308 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× FE19 × 0308 × 16FE4 ÷ # × 
[0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE19 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE19 ÷ AC00 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × FE19 × 0020 ÷ AC00 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -2192,11 +2192,11 @@ × FE19 × 0308 × 0020 × 0085 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × FE19 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE19 × 0020 ÷ 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× FE19 × 0308 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× FE19 × 0308 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE19 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE19 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] 
× FE19 × 0020 ÷ 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× FE19 × 0308 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× FE19 × 0308 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE19 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE19 ÷ 00B4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × FE19 × 0020 ÷ 00B4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -2344,7 +2344,7 @@ × 1100 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1100 × 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1100 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1100 × 0308 × 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1100 × 0308 × 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1100 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1100 × AC00 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [26.01] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1100 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -2448,11 +2448,11 @@ × 1100 × 0308 × 0020 × 0085 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1100 × 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1100 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1100 × 0308 × 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1100 × 0308 × 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1100 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1100 × 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1100 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 1100 × 0308 × 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 1100 × 0308 × 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1100 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1100 ÷ 00B4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 1100 × 0020 ÷ 00B4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -2600,7 +2600,7 @@ × 3005 × 0308 × 0020 × FE15 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 3005 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3005 × 0020 ÷ 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 3005 × 0308 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 3005 × 0308 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3005 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3005 ÷ AC00 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 3005 × 0020 ÷ AC00 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ 
[0.3] @@ -2704,11 +2704,11 @@ × 3005 × 0308 × 0020 × 0085 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 3005 × 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3005 × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 3005 × 0308 × 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 3005 × 0308 × 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3005 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3005 × 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3005 × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 3005 × 0308 × 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 3005 × 0308 × 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3005 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ 
[0.3] × 3005 ÷ 00B4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 3005 × 0020 ÷ 00B4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -2856,7 +2856,7 @@ × 2329 × 0308 × 0020 × FE15 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 2329 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2329 × 0020 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [7.01] SPACE (SP_NotEastAsian) × [14.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 2329 × 0308 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 2329 × 0308 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2329 × 0308 × 0020 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [14.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2329 × AC00 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [14.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 2329 × 0020 × AC00 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [7.01] SPACE (SP_NotEastAsian) × [14.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -2960,7 +2960,7 @@ × 2329 × 0308 × 0020 × 0085 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 2329 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2329 × 0020 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [7.01] SPACE (SP_NotEastAsian) × [14.0] NO-BREAK SPACE 
(GL_NotEastAsian) ÷ [0.3] -× 2329 × 0308 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 2329 × 0308 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2329 × 0308 × 0020 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [14.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2329 × 00AB ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [14.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2329 × 0020 × 00AB ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [7.01] SPACE (SP_NotEastAsian) × [14.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -3112,7 +3112,7 @@ × FE6A × 0308 × 0020 × FE15 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × FE6A × 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE6A × 0020 ÷ 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× FE6A × 0308 × 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× FE6A × 0308 × 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE6A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE6A ÷ AC00 ÷ # × [0.3] SMALL PERCENT SIGN (PO) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × FE6A × 
0020 ÷ AC00 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -3216,11 +3216,11 @@ × FE6A × 0308 × 0020 × 0085 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × FE6A × 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE6A × 0020 ÷ 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× FE6A × 0308 × 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× FE6A × 0308 × 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE6A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE6A × 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE6A × 0020 ÷ 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× FE6A × 0308 × 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× FE6A × 0308 × 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE6A × 0308 × 0020 ÷ 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE6A ÷ 00B4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × FE6A × 0020 ÷ 00B4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -3368,7 +3368,7 @@ × 20A9 × 0308 × 0020 × FE15 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 20A9 × 16FE4 ÷ # × [0.3] WON SIGN (PR) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 20A9 × 0020 ÷ 16FE4 ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 20A9 × 0308 × 16FE4 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 20A9 × 0308 × 16FE4 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 20A9 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 20A9 × AC00 ÷ # × [0.3] WON SIGN (PR) × [27.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 20A9 × 0020 ÷ AC00 ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -3472,11 +3472,11 @@ × 20A9 × 0308 × 0020 × 0085 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 20A9 × 00A0 ÷ # × [0.3] WON SIGN (PR) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 20A9 × 0020 ÷ 00A0 ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 20A9 × 0308 × 00A0 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 20A9 × 0308 × 00A0 ÷ # 
× [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 20A9 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 20A9 × 00AB ÷ # × [0.3] WON SIGN (PR) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 20A9 × 0020 ÷ 00AB ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 20A9 × 0308 × 00AB ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 20A9 × 0308 × 00AB ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 20A9 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 20A9 ÷ 00B4 ÷ # × [0.3] WON SIGN (PR) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 20A9 × 0020 ÷ 00B4 ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -3624,7 +3624,7 @@ × 270A × 0308 × 0020 × FE15 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 270A × 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 270A × 0020 ÷ 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 270A × 0308 × 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] 
KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 270A × 0308 × 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 270A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 270A ÷ AC00 ÷ # × [0.3] RAISED FIST (EB) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 270A × 0020 ÷ AC00 ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -3728,11 +3728,11 @@ × 270A × 0308 × 0020 × 0085 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 270A × 00A0 ÷ # × [0.3] RAISED FIST (EB) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 270A × 0020 ÷ 00A0 ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 270A × 0308 × 00A0 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 270A × 0308 × 00A0 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 270A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 270A × 00AB ÷ # × [0.3] RAISED FIST (EB) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 270A × 0020 ÷ 00AB ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 270A × 0308 × 00AB ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ 
[0.3] +× 270A × 0308 × 00AB ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 270A × 0308 × 0020 ÷ 00AB ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 270A ÷ 00B4 ÷ # × [0.3] RAISED FIST (EB) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 270A × 0020 ÷ 00B4 ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -3880,7 +3880,7 @@ × 1F3FB × 0308 × 0020 × FE15 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1F3FB × 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F3FB × 0020 ÷ 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1F3FB × 0308 × 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1F3FB × 0308 × 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F3FB × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F3FB ÷ AC00 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1F3FB × 0020 ÷ AC00 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA 
(H2) ÷ [0.3] @@ -3984,11 +3984,11 @@ × 1F3FB × 0308 × 0020 × 0085 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1F3FB × 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F3FB × 0020 ÷ 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1F3FB × 0308 × 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1F3FB × 0308 × 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F3FB × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F3FB × 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1F3FB × 0020 ÷ 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 1F3FB × 0308 × 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 1F3FB × 0308 × 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1F3FB × 0308 × 0020 ÷ 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × 
[7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1F3FB ÷ 00B4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 1F3FB × 0020 ÷ 00B4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -4136,7 +4136,7 @@ × 000A ÷ 0308 × 0020 × FE15 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 000A ÷ 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000A ÷ 0020 ÷ 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 000A ÷ 0308 × 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 000A ÷ 0308 × 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000A ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000A ÷ AC00 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 000A ÷ 0020 ÷ AC00 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -4240,7 +4240,7 @@ × 000A ÷ 0308 × 0020 × 0085 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 000A ÷ 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000A ÷ 0020 ÷ 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 000A ÷ 
0308 × 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 000A ÷ 0308 × 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000A ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000A ÷ 00AB ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 000A ÷ 0020 ÷ 00AB ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -4392,7 +4392,7 @@ × 000B ÷ 0308 × 0020 × FE15 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 000B ÷ 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000B ÷ 0020 ÷ 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 000B ÷ 0308 × 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 000B ÷ 0308 × 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000B ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000B ÷ AC00 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 000B ÷ 0020 ÷ AC00 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -4496,7 +4496,7 @@ × 000B ÷ 0308 × 0020 
× 0085 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 000B ÷ 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000B ÷ 0020 ÷ 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 000B ÷ 0308 × 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 000B ÷ 0308 × 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000B ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000B ÷ 00AB ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 000B ÷ 0020 ÷ 00AB ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -4648,7 +4648,7 @@ × 000D ÷ 0308 × 0020 × FE15 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 000D ÷ 16FE4 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000D ÷ 0020 ÷ 16FE4 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 000D ÷ 0308 × 16FE4 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 000D ÷ 0308 × 16FE4 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000D ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] 
(CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000D ÷ AC00 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 000D ÷ 0020 ÷ AC00 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -4752,7 +4752,7 @@ × 000D ÷ 0308 × 0020 × 0085 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 000D ÷ 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000D ÷ 0020 ÷ 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 000D ÷ 0308 × 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 000D ÷ 0308 × 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000D ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000D ÷ 00AB ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 000D ÷ 0020 ÷ 00AB ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -4904,7 +4904,7 @@ × 0020 ÷ 0308 × 0020 × FE15 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0020 ÷ 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0020 × 0020 ÷ 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) × [7.01] 
SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0020 ÷ 0308 × 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0020 ÷ 0308 × 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0020 ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0020 ÷ AC00 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0020 × 0020 ÷ AC00 ÷ # × [0.3] SPACE (SP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -5008,7 +5008,7 @@ × 0020 ÷ 0308 × 0020 × 0085 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0020 ÷ 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0020 × 0020 ÷ 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0020 ÷ 0308 × 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0020 ÷ 0308 × 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0020 ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0020 ÷ 00AB ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0020 × 0020 ÷ 00AB ÷ # × [0.3] SPACE (SP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) 
÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -5160,7 +5160,7 @@ × 0021 × 0308 × 0020 × FE15 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0021 × 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0021 × 0020 ÷ 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0021 × 0308 × 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0021 × 0308 × 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0021 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0021 ÷ AC00 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0021 × 0020 ÷ AC00 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -5264,7 +5264,7 @@ × 0021 × 0308 × 0020 × 0085 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0021 × 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0021 × 0020 ÷ 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0021 × 0308 × 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] 
NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0021 × 0308 × 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0021 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0021 × 00AB ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0021 × 0020 ÷ 00AB ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -5416,7 +5416,7 @@ × 0022 × 0308 × 0020 × FE15 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0022 × 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0022 × 0020 ÷ 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0022 × 0308 × 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0022 × 0308 × 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0022 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0022 × AC00 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [19.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0022 × 
0020 ÷ AC00 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -5520,7 +5520,7 @@ × 0022 × 0308 × 0020 × 0085 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0022 × 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0022 × 0020 ÷ 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0022 × 0308 × 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0022 × 0308 × 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0022 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0022 × 00AB ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [19.02] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0022 × 0020 ÷ 00AB ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -5672,7 +5672,7 @@ × 0024 × 0308 × 0020 × FE15 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0024 × 16FE4 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0024 × 0020 ÷ 16FE4 ÷ # × [0.3] DOLLAR SIGN 
(PR_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0024 × 0308 × 16FE4 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0024 × 0308 × 16FE4 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0024 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0024 × AC00 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [27.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0024 × 0020 ÷ AC00 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -5776,7 +5776,7 @@ × 0024 × 0308 × 0020 × 0085 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0024 × 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0024 × 0020 ÷ 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0024 × 0308 × 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0024 × 0308 × 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0024 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0024 × 00AB ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ 
[0.3] × 0024 × 0020 ÷ 00AB ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -5928,7 +5928,7 @@ × 0025 × 0308 × 0020 × FE15 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0025 × 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0025 × 0020 ÷ 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0025 × 0308 × 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0025 × 0308 × 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0025 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0025 ÷ AC00 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0025 × 0020 ÷ AC00 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -6032,7 +6032,7 @@ × 0025 × 0308 × 0020 × 0085 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0025 × 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0025 × 0020 ÷ 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0025 × 0308 × 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] 
COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0025 × 0308 × 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0025 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0025 × 00AB ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0025 × 0020 ÷ 00AB ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -6184,7 +6184,7 @@ × 002C × 0308 × 0020 × FE15 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 002C × 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002C × 0020 ÷ 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 002C × 0308 × 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 002C × 0308 × 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002C × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002C ÷ AC00 ÷ # × [0.3] COMMA (IS_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 002C × 0020 ÷ AC00 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA 
(H2) ÷ [0.3] @@ -6288,7 +6288,7 @@ × 002C × 0308 × 0020 × 0085 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 002C × 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002C × 0020 ÷ 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 002C × 0308 × 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 002C × 0308 × 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002C × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002C × 00AB ÷ # × [0.3] COMMA (IS_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 002C × 0020 ÷ 00AB ÷ # × [0.3] COMMA (IS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -6696,7 +6696,7 @@ × 002F × 0308 × 0020 × FE15 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 002F × 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002F × 0020 ÷ 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 002F × 0308 × 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 002F × 0308 × 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × 
[9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002F × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002F ÷ AC00 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 002F × 0020 ÷ AC00 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -6800,7 +6800,7 @@ × 002F × 0308 × 0020 × 0085 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 002F × 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002F × 0020 ÷ 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 002F × 0308 × 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 002F × 0308 × 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002F × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002F × 00AB ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 002F × 0020 ÷ 00AB ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -6952,7 +6952,7 @@ × 0030 × 0308 × 0020 × FE15 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] 
PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0030 × 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0030 × 0020 ÷ 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0030 × 0308 × 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0030 × 0308 × 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0030 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0030 ÷ AC00 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0030 × 0020 ÷ AC00 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -7056,7 +7056,7 @@ × 0030 × 0308 × 0020 × 0085 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0030 × 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0030 × 0020 ÷ 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0030 × 0308 × 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0030 × 0308 × 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0030 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE 
(SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0030 × 00AB ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0030 × 0020 ÷ 00AB ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -7208,7 +7208,7 @@ × 007D × 0308 × 0020 × FE15 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 007D × 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 007D × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 007D × 0308 × 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 007D × 0308 × 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 007D × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 007D ÷ AC00 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 007D × 0020 ÷ AC00 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -7312,7 +7312,7 @@ × 007D × 0308 × 0020 × 0085 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 007D × 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET 
(CL_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 007D × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 007D × 0308 × 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 007D × 0308 × 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 007D × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 007D × 00AB ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 007D × 0020 ÷ 00AB ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -7464,7 +7464,7 @@ × 0085 ÷ 0308 × 0020 × FE15 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0085 ÷ 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0085 ÷ 0020 ÷ 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0085 ÷ 0308 × 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0085 ÷ 0308 × 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0085 ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0085 ÷ AC00 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0085 ÷ 0020 ÷ AC00 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -7568,7 +7568,7 @@ × 0085 ÷ 0308 × 0020 × 0085 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0085 ÷ 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0085 ÷ 0020 ÷ 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0085 ÷ 0308 × 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0085 ÷ 0308 × 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0085 ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0085 ÷ 00AB ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0085 ÷ 0020 ÷ 00AB ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -7976,7 +7976,7 @@ × 00AB × 0308 × 0020 × FE15 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 00AB × 16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00AB × 0020 × 
16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) × [15.11] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 00AB × 0308 × 16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 00AB × 0308 × 16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00AB × 0308 × 0020 × 16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [15.11] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00AB × AC00 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [15.11] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 00AB × 0020 × AC00 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) × [15.11] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -8080,7 +8080,7 @@ × 00AB × 0308 × 0020 × 0085 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 00AB × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00AB × 0020 × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) × [15.11] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 00AB × 0308 × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 00AB × 0308 × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00AB × 0308 × 0020 × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [15.11] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00AB × 00AB ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [15.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 00AB × 0020 × 00AB ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) × [15.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -8232,7 +8232,7 @@ × 00B4 × 0308 × 0020 × FE15 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 00B4 × 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00B4 × 0020 ÷ 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 00B4 × 0308 × 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 00B4 × 0308 × 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00B4 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00B4 × AC00 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [21.04] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 00B4 × 0020 ÷ AC00 ÷ # × [0.3] ACUTE ACCENT 
(BB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -8336,7 +8336,7 @@ × 00B4 × 0308 × 0020 × 0085 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 00B4 × 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00B4 × 0020 ÷ 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 00B4 × 0308 × 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 00B4 × 0308 × 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00B4 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00B4 × 00AB ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 00B4 × 0020 ÷ 00AB ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -8488,7 +8488,7 @@ × 00BB × 0308 × 0020 × FE15 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 00BB × 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00BB × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) 
÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 00BB × 0308 × 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 00BB × 0308 × 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00BB × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00BB × AC00 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [19.13] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 00BB × 0020 ÷ AC00 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -8592,7 +8592,7 @@ × 00BB × 0308 × 0020 × 0085 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 00BB × 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00BB × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 00BB × 0308 × 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 00BB × 0308 × 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] 
NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00BB × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00BB × 00AB ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 00BB × 0020 ÷ 00AB ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -8744,7 +8744,7 @@ × 05D0 × 0308 × 0020 × FE15 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 05D0 × 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 05D0 × 0020 ÷ 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 05D0 × 0308 × 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 05D0 × 0308 × 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 05D0 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 05D0 ÷ AC00 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 05D0 × 0020 ÷ AC00 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL 
SYLLABLE GA (H2) ÷ [0.3] @@ -8848,7 +8848,7 @@ × 05D0 × 0308 × 0020 × 0085 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 05D0 × 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 05D0 × 0020 ÷ 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 05D0 × 0308 × 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 05D0 × 0308 × 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 05D0 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 05D0 × 00AB ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 05D0 × 0020 ÷ 00AB ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -9000,7 +9000,7 @@ × 1160 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1160 × 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1160 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1160 × 0308 × 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER 
(JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1160 × 0308 × 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1160 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1160 ÷ AC00 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1160 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -9104,7 +9104,7 @@ × 1160 × 0308 × 0020 × 0085 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1160 × 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1160 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1160 × 0308 × 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1160 × 0308 × 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1160 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1160 × 00AB ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1160 × 
0020 ÷ 00AB ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -9256,7 +9256,7 @@ × 11A8 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 11A8 × 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11A8 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 11A8 × 0308 × 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 11A8 × 0308 × 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11A8 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11A8 ÷ AC00 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 11A8 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -9360,7 +9360,7 @@ × 11A8 × 0308 × 0020 × 0085 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 11A8 × 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11A8 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ 
[18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 11A8 × 0308 × 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 11A8 × 0308 × 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11A8 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11A8 × 00AB ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 11A8 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -9512,7 +9512,7 @@ × 1B05 × 0308 × 0020 × FE15 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1B05 × 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B05 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1B05 × 0308 × 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1B05 × 0308 × 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B05 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE 
(SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B05 ÷ AC00 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1B05 × 0020 ÷ AC00 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -9616,7 +9616,7 @@ × 1B05 × 0308 × 0020 × 0085 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1B05 × 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B05 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1B05 × 0308 × 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1B05 × 0308 × 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B05 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B05 × 00AB ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1B05 × 0020 ÷ 00AB ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -9768,7 +9768,7 @@ × 1B44 × 0308 × 0020 × FE15 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1B44 × 16FE4 ÷ # × 
[0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B44 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1B44 × 0308 × 16FE4 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1B44 × 0308 × 16FE4 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B44 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B44 ÷ AC00 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1B44 × 0020 ÷ AC00 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -9872,7 +9872,7 @@ × 1B44 × 0308 × 0020 × 0085 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1B44 × 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B44 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1B44 × 0308 × 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1B44 × 0308 × 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B44 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × 
[7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B44 × 00AB ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1B44 × 0020 ÷ 00AB ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -10024,7 +10024,7 @@ × 1B50 × 0308 × 0020 × FE15 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1B50 × 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B50 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1B50 × 0308 × 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1B50 × 0308 × 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B50 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B50 ÷ AC00 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1B50 × 0020 ÷ AC00 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -10128,7 +10128,7 @@ × 1B50 × 0308 × 0020 × 0085 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1B50 × 00A0 ÷ # × [0.3] 
BALINESE DIGIT ZERO (AS_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B50 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1B50 × 0308 × 00A0 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1B50 × 0308 × 00A0 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B50 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B50 × 00AB ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1B50 × 0020 ÷ 00AB ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -10280,7 +10280,7 @@ × 1B5C × 0308 × 0020 × FE15 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1B5C × 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B5C × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1B5C × 0308 × 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1B5C × 0308 × 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B5C × 0308 
× 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B5C ÷ AC00 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1B5C × 0020 ÷ AC00 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -10384,7 +10384,7 @@ × 1B5C × 0308 × 0020 × 0085 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1B5C × 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B5C × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1B5C × 0308 × 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1B5C × 0308 × 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B5C × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B5C × 00AB ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1B5C × 0020 ÷ 00AB ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -10536,7 +10536,7 @@ × 1BF2 × 0308 × 0020 × FE15 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR 
VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1BF2 × 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1BF2 × 0020 ÷ 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1BF2 × 0308 × 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1BF2 × 0308 × 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1BF2 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1BF2 ÷ AC00 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1BF2 × 0020 ÷ AC00 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -10640,7 +10640,7 @@ × 1BF2 × 0308 × 0020 × 0085 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1BF2 × 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1BF2 × 0020 ÷ 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1BF2 × 0308 × 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1BF2 × 0308 × 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1BF2 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1BF2 × 00AB ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1BF2 × 0020 ÷ 00AB ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -10792,7 +10792,7 @@ × 200B ÷ 0308 × 0020 × FE15 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 200B ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200B × 0020 ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [8.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 200B ÷ 0308 × 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 200B ÷ 0308 × 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200B ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200B ÷ AC00 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 200B × 0020 ÷ AC00 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [8.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -10896,7 +10896,7 @@ × 200B ÷ 0308 × 0020 × 0085 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 200B ÷ 00A0 ÷ # × [0.3] ZERO WIDTH 
SPACE (ZW_NotEastAsian) ÷ [8.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200B × 0020 ÷ 00A0 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [8.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 200B ÷ 0308 × 00A0 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 200B ÷ 0308 × 00A0 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200B ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200B ÷ 00AB ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 200B × 0020 ÷ 00AB ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [8.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -11048,7 +11048,7 @@ × 2014 × 0308 × 0020 × FE15 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 2014 × 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2014 × 0020 ÷ 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 2014 × 0308 × 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 2014 × 0308 × 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2014 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING 
DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2014 ÷ AC00 ÷ # × [0.3] EM DASH (B2_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 2014 × 0020 ÷ AC00 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -11152,7 +11152,7 @@ × 2014 × 0308 × 0020 × 0085 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 2014 × 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2014 × 0020 ÷ 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 2014 × 0308 × 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 2014 × 0308 × 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2014 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2014 × 00AB ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2014 × 0020 ÷ 00AB ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -11304,7 +11304,7 @@ × 2024 × 0308 × 0020 × FE15 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 2024 × 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2024 
× 0020 ÷ 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 2024 × 0308 × 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 2024 × 0308 × 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2024 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2024 ÷ AC00 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 2024 × 0020 ÷ AC00 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -11408,7 +11408,7 @@ × 2024 × 0308 × 0020 × 0085 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 2024 × 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2024 × 0020 ÷ 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 2024 × 0308 × 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 2024 × 0308 × 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2024 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2024 × 00AB ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × 
[19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2024 × 0020 ÷ 00AB ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -11816,7 +11816,7 @@ × 261D × 0308 × 0020 × FE15 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 261D × 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 261D × 0020 ÷ 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 261D × 0308 × 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 261D × 0308 × 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 261D × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 261D ÷ AC00 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 261D × 0020 ÷ AC00 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -11920,7 +11920,7 @@ × 261D × 0308 × 0020 × 0085 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 261D × 00A0 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 261D × 0020 ÷ 00A0 
÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 261D × 0308 × 00A0 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 261D × 0308 × 00A0 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 261D × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 261D × 00AB ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 261D × 0020 ÷ 00AB ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -12072,7 +12072,7 @@ × FFFC × 0308 × 0020 × FE15 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × FFFC × 16FE4 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FFFC × 0020 ÷ 16FE4 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× FFFC × 0308 × 16FE4 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× FFFC × 0308 × 16FE4 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FFFC × 0308 × 0020 ÷ 16FE4 
÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FFFC ÷ AC00 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) ÷ [20.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × FFFC × 0020 ÷ AC00 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -12176,7 +12176,7 @@ × FFFC × 0308 × 0020 × 0085 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × FFFC × 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FFFC × 0020 ÷ 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× FFFC × 0308 × 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× FFFC × 0308 × 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FFFC × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FFFC × 00AB ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FFFC × 0020 ÷ 00AB ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -12328,7 +12328,7 @@ × 11003 × 0308 × 0020 × FE15 ÷ # × [0.3] BRAHMI SIGN 
JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 11003 × 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11003 × 0020 ÷ 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 11003 × 0308 × 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 11003 × 0308 × 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11003 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11003 ÷ AC00 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 11003 × 0020 ÷ AC00 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -12432,7 +12432,7 @@ × 11003 × 0308 × 0020 × 0085 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 11003 × 00A0 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11003 × 0020 ÷ 00A0 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 11003 × 0308 × 00A0 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 11003 × 0308 × 00A0 ÷ # 
× [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11003 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11003 × 00AB ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 11003 × 0020 ÷ 00AB ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -12584,7 +12584,7 @@ × 1F1E6 × 0308 × 0020 × FE15 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1F1E6 × 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F1E6 × 0020 ÷ 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1F1E6 × 0308 × 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1F1E6 × 0308 × 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F1E6 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F1E6 ÷ AC00 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) ÷ [999.0] HANGUL 
SYLLABLE GA (H2) ÷ [0.3] × 1F1E6 × 0020 ÷ AC00 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -12688,7 +12688,7 @@ × 1F1E6 × 0308 × 0020 × 0085 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1F1E6 × 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F1E6 × 0020 ÷ 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1F1E6 × 0308 × 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1F1E6 × 0308 × 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F1E6 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F1E6 × 00AB ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1F1E6 × 0020 ÷ 00AB ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -13352,7 +13352,7 @@ × 0029 × 0308 × 0020 × FE15 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0029 × 16FE4 ÷ # × [0.3] RIGHT 
PARENTHESIS (CP_NotEastAsian_CP30) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0029 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0029 × 0308 × 16FE4 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0029 × 0308 × 16FE4 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0029 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0029 ÷ AC00 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0029 × 0020 ÷ AC00 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -13456,7 +13456,7 @@ × 0029 × 0308 × 0020 × 0085 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0029 × 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0029 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0029 × 0308 × 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0029 × 0308 × 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0029 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] 
COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0029 × 00AB ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0029 × 0020 ÷ 00AB ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -13608,7 +13608,7 @@ × 0028 × 0308 × 0020 × FE15 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0028 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0028 × 0020 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [7.01] SPACE (SP_NotEastAsian) × [14.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0028 × 0308 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0028 × 0308 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0028 × 0308 × 0020 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [14.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0028 × AC00 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [14.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0028 × 0020 × AC00 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [7.01] SPACE (SP_NotEastAsian) × [14.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -13712,7 +13712,7 @@ × 0028 × 0308 × 0020 × 0085 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE 
(SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0028 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0028 × 0020 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [7.01] SPACE (SP_NotEastAsian) × [14.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0028 × 0308 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0028 × 0308 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0028 × 0308 × 0020 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [14.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0028 × 00AB ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [14.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0028 × 0020 × 00AB ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [7.01] SPACE (SP_NotEastAsian) × [14.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -13862,9 +13862,9 @@ × 0001 × 0020 × FE15 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0001 × 0308 × FE15 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0001 × 0308 × 0020 × FE15 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] -× 0001 × 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0001 × 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) 
÷ [0.3] × 0001 × 0020 ÷ 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0001 × 0308 × 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0001 × 0308 × 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0001 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0001 ÷ AC00 ÷ # × [0.3] (CM1_NotEastAsian_CM) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0001 × 0020 ÷ AC00 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -13966,9 +13966,9 @@ × 0001 × 0020 × 0085 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0001 × 0308 × 0085 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0001 × 0308 × 0020 × 0085 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] -× 0001 × 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0001 × 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0001 × 0020 ÷ 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0001 × 0308 × 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0001 × 0308 × 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0001 × 0308 × 
0020 ÷ 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0001 × 00AB ÷ # × [0.3] (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0001 × 0020 ÷ 00AB ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -14120,7 +14120,7 @@ × 200D × 0308 × 0020 × FE15 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 200D × 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200D × 0020 ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 200D × 0308 × 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 200D × 0308 × 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200D × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200D × AC00 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 200D × 0020 ÷ AC00 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -14224,7 +14224,7 @@ × 200D × 0308 × 0020 × 0085 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × 
[8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 200D × 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 0020 ÷ 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 200D × 0308 × 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 200D × 0308 × 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 00AB ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 200D × 0020 ÷ 00AB ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -14374,9 +14374,9 @@ × 302A × 0020 × FE15 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 302A × 0308 × FE15 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 302A × 0308 × 0020 × FE15 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] -× 302A × 16FE4 ÷ # × [0.3] IDEOGRAPHIC 
LEVEL TONE MARK (CM1_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 302A × 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 302A × 0020 ÷ 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 302A × 0308 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 302A × 0308 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 302A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 302A ÷ AC00 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 302A × 0020 ÷ AC00 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -14478,9 +14478,9 @@ × 302A × 0020 × 0085 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 302A × 0308 × 0085 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 302A × 0308 × 0020 × 0085 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] -× 302A × 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 302A × 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 302A × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK 
SPACE (GL_NotEastAsian) ÷ [0.3] -× 302A × 0308 × 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 302A × 0308 × 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 302A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 302A × 00AB ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 302A × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -14632,7 +14632,7 @@ × 0023 × 0308 × 0020 × FE15 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0023 × 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0023 × 0020 ÷ 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0023 × 0308 × 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0023 × 0308 × 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0023 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0023 ÷ AC00 ÷ # × 
[0.3] NUMBER SIGN (AL_NotEastAsian_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0023 × 0020 ÷ AC00 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -14736,7 +14736,7 @@ × 0023 × 0308 × 0020 × 0085 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0023 × 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0023 × 0020 ÷ 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0023 × 0308 × 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0023 × 0308 × 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0023 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0023 × 00AB ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0023 × 0020 ÷ 00AB ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -14888,7 +14888,7 @@ × 00A7 × 0308 × 0020 × FE15 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 00A7 × 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00A7 × 0020 ÷ 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) 
× [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 00A7 × 0308 × 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 00A7 × 0308 × 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00A7 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00A7 ÷ AC00 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 00A7 × 0020 ÷ AC00 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -14992,7 +14992,7 @@ × 00A7 × 0308 × 0020 × 0085 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 00A7 × 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00A7 × 0020 ÷ 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 00A7 × 0308 × 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 00A7 × 0308 × 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00A7 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00A7 × 00AB ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 00A7 × 0020 ÷ 00AB ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -15144,7 +15144,7 @@ × 50005 × 0308 × 0020 × FE15 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 50005 × 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 50005 × 0020 ÷ 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 50005 × 0308 × 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 50005 × 0308 × 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 50005 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 50005 ÷ AC00 ÷ # × [0.3] (XX_NotEastAsian_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 50005 × 0020 ÷ AC00 ÷ # × [0.3] (XX_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -15248,7 +15248,7 @@ × 50005 × 0308 × 0020 × 0085 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 50005 × 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 50005 × 0020 ÷ 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 50005 × 0308 × 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE 
(GL_NotEastAsian) ÷ [0.3] +× 50005 × 0308 × 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 50005 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 50005 × 00AB ÷ # × [0.3] (XX_NotEastAsian_AL) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 50005 × 0020 ÷ 00AB ÷ # × [0.3] (XX_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -15400,7 +15400,7 @@ × 0E01 × 0308 × 0020 × FE15 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0E01 × 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0E01 × 0020 ÷ 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0E01 × 0308 × 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0E01 × 0308 × 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0E01 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0E01 ÷ AC00 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0E01 × 0020 ÷ AC00 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [7.01] 
SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -15504,7 +15504,7 @@ × 0E01 × 0308 × 0020 × 0085 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0E01 × 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0E01 × 0020 ÷ 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0E01 × 0308 × 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0E01 × 0308 × 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0E01 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0E01 × 00AB ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0E01 × 0020 ÷ 00AB ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -15656,7 +15656,7 @@ × 2757 × 0308 × 0020 × FE15 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 2757 × 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2757 × 0020 ÷ 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT 
FILLER (GL) ÷ [0.3] -× 2757 × 0308 × 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 2757 × 0308 × 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2757 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2757 ÷ AC00 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 2757 × 0020 ÷ AC00 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -15760,11 +15760,11 @@ × 2757 × 0308 × 0020 × 0085 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 2757 × 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2757 × 0020 ÷ 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 2757 × 0308 × 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 2757 × 0308 × 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2757 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2757 × 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2757 × 0020 ÷ 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 2757 × 0308 × 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 2757 × 0308 × 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2757 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2757 ÷ 00B4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 2757 × 0020 ÷ 00B4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -15912,7 +15912,7 @@ × 17D6 × 0308 × 0020 × FE15 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 17D6 × 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 17D6 × 0020 ÷ 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 17D6 × 0308 × 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 17D6 × 0308 × 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] 
COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 17D6 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 17D6 ÷ AC00 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 17D6 × 0020 ÷ AC00 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -16016,7 +16016,7 @@ × 17D6 × 0308 × 0020 × 0085 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 17D6 × 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 17D6 × 0020 ÷ 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 17D6 × 0308 × 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 17D6 × 0308 × 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 17D6 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 17D6 × 00AB ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 17D6 × 0020 ÷ 00AB ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE 
QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -16168,7 +16168,7 @@ × 3041 × 0308 × 0020 × FE15 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 3041 × 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3041 × 0020 ÷ 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 3041 × 0308 × 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 3041 × 0308 × 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3041 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3041 ÷ AC00 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 3041 × 0020 ÷ AC00 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -16272,11 +16272,11 @@ × 3041 × 0308 × 0020 × 0085 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 3041 × 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3041 × 0020 ÷ 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 3041 × 0308 × 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 3041 × 0308 × 00A0 ÷ # × 
[0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3041 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3041 × 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3041 × 0020 ÷ 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 3041 × 0308 × 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 3041 × 0308 × 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3041 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3041 ÷ 00B4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 3041 × 0020 ÷ 00B4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -16416,7 +16416,7 @@ × 200D × 261D ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] WHITE UP POINTING INDEX (EB_NotEastAsian) ÷ [0.3] × 3041 × 2060 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [11.01] WORD JOINER (WJ_NotEastAsian) ÷ [0.3] × 2060 × 3041 ÷ # × [0.3] WORD JOINER (WJ_NotEastAsian) × [11.02] HIRAGANA LETTER SMALL A (CJ_NS) ÷ [0.3] -× 3041 × 0308 × 00A0 ÷ # × [0.3] HIRAGANA 
LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 3041 × 0308 × 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 002F ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] SOLIDUS (SY_NotEastAsian) ÷ [0.3] × 2014 × 2014 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [17.0] EM DASH (B2_NotEastAsian) ÷ [0.3] diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 37e34f786..ea5954271 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-10-16, 14:22:44 GMT +# Date: 2024-11-14, 18:38:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -111,7 +111,7 @@ 0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -247,7 +247,8 @@ 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -373,7 +374,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 
11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -592,7 +593,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2609 +# Total code points: 2640 # ================================================ @@ -785,7 +786,7 @@ E0001 ; Format # Cf LANGUAGE TAG 024B ; Lower # L& LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Lower # L& LATIN SMALL LETTER R WITH STROKE 024F..0293 ; Lower # L& [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Lower # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Lower # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Lower # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Lower # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Lower # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -1260,13 +1261,14 @@ A7C3 ; Lower # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Lower # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Lower # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Lower # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Lower # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Lower # L& LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Lower # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lower # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lower # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lower # L& LATIN SMALL LETTER SIGMOID S A7DB ; Lower # L& LATIN SMALL LETTER LAMBDA -A7F2..A7F4 ; Lower # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lower # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F6 ; Lower # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 
; Lower # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lower # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -1326,7 +1328,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2522 +# Total code points: 2523 # ================================================ @@ -1935,7 +1937,10 @@ A7C2 ; Upper # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Upper # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Upper # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Upper # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Upper # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Upper # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Upper # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Upper # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Upper # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Upper # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Upper # L& LATIN CAPITAL LETTER LAMBDA @@ -1988,13 +1993,13 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1963 +# Total code points: 1966 # ================================================ 01BB ; OLetter # Lo LATIN LETTER TWO WITH STROKE 01C0..01C3 ; OLetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK -0294 ; OLetter # Lo LATIN LETTER GLOTTAL STOP +0294..0295 ; OLetter # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE 
02B9..02BF ; OLetter # Lm [7] MODIFIER LETTER PRIME..MODIFIER LETTER LEFT HALF RING 02C6..02D1 ; OLetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02EC ; OLetter # Lm MODIFIER LETTER VOICING @@ -2028,7 +2033,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; OLetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; OLetter # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; OLetter # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; OLetter # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; OLetter # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; OLetter # Lm ARABIC SMALL FARSI YEH 0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -2049,6 +2054,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 09DF..09E1 ; OLetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; OLetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; OLetter # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; OLetter # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; OLetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; OLetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; OLetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -2096,7 +2102,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0C2A..0C39 ; OLetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; OLetter # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; OLetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 
0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -2105,7 +2111,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0CAA..0CB3 ; OLetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; OLetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; OLetter # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; OLetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; OLetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; OLetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; OLetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; OLetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2372,6 +2378,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; OLetter # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; OLetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; OLetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; OLetter # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; OLetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; OLetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; OLetter # Lo KHAROSHTHI LETTER A @@ -2395,6 +2402,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; OLetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; OLetter # Lo 
[2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2591,7 +2600,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136908 +# Total code points: 136945 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakTest.html index a698e956c..a851a3035 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakTest.html @@ -7,7 +7,7 @@

Sentence_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-09-12, 14:12:16 GMT

+

Date: 2024-10-14, 12:07:04 GMT

This page illustrates the application of the Sentence_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of ATerm and Close shows ×, with the rule 9.0. Checking below the table, rule 9.0 is “SATerm Close* × ( Close | Sp | ParaSep )”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -30,7 +30,7 @@

Table

0.2sot ×
0.3÷ eot
7.02× ZW
8.0ZW SP* ÷
8.1ZWJ_O ×
9.0[^ SP BK CR LF NL ZW] × CM
9.0(?<X>[^BK CR LF NL SP ZW]) ( CM | ZWJ )* {X}
10.0( CM | ZWJ ) A
11.01× WJ
11.02WJ ×
12.0GL ×
12.1[^ SP BA HY CM] × GL
12.2[^ BA HY CM] CM+ × GL
12.3^ CM+ × GL
12.1[^ SP BA HY] × GL
13.01× EX
13.02× CL
13.03× CP
11      -◌̈   +◌̈      
Extend_FE×××××××××××××××

Rules

-

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule SB8a is given the number 8.1.
  4. Any “treat as” or “ignore” rules are handled as discussed in UAX #29, and thus reflected in a transformation of the rules usually not visible here. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. In some cases, the numbering and form of a rule is changed due to “treat as” rules.

For the original rules and the macro values they use, see UAX #29.

+

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule SB8a is given the number 8.1.
  4. Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.

For the original rules and the macro values they use, see UAX #29.

diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e9cf6f8ba..df639a405 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-10-16, 14:22:47 GMT +# Date: 2024-11-14, 18:38:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -147,7 +147,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -283,7 +283,8 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -409,7 +410,7 @@ FF9E..FF9F ; Extend # Lm 
[2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -629,7 +630,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2613 +# Total code points: 2644 # ================================================ @@ -693,8 +694,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ALetter # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ALetter # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; ALetter # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN 
ARROWHEAD 02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -751,7 +752,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; ALetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ALetter # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; ALetter # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ALetter # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; ALetter # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ALetter # Lm ARABIC SMALL FARSI YEH 0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -772,6 +773,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ALetter # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; ALetter # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -819,7 +821,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0C2A..0C39 ; ALetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ALetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ALetter # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ALetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; ALetter # Lo KANNADA SIGN SPACING 
CANDRABINDU 0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -828,7 +830,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; ALetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ALetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; ALetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; ALetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -1008,11 +1010,8 @@ A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ALetter # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ALetter # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; ALetter # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ALetter # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ALetter # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; ALetter # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; ALetter # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ALetter # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL 
LIGATURE OE @@ -1121,6 +1120,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; ALetter # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ALetter # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ALetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ALetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ALetter # Lo KHAROSHTHI LETTER A @@ -1148,6 +1148,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ALetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; ALetter # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1361,7 +1363,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33832 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/WordBreakTest.html index 
52a647c4a..6c25af5cf 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakTest.html @@ -7,7 +7,7 @@

Word_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-09-12, 14:12:18 GMT

+

Date: 2024-10-14, 12:07:11 GMT

This page illustrates the application of the Word_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

After the heavy blue line in the table are additional rows, either with different sample characters or for sequences, such as “ALetter MidLetter”. Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of ExtendNumLet and ALetter shows ×, with the rule 13.2. Checking below the table, rule 13.2 is “ExtendNumLet × (AHLetter | Numeric | Katakana)”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -45,7 +45,7 @@

Table

0.2sot ÷
0.3÷ eot
Numeric MidNumLet Format_FE÷÷÷÷÷÷÷÷÷×÷÷÷÷÷÷÷×××

Rules

-

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule WB13a is given the number 13.1.
  4. Any “treat as” or “ignore” rules are handled as discussed in UAX #29, and thus reflected in a transformation of the rules usually not visible here. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. In some cases, the numbering and form of a rule is changed due to “treat as” rules.

For the original rules and the macro values they use, see UAX #29.

+

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule WB13a is given the number 13.1.
  4. Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.

For the original rules and the macro values they use, see UAX #29.

diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 22ad39d5c..e4ed9cadf 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-10-16, 14:21:58 GMT +# Date: 2024-11-14, 18:37:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -67,6 +67,7 @@ # 108E0..108FF Hatran # 10900..1091F Phoenician # 10920..1093F Lydian +# 10940..1095C Sidetic # 10980..1099F Meroitic_Hieroglyphs # 109A0..109FF Meroitic_Cursive # 10A00..10A5F Kharoshthi @@ -138,8 +139,8 @@ 01BC..01BF ; L # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; L # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; L # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; L # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; L # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; L # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; L # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; L # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02BB..02C1 ; L # Lm [7] MODIFIER LETTER TURNED COMMA..MODIFIER LETTER REVERSED GLOTTAL STOP 02D0..02D1 ; L # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON @@ -198,6 +199,7 @@ 09FA ; L # So BENGALI ISSHAR 09FC ; L # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; L # Po BENGALI ABBREVIATION SIGN +09FF ; L # Lo BENGALI LETTER SANSKRIT BA 0A03 ; L # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; L # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; 
L # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI @@ -273,7 +275,7 @@ 0C3D ; L # Lo TELUGU SIGN AVAGRAHA 0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; L # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; L # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 0C77 ; L # Po TELUGU SIGN SIDDHAM @@ -294,7 +296,7 @@ 0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI 0CCA..0CCB ; L # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; L # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; L # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -662,11 +664,8 @@ A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; L # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; L # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; L # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; L # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; L # Lm [4] 
MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1217,8 +1216,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815343 code points not listed here. -# Total code points: 1095508 +# The above property value applies to 815305 code points not listed here. +# Total code points: 1095478 # ================================================ @@ -1272,6 +1271,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; R # Po LYDIAN TRIANGULAR MARK +10940..1095C ; R # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1330,7 +1330,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# The above property value applies to 2087 code points not listed here. +# The above property value applies to 2058 code points not listed here. 
# Total code points: 3631 # ================================================ @@ -1734,8 +1734,7 @@ FF1A ; CS # Po FULLWIDTH COLON 2B45..2B46 ; ON # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; ON # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; ON # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; ON # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; ON # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; ON # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2CE5..2CEA ; ON # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CF9..2CFC ; ON # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; ON # No COPTIC FRACTION ONE HALF @@ -1849,10 +1848,12 @@ A788 ; ON # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A828..A82B ; ON # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 A874..A877 ; ON # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD AB6A..AB6B ; ON # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +FBC3..FBD2 ; ON # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FD3E ; ON # Pe ORNATE LEFT PARENTHESIS FD3F ; ON # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; ON # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH -FDCF ; ON # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FD90..FD91 ; ON # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCF ; ON # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDFD..FDFF ; ON # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FE10..FE16 ; ON # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE17 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE 
LENTICULAR BRACKET @@ -1939,6 +1940,8 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1091F ; ON # Po PHOENICIAN WORD SEPARATOR 10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10D6E ; ON # Pd GARAY HYPHEN +10ED0 ; ON # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; ON # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND 11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -2000,7 +2003,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 6751 +# Total code points: 6786 # ================================================ @@ -2110,7 +2113,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 0B3F ; NSM # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; NSM # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; NSM # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; NSM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; NSM # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; NSM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; NSM # Mn TAMIL SIGN ANUSVARA 0BC0 ; NSM # Mn TAMIL VOWEL SIGN II @@ -2192,7 +2195,8 @@ FFFFE..FFFFF ; BN # Cn [2] .. 
1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; NSM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; NSM # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2277,7 +2281,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; NSM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; NSM # Mn BRAHMI SIGN ANUSVARA @@ -2414,7 +2418,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2033 +# Total code points: 2064 # ================================================ @@ -2448,7 +2452,7 @@ E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF 
WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; AL # Lm ARABIC SMALL FARSI YEH FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM @@ -2462,6 +2466,8 @@ FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISO FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2507,8 +2513,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 293 code points not listed here. -# Total code points: 1767 +# The above property value applies to 253 code points not listed here. 
+# Total code points: 1731 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 2efae939a..d9a17a78c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-10-16, 14:22:00 GMT +# Date: 2024-11-14, 18:37:59 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -83,8 +83,8 @@ 01BC..01BF ; 0 # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; 0 # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; 0 # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; 0 # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; 0 # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; 0 # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; 0 # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; 0 # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; 0 # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; 0 # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -182,7 +182,7 @@ 0860..086A ; 0 # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; 0 # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; 0 # Sk ARABIC RAISED ROUND DOT -0889..088E ; 0 # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; 0 # Lo [7] ARABIC LETTER NOON WITH INVERTED 
SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; 0 # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 08A0..08C8 ; 0 # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; 0 # Lm ARABIC SMALL FARSI YEH @@ -232,6 +232,7 @@ 09FB ; 0 # Sc BENGALI GANDA MARK 09FC ; 0 # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; 0 # Po BENGALI ABBREVIATION SIGN +09FF ; 0 # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; 0 # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; 0 # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; 0 # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -290,7 +291,7 @@ 0B41..0B44 ; 0 # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B47..0B48 ; 0 # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; 0 # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU -0B55..0B56 ; 0 # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; 0 # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; 0 # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; 0 # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; 0 # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -335,7 +336,7 @@ 0C46..0C48 ; 0 # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI 0C4A..0C4C ; 0 # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU 0C58..0C5A ; 0 # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; 0 # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; 0 # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; 0 # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; 0 # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; 0 # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -360,7 +361,7 @@ 0CCA..0CCB ; 0 # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC ; 0 # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; 0 # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; 0 # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; 0 # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; 0 # 
Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; 0 # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; 0 # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -911,8 +912,7 @@ 2B45..2B46 ; 0 # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; 0 # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; 0 # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; 0 # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; 0 # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; 0 # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; 0 # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; 0 # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; 0 # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -1093,11 +1093,8 @@ A788 ; 0 # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; 0 # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; 0 # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; 0 # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; 0 # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; 0 # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; 0 # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; 0 # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; 0 # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; 0 # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; 0 # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; 0 # L& [2] 
LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; 0 # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; 0 # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1234,13 +1231,15 @@ FB40..FB41 ; 0 # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH FB43..FB44 ; 0 # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; 0 # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; 0 # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; 0 # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; 0 # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; 0 # Pe ORNATE LEFT PARENTHESIS FD3F ; 0 # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; 0 # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; 0 # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; 0 # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; 0 # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; 0 # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; 0 # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; 0 # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; 0 # Sc RIAL SIGN FDFD..FDFF ; 0 # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -1421,6 +1420,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1091F ; 0 # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; 0 # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; 0 # Po LYDIAN TRIANGULAR MARK +10940..1095C ; 0 # Lo [29] SIDETIC 
LETTER N01..SIDETIC LETTER N29 10980..109B7 ; 0 # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; 0 # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; 0 # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1475,6 +1475,10 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EAD ; 0 # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; 0 # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; 0 # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; 0 # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; 0 # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; 0 # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -2066,8 +2070,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821573 code points not listed here. -# Total code points: 1113178 +# The above property value applies to 821466 code points not listed here. 
+# Total code points: 1113149 # ================================================ @@ -2601,6 +2605,8 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1ABF..1AC0 ; 220 # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW 1AC3..1AC4 ; 220 # Mn [2] COMBINING LEFT PARENTHESIS BELOW LEFT..COMBINING RIGHT PARENTHESIS BELOW RIGHT 1ACA ; 220 # Mn COMBINING DOUBLE PLUS SIGN BELOW +1ADD ; 220 # Mn COMBINING DOT-AND-RING BELOW +1AE6 ; 220 # Mn COMBINING DOUBLE ARCH BELOW 1B6C ; 220 # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP 1CD5..1CD9 ; 220 # Mn [5] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER 1CDC..1CDF ; 220 # Mn [4] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE THREE DOTS BELOW @@ -2621,6 +2627,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 10A0D ; 220 # Mn KHAROSHTHI SIGN DOUBLE RING BELOW 10A3A ; 220 # Mn KHAROSHTHI SIGN DOT BELOW 10AE6 ; 220 # Mn MANICHAEAN ABBREVIATION MARK BELOW +10EFA..10EFB ; 220 # Mn [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON 10EFD..10EFF ; 220 # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F47 ; 220 # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW 10F4B ; 220 # Mn SOGDIAN COMBINING CURVE BELOW @@ -2633,7 +2640,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 182 +# Total code points: 186 # ================================================ @@ -2742,7 +2749,9 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1ABB..1ABC ; 230 # Mn [2] COMBINING PARENTHESES ABOVE..COMBINING DOUBLE PARENTHESES ABOVE 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] 
COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE -1ACB..1ACE ; 230 # Mn [4] COMBINING TRIPLE ACUTE ACCENT..COMBINING LATIN SMALL LETTER INSULAR T +1ACB..1ADC ; 230 # Mn [18] COMBINING TRIPLE ACUTE ACCENT..COMBINING DIAERESIS WITH RAISED LEFT DOT +1AE0..1AE5 ; 230 # Mn [6] COMBINING LEFT TACK ABOVE..COMBINING SEAGULL ABOVE +1AE7..1AEA ; 230 # Mn [4] COMBINING DOUBLE ARCH ABOVE..COMBINING UPWARDS ARROW ABOVE 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2809,7 +2818,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 517 +# Total code points: 541 # ================================================ @@ -2841,9 +2850,10 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 035D..035E ; 234 # Mn [2] COMBINING DOUBLE BREVE..COMBINING DOUBLE MACRON 0360..0361 ; 234 # Mn [2] COMBINING DOUBLE TILDE..COMBINING DOUBLE INVERTED BREVE +1AEB ; 234 # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1DCD ; 234 # Mn COMBINING DOUBLE CIRCUMFLEX ABOVE -# Total code points: 5 +# Total code points: 6 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index a825479ac..1b8d1ae99 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ -# DerivedDecompositionType-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedDecompositionType-17.0.0.txt +# Date: 2024-11-13, 22:18:45 GMT # © 2024 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -873,7 +873,7 @@ FEFB ; Isolated # Lo ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM 3196..319F ; Super # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK A69C..A69D ; Super # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; Super # Lm MODIFIER LETTER US -A7F2..A7F4 ; Super # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Super # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Super # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Super # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W @@ -884,7 +884,7 @@ AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W 1E06B..1E06D ; Super # Lm [3] MODIFIER LETTER CYRILLIC SMALL ES WITH DESCENDER..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1F16A..1F16C ; Super # So [3] RAISED MC SIGN..RAISED MR SIGN -# Total code points: 249 +# Total code points: 250 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 2b51b2ce2..3c0cb2d75 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-10-16, 14:22:03 GMT +# Date: 2024-11-14, 18:38:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -91,8 +91,8 @@ 01DD..0250 ; N # L& [116] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER TURNED A 0252..0260 ; N # L& [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK 0262..0293 ; N # L& [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL -0294 ; N # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; N # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; N # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; N # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C3 ; N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD 02C5 ; N # Sk MODIFIER LETTER DOWN ARROWHEAD @@ -223,7 +223,7 @@ 0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; N # Sk ARABIC RAISED ROUND DOT -0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; N # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -280,6 +280,7 @@ 09FC ; N # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; N # Po BENGALI ABBREVIATION SIGN 09FE ; N # Mn BENGALI SANDHI MARK +09FF ; N # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; N # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -343,7 +344,7 @@ 0B47..0B48 ; N # Mc [2] ORIYA VOWEL SIGN 
E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; N # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; N # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; N # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -391,7 +392,7 @@ 0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -417,7 +418,7 @@ 0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -691,7 +692,8 @@ 1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; N # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; N # Mn [12] COMBINING 
LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -1069,8 +1071,7 @@ 2B4D..2B4F ; N # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW 2B51..2B54 ; N # So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON 2B5A..2B73 ; N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; N # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; N # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; N # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -1182,11 +1183,8 @@ A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; N # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; N # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; N # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; N # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN 
CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1330,13 +1328,15 @@ FB40..FB41 ; N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH FB43..FB44 ; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; N # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; N # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; N # Pe ORNATE LEFT PARENTHESIS FD3F ; N # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; N # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; N # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; N # Sc RIAL SIGN FDFD..FDFF ; N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -1427,6 +1427,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1091F ; N # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; N # Po LYDIAN TRIANGULAR MARK +10940..1095C ; N # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 
; N # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1486,7 +1487,11 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; N # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; N # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -2109,7 +2114,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761091 code points not listed here. +# The above property value applies to 760984 code points not listed here. 
# Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index a65d31c6e..145d3bc46 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-10-16, 14:22:03 GMT +# Date: 2024-11-14, 18:38:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -36,7 +36,6 @@ 085C..085D ; Cn # [2] .. 085F ; Cn # 086B..086F ; Cn # [5] .. -088F ; Cn # 0892..0896 ; Cn # [5] .. 0984 ; Cn # 098D..098E ; Cn # [2] .. @@ -51,7 +50,7 @@ 09D8..09DB ; Cn # [4] .. 09DE ; Cn # 09E4..09E5 ; Cn # [2] .. -09FF..0A00 ; Cn # [2] .. +0A00 ; Cn # 0A04 ; Cn # 0A0B..0A0E ; Cn # [4] .. 0A11..0A12 ; Cn # [2] .. @@ -91,7 +90,7 @@ 0B3A..0B3B ; Cn # [2] .. 0B45..0B46 ; Cn # [2] .. 0B49..0B4A ; Cn # [2] .. -0B4E..0B54 ; Cn # [7] .. +0B4E..0B52 ; Cn # [5] .. 0B58..0B5B ; Cn # [4] .. 0B5E ; Cn # 0B64..0B65 ; Cn # [2] .. @@ -120,7 +119,7 @@ 0C49 ; Cn # 0C4E..0C54 ; Cn # [7] .. 0C57 ; Cn # -0C5B..0C5C ; Cn # [2] .. +0C5B ; Cn # 0C5E..0C5F ; Cn # [2] .. 0C64..0C65 ; Cn # [2] .. 0C70..0C76 ; Cn # [7] .. @@ -132,7 +131,7 @@ 0CC5 ; Cn # 0CC9 ; Cn # 0CCE..0CD4 ; Cn # [7] .. -0CD7..0CDC ; Cn # [6] .. +0CD7..0CDB ; Cn # [5] .. 0CDF ; Cn # 0CE4..0CE5 ; Cn # [2] .. 0CF0 ; Cn # @@ -228,7 +227,8 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1ACF..1AFF ; Cn # [49] .. +1ADE..1ADF ; Cn # [2] .. +1AEC..1AFF ; Cn # [20] .. 1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. @@ -263,7 +263,6 @@ 242A..243F ; Cn # [22] .. 244B..245F ; Cn # [21] .. 2B74..2B75 ; Cn # [2] .. -2B96 ; Cn # 2CF4..2CF8 ; Cn # [5] .. 
2D26 ; Cn # 2D28..2D2C ; Cn # [5] .. @@ -294,10 +293,7 @@ A48D..A48F ; Cn # [3] .. A4C7..A4CF ; Cn # [9] .. A62C..A63F ; Cn # [20] .. A6F8..A6FF ; Cn # [8] .. -A7CE..A7CF ; Cn # [2] .. -A7D2 ; Cn # -A7D4 ; Cn # -A7DD..A7F1 ; Cn # [21] .. +A7DD..A7F0 ; Cn # [20] .. A82D..A82F ; Cn # [3] .. A83A..A83F ; Cn # [6] .. A878..A87F ; Cn # [8] .. @@ -333,9 +329,6 @@ FB3D ; Cn # FB3F ; Cn # FB42 ; Cn # FB45 ; Cn # -FBC3..FBD2 ; Cn # [16] .. -FD90..FD91 ; Cn # [2] .. -FDC8..FDCE ; Cn # [7] .. FDD0..FDEF ; Cn # [32] .. FE1A..FE1F ; Cn # [6] .. FE53 ; Cn # @@ -407,7 +400,7 @@ FFFE..FFFF ; Cn # [2] .. 108F6..108FA ; Cn # [5] .. 1091C..1091E ; Cn # [3] .. 1093A..1093E ; Cn # [5] .. -10940..1097F ; Cn # [64] .. +1095D..1097F ; Cn # [35] .. 109B8..109BB ; Cn # [4] .. 109D0..109D1 ; Cn # [2] .. 10A04 ; Cn # @@ -439,7 +432,8 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. -10EC5..10EFB ; Cn # [55] .. +10EC8..10ECF ; Cn # [8] .. +10ED9..10EF9 ; Cn # [33] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -748,7 +742,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. 
-# Total code points: 819525 +# Total code points: 819418 # ================================================ @@ -1356,7 +1350,10 @@ A7C2 ; Lu # LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Lu # [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Lu # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Lu # [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Lu # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Lu # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Lu # LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Lu # LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Lu # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Lu # LATIN CAPITAL LETTER SIGMOID S A7DA ; Lu # LATIN CAPITAL LETTER LAMBDA @@ -1406,7 +1403,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1858 +# Total code points: 1861 # ================================================ @@ -1557,7 +1554,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 024B ; Ll # LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Ll # LATIN SMALL LETTER R WITH STROKE 024F..0293 ; Ll # [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Ll # [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Ll # [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 0371 ; Ll # GREEK SMALL LETTER HETA 0373 ; Ll # GREEK SMALL LETTER ARCHAIC SAMPI 0377 ; Ll # GREEK SMALL LETTER PAMPHYLIAN DIGAMMA @@ -2018,6 +2015,7 @@ A7C3 ; Ll # LATIN SMALL LETTER ANGLICANA W A7C8 ; Ll # LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Ll # LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Ll # LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; 
Ll # LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Ll # LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Ll # LATIN SMALL LETTER DOUBLE THORN A7D5 ; Ll # LATIN SMALL LETTER DOUBLE WYNN @@ -2144,7 +2142,7 @@ A69C..A69D ; Lm # [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER C A717..A71F ; Lm # [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A770 ; Lm # MODIFIER LETTER US A788 ; Lm # MODIFIER LETTER LOW CIRCUMFLEX ACCENT -A7F2..A7F4 ; Lm # [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lm # [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Lm # [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A9CF ; Lm # JAVANESE PANGRANGKEP A9E6 ; Lm # MYANMAR MODIFIER LETTER SHAN REDUPLICATION @@ -2160,6 +2158,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 107B2..107BA ; Lm # [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10D4E ; Lm # GARAY VOWEL LENGTH MARK 10D6F ; Lm # GARAY REDUPLICATION MARK +10EC5 ; Lm # ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW 16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16D40..16D42 ; Lm # [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D6B..16D6C ; Lm # [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT @@ -2174,7 +2173,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 404 +# Total code points: 406 # ================================================ @@ -2184,7 +2183,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 00BA ; Lo # MASCULINE ORDINAL INDICATOR 01BB ; Lo # LATIN LETTER TWO WITH STROKE 01C0..01C3 ; Lo # [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK -0294 ; Lo # LATIN LETTER GLOTTAL STOP +0294..0295 ; Lo # [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED 
FRICATIVE 05D0..05EA ; Lo # [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; Lo # [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 0620..063F ; Lo # [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE @@ -2204,7 +2203,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0840..0858 ; Lo # [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; Lo # [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Lo # [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; Lo # [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Lo # [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; Lo # [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 0904..0939 ; Lo # [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; Lo # DEVANAGARI SIGN AVAGRAHA @@ -2223,6 +2222,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 09DF..09E1 ; Lo # [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; Lo # [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; Lo # BENGALI LETTER VEDIC ANUSVARA +09FF ; Lo # BENGALI LETTER SANSKRIT BA 0A05..0A0A ; Lo # [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; Lo # [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; Lo # [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -2270,7 +2270,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0C2A..0C39 ; Lo # [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; Lo # TELUGU SIGN AVAGRAHA 0C58..0C5A ; Lo # [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Lo # TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Lo # [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Lo # [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; Lo # KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; Lo # [8] KANNADA LETTER 
A..KANNADA LETTER VOCALIC L @@ -2279,7 +2279,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0CAA..0CB3 ; Lo # [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; Lo # [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; Lo # KANNADA SIGN AVAGRAHA -0CDD..0CDE ; Lo # [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Lo # [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Lo # [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; Lo # [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; Lo # [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2505,6 +2505,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 108F4..108F5 ; Lo # [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; Lo # [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Lo # [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; Lo # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; Lo # [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; Lo # [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Lo # KHAROSHTHI LETTER A @@ -2526,6 +2527,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC6..10EC7 ; Lo # [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2709,7 +2711,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; 
Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136477 +# Total code points: 136513 # ================================================ @@ -2774,7 +2776,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 0B3F ; Mn # ORIYA VOWEL SIGN I 0B41..0B44 ; Mn # [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Mn # ORIYA SIGN VIRAMA -0B55..0B56 ; Mn # [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Mn # [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; Mn # [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Mn # TAMIL SIGN ANUSVARA 0BC0 ; Mn # TAMIL VOWEL SIGN II @@ -2857,7 +2859,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; Mn # [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Mn # [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Mn # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2939,7 +2942,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Mn # [6] ARABIC 
DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Mn # BRAHMI SIGN ANUSVARA @@ -3076,7 +3079,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2025 +# Total code points: 2056 # ================================================ @@ -3907,6 +3910,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 10AF0..10AF6 ; Po # [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER 10B39..10B3F ; Po # [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10B99..10B9C ; Po # [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10ED0 ; Po # ARABIC BIBLICAL END OF VERSE 10F55..10F59 ; Po # [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT 10F86..10F89 ; Po # [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS 11047..1104D ; Po # [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS @@ -3958,7 +3962,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 1E5FF ; Po # OL ONAL ABBREVIATION SIGN 1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# Total code points: 640 +# Total code points: 641 # ================================================ @@ -4181,8 +4185,7 @@ FFE3 ; Sk # FULLWIDTH MACRON 2B00..2B2F ; So # [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE 2B45..2B46 ; So # [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B4D..2B73 ; So # [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; So # [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; So # [105] 
SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; So # [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2CE5..2CEA ; So # [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2E50..2E51 ; So # [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR 2E80..2E99 ; So # [26] CJK RADICAL REPEAT..CJK RADICAL RAP @@ -4210,8 +4213,10 @@ A828..A82B ; So # [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK- A836..A837 ; So # [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK A839 ; So # NORTH INDIC QUANTITY MARK AA77..AA79 ; So # [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +FBC3..FBD2 ; So # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FD40..FD4F ; So # [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH -FDCF ; So # ARABIC LIGATURE SALAAMUHU ALAYNAA +FD90..FD91 ; So # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCF ; So # [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDFD..FDFF ; So # [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FFE4 ; So # FULLWIDTH BROKEN BAR FFE8 ; So # HALFWIDTH FORMS LIGHT VERTICAL @@ -4225,6 +4230,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 101D0..101FC ; So # [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND 10877..10878 ; So # [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON 10AC8 ; So # MANICHAEAN SIGN UD +10ED1..10ED8 ; So # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 1173F ; So # AHOM SYMBOL VI 11FD5..11FDC ; So # [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI 11FE1..11FF1 ; So # [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA @@ -4290,7 +4296,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE 
AND LOWER HALF BLOCK 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 7376 +# Total code points: 7410 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt index 17778a8a0..f24475268 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt @@ -1,5 +1,5 @@ -# DerivedJoiningGroup-16.0.0.txt -# Date: 2024-07-30, 21:15:55 GMT +# DerivedJoiningGroup-17.0.0.txt +# Date: 2024-11-14, 15:27:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -231,8 +231,9 @@ 06B9..06BC ; Noon # Lo [4] ARABIC LETTER NOON WITH DOT BELOW..ARABIC LETTER NOON WITH RING 0767..0769 ; Noon # Lo [3] ARABIC LETTER NOON WITH TWO DOTS BELOW..ARABIC LETTER NOON WITH SMALL V 0889 ; Noon # Lo ARABIC LETTER NOON WITH INVERTED SMALL V +088F ; Noon # Lo ARABIC LETTER NOON WITH RING ABOVE -# Total code points: 9 +# Total code points: 10 # ================================================ @@ -384,8 +385,9 @@ 0777 ; Yeh # Lo ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW 08A8..08A9 ; Yeh # Lo [2] ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE 08BA ; Yeh # Lo ARABIC LETTER YEH WITH TWO DOTS BELOW AND SMALL NOON ABOVE +10EC7 ; Yeh # Lo ARABIC LETTER YEH WITH FOUR DOTS BELOW -# Total code points: 10 +# Total code points: 11 # ================================================ @@ -750,4 +752,10 @@ # Total code points: 1 +# ================================================ + +10EC6 ; Thin_Noon # Lo ARABIC LETTER THIN NOON + +# Total code points: 1 + # EOF diff --git 
a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 413179f15..9560720aa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-17.0.0.txt -# Date: 2024-10-16, 14:22:05 GMT +# Date: 2024-11-14, 18:38:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -70,6 +70,7 @@ 0868 ; D # Lo SYRIAC LETTER MALAYALAM LLA 0886 ; D # Lo ARABIC LETTER THIN YEH 0889..088D ; D # Lo [5] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW +088F ; D # Lo ARABIC LETTER NOON WITH RING ABOVE 08A0..08A9 ; D # Lo [10] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE 08AF..08B0 ; D # Lo [2] ARABIC LETTER SAD WITH THREE DOTS BELOW..ARABIC LETTER GAF WITH INVERTED STROKE 08B3..08B8 ; D # Lo [6] ARABIC LETTER AIN WITH THREE DOTS BELOW..ARABIC LETTER TEH WITH SMALL TEH ABOVE @@ -96,6 +97,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10D01..10D21 ; D # Lo [33] HANIFI ROHINGYA LETTER BA..HANIFI ROHINGYA VOWEL O 10D23 ; D # Lo HANIFI ROHINGYA MARK NA KHONNA 10EC3..10EC4 ; D # Lo [2] ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC6..10EC7 ; D # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F30..10F32 ; D # Lo [3] SOGDIAN LETTER ALEPH..SOGDIAN LETTER GIMEL 10F34..10F44 ; D # Lo [17] SOGDIAN LETTER WAW..SOGDIAN LETTER LESH 10F51..10F53 ; D # No [3] SOGDIAN NUMBER ONE..SOGDIAN NUMBER TWENTY @@ -111,7 +113,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10FCA ; D # No CHORASMIAN NUMBER TWENTY 1E900..1E943 ; D # L& [68] 
ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 612 +# Total code points: 615 # ================================================ @@ -265,7 +267,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 0B3F ; T # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; T # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; T # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; T # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; T # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; T # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; T # Mn TAMIL SIGN ANUSVARA 0BC0 ; T # Mn TAMIL VOWEL SIGN II @@ -349,7 +351,8 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; T # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; T # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; T # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -441,7 +444,7 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; T # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; T # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; T # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; T # Mn [4] OLD UYGHUR 
COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; T # Mn BRAHMI SIGN ANUSVARA @@ -584,6 +587,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2190 +# Total code points: 2221 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index e21b381c0..45434e051 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-10-16, 14:22:05 GMT +# Date: 2024-11-14, 18:38:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757645 code points not listed here. -# Total code points: 895113 +# The above property value applies to 757538 code points not listed here. 
+# Total code points: 895006 # ================================================ @@ -312,6 +312,7 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 0F12 ; GL # Po TIBETAN MARK RGYA GRAM SHAD 0FD9..0FDA ; GL # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS 180E ; GL # Cf MONGOLIAN VOWEL SEPARATOR +1AEB ; GL # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1DCD ; GL # Mn COMBINING DOUBLE CIRCUMFLEX ABOVE 1DFC ; GL # Mn COMBINING DOUBLE INVERTED BREVE BELOW 2007 ; GL # Zs FIGURE SPACE @@ -329,7 +330,7 @@ FE2D..FE2E ; GL # Mn [2] COMBINING CONJOINING MACRON BELOW..COMBINING CYRIL 13439..1343B ; GL # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER -# Total code points: 41 +# Total code points: 42 # ================================================ @@ -586,8 +587,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 01BC..01BF ; AL # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; AL # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; AL # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; AL # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; AL # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; AL # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; AL # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; AL # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; AL # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6 ; AL # Lm MODIFIER LETTER CIRCUMFLEX ACCENT @@ -665,7 +666,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] 
ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; AL # Lm ARABIC SMALL FARSI YEH 0904..0939 ; AL # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -690,6 +691,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 09FA ; AL # So BENGALI ISSHAR 09FC ; AL # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; AL # Po BENGALI ABBREVIATION SIGN +09FF ; AL # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; AL # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; AL # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; AL # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -744,7 +746,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0C2A..0C39 ; AL # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; AL # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C78..0C7E ; AL # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F ; AL # So TELUGU SIGN TUUMU @@ -755,7 +757,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0CAA..0CB3 ; AL # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; AL # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; AL # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC 
RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; AL # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; AL # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -1078,8 +1080,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2B47..2B4C ; AL # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B54 ; AL # So [8] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..WHITE RIGHT-POINTING PENTAGON 2B5A..2B73 ; AL # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; AL # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; AL # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; AL # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; AL # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; AL # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -1140,11 +1141,8 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; AL # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; AL # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 
; AL # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1187,11 +1185,13 @@ FB13..FB17 ; AL # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LI FB29 ; AL # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; AL # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD40..FD4F ; AL # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; AL # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; AL # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; AL # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFD..FDFF ; AL # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM @@ -1271,6 +1271,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10916..1091B ; AL # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE 10920..10939 ; AL # Lo [26] LYDIAN LETTER 
A..LYDIAN LETTER C 1093F ; AL # Po LYDIAN TRIANGULAR MARK +10940..1095C ; AL # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; AL # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; AL # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; AL # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1315,6 +1316,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1613,7 +1617,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26679 +# Total code points: 26754 # ================================================ @@ -1966,7 +1970,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 0B47..0B48 ; CM # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; CM # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; CM # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; CM # Mn 
[2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; CM # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; CM # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; CM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; CM # Mn TAMIL SIGN ANUSVARA @@ -2054,7 +2058,8 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1A7F ; CM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; CM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEA ; CM # Mn [11] COMBINING LEFT TACK ABOVE..COMBINING UPWARDS ARROW ABOVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2182,7 +2187,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; CM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; CM # Mc BRAHMI SIGN CANDRABINDU @@ -2393,7 +2398,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2478 +# Total code points: 2508 # ================================================ @@ 
-2510,6 +2515,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 10B39..10B3F ; BA # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10D6E ; BA # Pd GARAY HYPHEN 10EAD ; BA # Pd YEZIDI HYPHENATION MARK +10ED0 ; BA # Po ARABIC BIBLICAL END OF VERSE 11047..11048 ; BA # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; BA # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA 11140..11143 ; BA # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK @@ -2546,7 +2552,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 1BC9F ; BA # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; BA # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 272 +# Total code points: 273 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index f99a9031c..006451cad 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-10-16, 14:22:05 GMT +# Date: 2024-11-14, 18:38:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2096,6 +2096,7 @@ 088C ; ARABIC LETTER TAH WITH THREE DOTS BELOW 088D ; ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW 088E ; ARABIC VERTICAL TAIL +088F ; ARABIC LETTER NOON WITH RING ABOVE 0890 ; ARABIC POUND MARK ABOVE 0891 ; ARABIC PIASTRE MARK ABOVE 0897 ; ARABIC PEPET @@ -2427,6 +2428,7 @@ 09FC ; BENGALI LETTER VEDIC ANUSVARA 09FD ; BENGALI ABBREVIATION SIGN 09FE ; BENGALI SANDHI MARK +09FF ; BENGALI LETTER SANSKRIT BA 0A01 ; GURMUKHI SIGN ADAK BINDI 0A02 ; GURMUKHI SIGN BINDI 0A03 ; GURMUKHI SIGN VISARGA @@ -2661,6 +2663,8 @@ 0B4B ; ORIYA VOWEL SIGN O 0B4C ; ORIYA VOWEL SIGN AU 0B4D ; ORIYA SIGN VIRAMA +0B53 ; ORIYA SIGN DOT ABOVE +0B54 ; ORIYA SIGN DOUBLE DOT ABOVE 0B55 ; ORIYA SIGN OVERLINE 0B56 ; ORIYA AI LENGTH MARK 0B57 ; ORIYA AU LENGTH MARK @@ -2837,6 +2841,7 @@ 0C58 ; TELUGU LETTER TSA 0C59 ; TELUGU LETTER DZA 0C5A ; TELUGU LETTER RRRA +0C5C ; TELUGU ARCHAIC SHRII 0C5D ; TELUGU LETTER NAKAARA POLLU 0C60 ; TELUGU LETTER VOCALIC RR 0C61 ; TELUGU LETTER VOCALIC LL @@ -2933,6 +2938,7 @@ 0CCD ; KANNADA SIGN VIRAMA 0CD5 ; KANNADA LENGTH MARK 0CD6 ; KANNADA AI LENGTH MARK +0CDC ; KANNADA ARCHAIC SHRII 0CDD ; KANNADA LETTER NAKAARA POLLU 0CDE ; KANNADA LETTER FA 0CE0 ; KANNADA LETTER VOCALIC RR @@ -6112,6 +6118,33 @@ 1ACC ; COMBINING LATIN SMALL LETTER INSULAR G 1ACD ; COMBINING LATIN SMALL LETTER INSULAR R 1ACE ; COMBINING LATIN SMALL LETTER INSULAR T +1ACF ; COMBINING DOUBLE CARON +1AD0 ; COMBINING VERTICAL-LINE-ACUTE +1AD1 ; COMBINING GRAVE-VERTICAL-LINE +1AD2 ; COMBINING VERTICAL-LINE-GRAVE +1AD3 ; COMBINING ACUTE-VERTICAL-LINE +1AD4 ; COMBINING VERTICAL-LINE-MACRON +1AD5 ; COMBINING MACRON-VERTICAL-LINE +1AD6 ; COMBINING VERTICAL-LINE-ACUTE-GRAVE +1AD7 ; COMBINING VERTICAL-LINE-GRAVE-ACUTE +1AD8 ; COMBINING MACRON-ACUTE-GRAVE +1AD9 ; COMBINING SHARP SIGN +1ADA ; COMBINING FLAT SIGN +1ADB ; COMBINING DOWN TACK ABOVE +1ADC ; COMBINING DIAERESIS WITH RAISED LEFT DOT +1ADD 
; COMBINING DOT-AND-RING BELOW +1AE0 ; COMBINING LEFT TACK ABOVE +1AE1 ; COMBINING RIGHT TACK ABOVE +1AE2 ; COMBINING MINUS SIGN ABOVE +1AE3 ; COMBINING INVERTED BRIDGE ABOVE +1AE4 ; COMBINING SQUARE ABOVE +1AE5 ; COMBINING SEAGULL ABOVE +1AE6 ; COMBINING DOUBLE ARCH BELOW +1AE7 ; COMBINING DOUBLE ARCH ABOVE +1AE8 ; COMBINING EQUALS SIGN ABOVE +1AE9 ; COMBINING LEFT ANGLE CENTRED ABOVE +1AEA ; COMBINING UPWARDS ARROW ABOVE +1AEB ; COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -10214,6 +10247,7 @@ 2B93 ; NEWLINE RIGHT 2B94 ; FOUR CORNER ARROWS CIRCLING ANTICLOCKWISE 2B95 ; RIGHTWARDS BLACK ARROW +2B96 ; EQUALS SIGN WITH INFINITY ABOVE 2B97 ; SYMBOL FOR TYPE A ELECTRONICS 2B98 ; THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD 2B99 ; THREE-D RIGHT-LIGHTED UPWARDS EQUILATERAL ARROWHEAD @@ -14247,9 +14281,13 @@ A7CA ; LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CB ; LATIN CAPITAL LETTER RAMS HORN A7CC ; LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CD ; LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CE ; LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE +A7CF ; LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; LATIN CAPITAL LETTER CLOSED INSULAR G A7D1 ; LATIN SMALL LETTER CLOSED INSULAR G +A7D2 ; LATIN CAPITAL LETTER DOUBLE THORN A7D3 ; LATIN SMALL LETTER DOUBLE THORN +A7D4 ; LATIN CAPITAL LETTER DOUBLE WYNN A7D5 ; LATIN SMALL LETTER DOUBLE WYNN A7D6 ; LATIN CAPITAL LETTER MIDDLE SCOTS S A7D7 ; LATIN SMALL LETTER MIDDLE SCOTS S @@ -14258,6 +14296,7 @@ A7D9 ; LATIN SMALL LETTER SIGMOID S A7DA ; LATIN CAPITAL LETTER LAMBDA A7DB ; LATIN SMALL LETTER LAMBDA A7DC ; LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1 ; MODIFIER LETTER CAPITAL S A7F2 ; MODIFIER LETTER CAPITAL C A7F3 ; MODIFIER LETTER CAPITAL F A7F4 ; MODIFIER LETTER CAPITAL Q @@ -26590,6 +26629,22 @@ FBBF ; ARABIC SYMBOL RING FBC0 ; ARABIC SYMBOL SMALL TAH ABOVE FBC1 ; ARABIC SYMBOL SMALL TAH BELOW FBC2 ; ARABIC SYMBOL 
WASLA ABOVE +FBC3 ; ARABIC LIGATURE JALLA WA-ALAA +FBC4 ; ARABIC LIGATURE DAAMAT BARAKAATUHUM +FBC5 ; ARABIC LIGATURE RAHMATU ALLAAHI TAAALAA ALAYH +FBC6 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIM +FBC7 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIMAA +FBC8 ; ARABIC LIGATURE RAHIMAHUM ALLAAHU TAAALAA +FBC9 ; ARABIC LIGATURE RAHIMAHUMAA ALLAAH +FBCA ; ARABIC LIGATURE RAHIMAHUMAA ALLAAHU TAAALAA +FBCB ; ARABIC LIGATURE RADI ALLAHU TAAALAA ANHUM +FBCC ; ARABIC LIGATURE HAFIZAHU ALLAAH +FBCD ; ARABIC LIGATURE HAFIZAHU ALLAAHU TAAALAA +FBCE ; ARABIC LIGATURE HAFIZAHUM ALLAAHU TAAALAA +FBCF ; ARABIC LIGATURE HAFIZAHUMAA ALLAAHU TAAALAA +FBD0 ; ARABIC LIGATURE SALLALLAAHU TAAALAA ALAYHI WA-SALLAM +FBD1 ; ARABIC LIGATURE AJJAL ALLAAHU FARAJAHU ASH-SHAREEF +FBD2 ; ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3 ; ARABIC LETTER NG ISOLATED FORM FBD4 ; ARABIC LETTER NG FINAL FORM FBD5 ; ARABIC LETTER NG INITIAL FORM @@ -27035,6 +27090,8 @@ FD8C ; ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM FD8D ; ARABIC LIGATURE MEEM WITH JEEM WITH MEEM INITIAL FORM FD8E ; ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM FD8F ; ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYH +FD91 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92 ; ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM FD93 ; ARABIC LIGATURE HEH WITH MEEM WITH JEEM INITIAL FORM FD94 ; ARABIC LIGATURE HEH WITH MEEM WITH MEEM INITIAL FORM @@ -27089,6 +27146,13 @@ FDC4 ; ARABIC LIGATURE AIN WITH JEEM WITH MEEM INITIAL FORM FDC5 ; ARABIC LIGATURE SAD WITH MEEM WITH MEEM INITIAL FORM FDC6 ; ARABIC LIGATURE SEEN WITH KHAH WITH YEH FINAL FORM FDC7 ; ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDC8 ; ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA +FDC9 ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANH +FDCA ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHAA +FDCB ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHUMAA +FDCC ; ARABIC LIGATURE SALLALLAHU ALAYHI WA-ALAA AALIHEE WA-SALLAM +FDCD ; ARABIC LIGATURE 
AJJAL ALLAAHU TAAALAA FARAJAHU ASH-SHAREEF +FDCE ; ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH FDCF ; ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0 ; ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM FDF1 ; ARABIC LIGATURE QALA USED AS KORANIC STOP SIGN ISOLATED FORM @@ -29373,6 +29437,35 @@ FFFD ; REPLACEMENT CHARACTER 10938 ; LYDIAN LETTER NN 10939 ; LYDIAN LETTER C 1093F ; LYDIAN TRIANGULAR MARK +10940 ; SIDETIC LETTER N01 +10941 ; SIDETIC LETTER N02 +10942 ; SIDETIC LETTER N03 +10943 ; SIDETIC LETTER N04 +10944 ; SIDETIC LETTER N05 +10945 ; SIDETIC LETTER N06 +10946 ; SIDETIC LETTER N07 +10947 ; SIDETIC LETTER N08 +10948 ; SIDETIC LETTER N09 +10949 ; SIDETIC LETTER N10 +1094A ; SIDETIC LETTER N11 +1094B ; SIDETIC LETTER N12 +1094C ; SIDETIC LETTER N13 +1094D ; SIDETIC LETTER N14 +1094E ; SIDETIC LETTER N15 +1094F ; SIDETIC LETTER N16 +10950 ; SIDETIC LETTER N17 +10951 ; SIDETIC LETTER N18 +10952 ; SIDETIC LETTER N19 +10953 ; SIDETIC LETTER N20 +10954 ; SIDETIC LETTER N21 +10955 ; SIDETIC LETTER N22 +10956 ; SIDETIC LETTER N23 +10957 ; SIDETIC LETTER N24 +10958 ; SIDETIC LETTER N25 +10959 ; SIDETIC LETTER N26 +1095A ; SIDETIC LETTER N27 +1095B ; SIDETIC LETTER N28 +1095C ; SIDETIC LETTER N29 10980 ; MEROITIC HIEROGLYPHIC LETTER A 10981 ; MEROITIC HIEROGLYPHIC LETTER E 10982 ; MEROITIC HIEROGLYPHIC LETTER I @@ -30206,6 +30299,20 @@ FFFD ; REPLACEMENT CHARACTER 10EC2 ; ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW 10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW 10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; ARABIC LETTER THIN NOON +10EC7 ; ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; ARABIC BIBLICAL END OF VERSE +10ED1 ; ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM +10ED2 ; ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM +10ED3 ; ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM +10ED4 ; ARABIC LIGATURE QADDASA ALLAAHU SIRRAH +10ED5 ; ARABIC LIGATURE QUDDISA SIRRAHUM +10ED6 ; 
ARABIC LIGATURE QUDDISA SIRRAHUMAA +10ED7 ; ARABIC LIGATURE QUDDISAT ASRAARUHUM +10ED8 ; ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA ; ARABIC DOUBLE VERTICAL BAR BELOW +10EFB ; ARABIC SMALL LOW NOON 10EFC ; ARABIC COMBINING ALEF OVERLAY 10EFD ; ARABIC SMALL LOW WORD SAKTA 10EFE ; ARABIC SMALL LOW WORD QASR @@ -45375,6 +45482,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155006 +# Total code points: 155113 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java b/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java index e2e9c1708..9b8019062 100644 --- a/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java +++ b/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java @@ -31,6 +31,9 @@ public String merge(String first, String second) { static final > M putNew(M map, K key, V value) { final V oldValue = map.get(key); if (oldValue != null) { + if (oldValue.equals(value)) { + return map; + } throw new UnicodePropertyException( "Key already present in Map: " + key @@ -48,6 +51,9 @@ static final UnicodeMap putNew( final V oldValue = map.get(key); if (oldValue != null && (missingSet == null || !missingSet.contains(key))) { if (merger == null) { + if (oldValue.equals(value)) { + return map; + } throw new UnicodePropertyException( "Key already present in UnicodeMap: " + Utility.hex(key) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 1ca62d9e5..8e497e96f 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -449,6 +449,7 @@ public enum Block_Values implements Named { Shavian("Shavian"), Shorthand_Format_Controls("Shorthand_Format_Controls"), Siddham("Siddham"), + Sidetic("Sidetic"), Sinhala("Sinhala"), 
Sinhala_Archaic_Numbers("Sinhala_Archaic_Numbers"), Small_Form_Variants("Small_Forms"), @@ -1351,6 +1352,7 @@ public enum Joining_Group_Values implements Named { Teh_Marbuta("Teh_Marbuta"), Teh_Marbuta_Goal("Teh_Marbuta_Goal", "Hamza_On_Heh_Goal"), Teth("Teth"), + Thin_Noon("Thin_Noon"), Thin_Yeh("Thin_Yeh"), Vertical_Tail("Vertical_Tail"), Waw("Waw"), @@ -1970,6 +1972,7 @@ public enum Script_Values implements Named { Shavian("Shaw"), Sharada("Shrd"), Siddham("Sidd"), + Sidetic("Sidt"), Khudawadi("Sind"), Sinhala("Sinh"), Sogdian("Sogd"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateBreakTest.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateBreakTest.java index ff506b4ee..597862b41 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateBreakTest.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateBreakTest.java @@ -855,23 +855,14 @@ public void generateTable(PrintWriter out) { } out.print( "." - + "
  • Any “treat as” or “ignore” rules are handled as discussed in UAX #" - + (fileName.equals("Line") ? "14" : "29") - + ", and thus reflected in a transformation of the rules usually not visible here. "); - if (fileName.equals("Line")) { - out.print( - "Where it does show up, an extra variable like CM+ may appear, and the rule may be recast. "); - } - out.print( - "In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  • "); + + "
  • Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  • "); if (fileName.equals("Line")) { out.print( "Where a rule has multiple parts (lines), each one is numbered using hundredths, " + "such as 21.01) × BA, 21.02) × HY, ... "); } out.println( - "In some cases, the numbering and form of a rule is changed due to “treat as” rules.
  • " - + "" + "" + "

    For the original rules" + (fileName.equals("Word") || fileName.equals("Sentence") ? " and the macro values they use" @@ -926,6 +917,9 @@ public void generateTable(PrintWriter out) { if (breakPoint < 0) { breakPoint = ruleBody.indexOf('÷'); } + if (breakPoint < 0) { + breakPoint = ruleBody.indexOf('→'); + } out.println( "

    0.2sot ÷
    0.3÷ eot
    " + linkAndAnchor("r" + ruleNumber, ruleNumber) @@ -1399,7 +1393,10 @@ public GenerateGraphemeBreakTest(UCD ucd, Segmenter.Target target) { "क" + "\u094D" + "a", "a" + "\u094D" + "त", "?" + "\u094D" + "त", - "क" + "\u094D\u094D" + "त")); + "क" + "\u094D\u094D" + "त", + // From L2/14-131, §3.2; made into a single EGC by 179-C31. + // This test would have caught ICU-22956. + "સૻ્સૻ")); } } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java index 9075daea6..100273ad9 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java @@ -1227,7 +1227,9 @@ public final class UCD_Names implements UCD_Types { "THIN_YEH", "VERTICAL_TAIL", // Unicode 16 - "KASHMIRI_YEH" + "KASHMIRI_YEH", + // Unicode n > 16 + "THIN_NOON", }; static { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index b1ffb8261..d00021b07 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -781,8 +781,10 @@ public interface UCD_Types { VERTICAL_TAIL = 104, // Unicode 16 KASHMIRI_YEH = 105, + // Unicode n > 16 + THIN_NOON = 106, // limit - LIMIT_JOINING_GROUP = KASHMIRI_YEH + 1; + LIMIT_JOINING_GROUP = THIN_NOON + 1; static final byte NFD = 0, NFC = 1, NFKD = 2, NFKC = 3; public static final int NF_COMPATIBILITY_MASK = 2, NF_COMPOSITION_MASK = 1; diff --git a/unicodetools/src/main/java/org/unicode/tools/Segmenter.java b/unicodetools/src/main/java/org/unicode/tools/Segmenter.java index f630b199e..2b3644b85 100644 --- a/unicodetools/src/main/java/org/unicode/tools/Segmenter.java +++ b/unicodetools/src/main/java/org/unicode/tools/Segmenter.java @@ -20,8 +20,6 @@ import com.ibm.icu.text.UnicodeSet.XSymbolTable; import com.ibm.icu.text.UnicodeSetIterator; 
import com.ibm.icu.util.ULocale; -import java.io.IOException; -import java.io.PrintWriter; import java.text.ParsePosition; import java.util.ArrayList; import java.util.Collection; @@ -32,6 +30,7 @@ import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -39,10 +38,7 @@ import org.unicode.cldr.util.RegexUtilities; import org.unicode.cldr.util.TransliteratorUtilities; import org.unicode.props.UnicodeProperty; -import org.unicode.text.UCD.Default; -import org.unicode.text.UCD.ToolUnicodePropertySource; -import org.unicode.text.utility.Settings; -import org.unicode.tools.Segmenter.Rule.Breaks; +import org.unicode.tools.Segmenter.SegmentationRule.Breaks; /** Ordered list of rules, with variables resolved before building. Use Builder to make. */ public class Segmenter { @@ -170,16 +166,26 @@ public boolean breaksAt(CharSequence text, int position) { breakRule = NOBREAK_SUPPLEMENTARY; return false; } + StringBuilder remapped = new StringBuilder(text.toString()); + Consumer remap = + (s) -> { + remapped.setLength(0); + remapped.append(s); + }; + Integer[] indexInRemapped = new Integer[text.length() + 1]; + for (int i = 0; i < indexInRemapped.length; ++i) { + indexInRemapped[i] = i; + } for (int i = 0; i < rules.size(); ++i) { - Rule rule = rules.get(i); + SegmentationRule rule = rules.get(i); if (DEBUG_AT_RULE_CONTAINING != null && rule.toString().contains(DEBUG_AT_RULE_CONTAINING)) { System.out.println(" !#$@543 Debug"); } - Breaks result = rule.matches(text, position); - if (result != Rule.Breaks.UNKNOWN_BREAK) { + Breaks result = rule.applyAt(position, remapped, indexInRemapped, remap); + if (result != SegmentationRule.Breaks.UNKNOWN_BREAK) { breakRule = orders.get(i).doubleValue(); - return result == Rule.Breaks.BREAK; + return result == SegmentationRule.Breaks.BREAK; } } breakRule 
= BREAK_ANY; @@ -197,12 +203,12 @@ public int getRuleStatusVec(int[] ruleStatus) { * @param order * @param rule */ - public void add(double order, Rule rule) { + public void add(double order, SegmentationRule rule) { orders.add(new Double(order)); rules.add(rule); } - public Rule get(double order) { + public SegmentationRule get(double order) { int loc = orders.indexOf(new Double(order)); if (loc < 0) return null; return rules.get(loc); @@ -231,8 +237,7 @@ public String toString(boolean showResolved) { return result; } - /** A rule that determines the status of an offset. */ - public static class Rule { + public abstract static class SegmentationRule { /** Status of a breaking rule */ public enum Breaks { UNKNOWN_BREAK, @@ -240,19 +245,151 @@ public enum Breaks { NO_BREAK }; + /** + * Applies this rule throughout the text. + * + * @param remappedString The text, with any preceding remappings applied. + * @param indexInRemapped An array whose size is one greater than the original string. + * Associates indices in the original string to indices in remappedString. + * indexInRemapped[0] == 0, and indexInRemapped[indexInRemapped.size() - 1] == + * remappedString.size(). Whenever indexInRemapped[i] == null, resolvedBreaks[i] == + * NO_BREAK: this corresponds to positions inside a string which has been replaced by a + * remap rule. Remap rules may update this mapping. + * @param resolvedBreaks An array whose size is one greater than the original string, + * indicating resolved breaks in the string. Values that are UNKNOWN_BREAK are updated + * if the rule applies to their position. + * @param remap Called by remap rules with the value of remappedString to be passed to + * subsequent rules. The indices in indexInRemapped are updated consistently. 
+ */ + public abstract void apply( + CharSequence remappedString, + Integer[] indexInRemapped, + Breaks[] resolvedBreaks, + Consumer remap); + + protected abstract String toString(boolean showResolved); + + /** Same as above, but only returns the resolution at the current position. */ + public abstract Breaks applyAt( + int position, + CharSequence remappedString, + Integer[] indexInRemapped, + Consumer remap); + + public String toString() { + return toString(false); + } + } + + /** A « treat as » rule. */ + public static class RemapRule extends SegmentationRule { + + public RemapRule(String leftHandSide, String replacement, String line) { + pattern = Pattern.compile(leftHandSide, REGEX_FLAGS); + this.replacement = replacement; + name = line; + } + + @Override + public void apply( + CharSequence remappedString, + Integer[] indexInRemapped, + Breaks[] resolvedBreaks, + Consumer remap) { + final var result = new StringBuilder(); + int i = 0; + int offset = 0; + final var matcher = pattern.matcher(remappedString); + while (matcher.find()) { + for (; ; ++i) { + if (indexInRemapped[i] == null) { + continue; + } + if (indexInRemapped[i] > matcher.start()) { + break; + } + indexInRemapped[i] += offset; + } + for (; ; ++i) { + if (indexInRemapped[i] == null) { + continue; + } + if (indexInRemapped[i] == matcher.end()) { + break; + } + if (resolvedBreaks[i] == Breaks.BREAK) { + throw new IllegalArgumentException( + "Replacement rule at remapped indices " + + matcher.start() + + " sqq. 
spans a break: " + + remappedString); + } + resolvedBreaks[i] = Breaks.NO_BREAK; + indexInRemapped[i] = null; + } + matcher.appendReplacement(result, replacement); + offset = result.length() - indexInRemapped[i]; + } + for (; i < indexInRemapped.length; ++i) { + if (indexInRemapped[i] == null) { + continue; + } + indexInRemapped[i] += offset; + } + matcher.appendTail(result); + if (indexInRemapped[indexInRemapped.length - 1] != result.length()) { + StringBuilder indices = new StringBuilder(); + for (var j : indexInRemapped) { + indices.append(j == null ? "null" : j.toString()); + indices.append(","); + } + throw new IllegalArgumentException( + "Inconsistent indexInRemapped " + + indices + + " for new remapped string " + + result); + } + remap.accept(result); + } + + private Pattern pattern; + private String replacement; + private String name; + + @Override + public Breaks applyAt( + int position, + CharSequence remappedString, + Integer[] indexInRemapped, + Consumer remap) { + var resolvedBreaks = new Breaks[indexInRemapped.length]; + apply(remappedString, indexInRemapped, resolvedBreaks, remap); + return resolvedBreaks[position] == null + ? Breaks.UNKNOWN_BREAK + : resolvedBreaks[position]; + } + + @Override + protected String toString(boolean showResolved) { + return name; + } + } + + /** A rule that determines the status of an offset. */ + public static class RegexRule extends SegmentationRule { /** * @param before pattern for the text after the offset. All variables must be resolved. * @param result the break status to return when the rule is invoked * @param after pattern for the text before the offset. All variables must be resolved. 
* @param line */ - public Rule(String before, Breaks result, String after, String line) { + public RegexRule(String before, Breaks result, String after, String line) { breaks = result; before = ".*(" + before + ")"; String parsing = null; try { - matchPrevious = Pattern.compile(parsing = before, REGEX_FLAGS).matcher(""); - matchSucceeding = Pattern.compile(parsing = after, REGEX_FLAGS).matcher(""); + this.before = Pattern.compile(parsing = before, REGEX_FLAGS); + this.after = Pattern.compile(parsing = after, REGEX_FLAGS); } catch (PatternSyntaxException e) { // Format: Unclosed character class near index 927 int index = e.getIndex(); @@ -279,26 +416,37 @@ public Rule(String before, Breaks result, String after, String line) { // COMMENTS allows whitespace } - // Matcher numberMatcher = PatternCache.get("[0-9]+").matcher(""); - - /** - * Match the rule against text, at a position - * - * @param text - * @param position - * @return break status - */ - public Breaks matches(CharSequence text, int position) { - if (!matchAfter(matchSucceeding, text, position)) return Breaks.UNKNOWN_BREAK; - if (!matchBefore(matchPrevious, text, position)) return Breaks.UNKNOWN_BREAK; - return breaks; + @Override + public void apply( + CharSequence remappedString, + Integer[] indexInRemapped, + Breaks[] resolvedBreaks, + Consumer remap) { + for (int i = 0; i < indexInRemapped.length; ++i) { + if (resolvedBreaks[i] == Breaks.UNKNOWN_BREAK) { + resolvedBreaks[i] = applyAt(i, remappedString, indexInRemapped, remap); + } + } } - /** Debugging aid */ - public String toString() { - return toString(false); + @Override + public Breaks applyAt( + int position, + CharSequence remappedString, + Integer[] indexInRemapped, + Consumer remap) { + if (after.matcher(remappedString) + .region(indexInRemapped[position], remappedString.length()) + .lookingAt() + && before.matcher(remappedString) + .region(0, indexInRemapped[position]) + .matches()) { + return breaks; + } + return Breaks.UNKNOWN_BREAK; } + 
@Override public String toString(boolean showResolved) { String result = name; if (showResolved) result += ": " + resolved; @@ -306,29 +454,16 @@ public String toString(boolean showResolved) { } // ============== Internals ================ - // in Java 5, this can be more efficient, and use a single regex - // of the form "(?<= before) after". MUST then have transparent bounds - private Matcher matchPrevious; - private Matcher matchSucceeding; + // We cannot use a single regex of the form "(?<= before) after" because + // (RI RI)* RI × RI would require unbounded lookbehind. + private Pattern before; + private Pattern after; private String name; private String resolved; private Breaks breaks; } - /** utility, since we are using Java 1.4 */ - static boolean matchAfter(Matcher matcher, CharSequence text, int position) { - return matcher.reset(text.subSequence(position, text.length())).lookingAt(); - } - - /** - * utility, since we are using Java 1.4 depends on the pattern having been built with .* not - * very efficient, works for testing and the best we can do. - */ - static boolean matchBefore(Matcher matcher, CharSequence text, int position) { - return matcher.reset(text.subSequence(0, position)).matches(); - } - /** Separate the builder for clarity */ /** Sort the longest strings first. Used for variable lists. 
*/ @@ -474,17 +609,26 @@ public boolean addLine(String line) { throw new IllegalArgumentException("Rule must be of form '1)...': <" + line + ">"); } line = line.substring(relationPosition + 1).trim(); + relationPosition = line.indexOf('→'); + if (relationPosition >= 0) { + addRemapRule( + order, + line.substring(0, relationPosition).trim(), + line.substring(relationPosition + 1).trim(), + line); + return true; + } relationPosition = line.indexOf('\u00F7'); - Breaks breaks = Segmenter.Rule.Breaks.BREAK; + Breaks breaks = Segmenter.RegexRule.Breaks.BREAK; if (relationPosition < 0) { relationPosition = line.indexOf('\u00D7'); if (relationPosition < 0) { throw new IllegalArgumentException( "Couldn't find =, \u00F7, or \u00D7 on line: " + line); } - breaks = Segmenter.Rule.Breaks.NO_BREAK; + breaks = Segmenter.RegexRule.Breaks.NO_BREAK; } - addRule( + addRegexRule( order, line.substring(0, relationPosition).trim(), breaks, @@ -591,6 +735,40 @@ public static UnicodeMap composeWith( return target; } + Builder addRemapRule(Double order, String before, String after, String line) { + line = whiteSpace.reset(line).replaceAll(" "); + if (lastComments.size() != 0) { + double increment = 0.0001; + double temp = order.doubleValue() - increment * lastComments.size(); + for (int i = 0; i < lastComments.size(); ++i) { + Double position = new Double(temp); + if (xmlRules.containsKey(position)) { + System.out.println("WARNING: Overriding rule " + position); + } + xmlRules.put(position, lastComments.get(i)); + temp += increment; + } + lastComments.clear(); + } + if (htmlRules.containsKey(order) + || xmlRules.containsKey(order) + || rules.containsKey(order)) { + throw new IllegalArgumentException("Duplicate numbers for rules: " + order); + } + htmlRules.put(order, TransliteratorUtilities.toHTML.transliterate(line)); + xmlRules.put( + order, + " " + + TransliteratorUtilities.toXML.transliterate(line) + + " "); + rules.put(order, new Segmenter.RemapRule(replaceVariables(before), after, 
line)); + return this; + } + /** * Add a numbered rule, already broken into the parts before and after. * @@ -601,7 +779,8 @@ public static UnicodeMap composeWith( * @param line * @return */ - Builder addRule(Double order, String before, Breaks breaks, String after, String line) { + Builder addRegexRule( + Double order, String before, Breaks breaks, String after, String line) { // if (brokenIdentifierMatcher.reset(line).find()) { // int start = brokenIdentifierMatcher.start(); // int end = brokenIdentifierMatcher.end(); @@ -646,7 +825,7 @@ Builder addRule(Double order, String before, Breaks breaks, String after, String } rules.put( order, - new Segmenter.Rule( + new Segmenter.RegexRule( replaceVariables(before), breaks, replaceVariables(after), line)); return this; } @@ -671,9 +850,9 @@ public Segmenter make() { // longest first, to // make substitution // easy - private Map rules = new TreeMap(); + private Map rules = new TreeMap(); - public Map getProcessedRules() { + public Map getProcessedRules() { return rules; } @@ -813,532 +992,11 @@ public Map getOriginalVariables() { // ============== Internals ================ - private List rules = new ArrayList(1); + private List rules = new ArrayList(1); private List orders = new ArrayList(1); private double breakRule; public UnicodeMap getSamples() { return samples; } - - // TODO: delete? move elsewhere? - // Only used in main() to write to some files. Out of sync with SegmenterDefault.txt. 
- private static final String[][] cannedRules = { - { - "GraphemeClusterBreak", - "$CR=\\p{Grapheme_Cluster_Break=CR}", - "$LF=\\p{Grapheme_Cluster_Break=LF}", - "$Control=\\p{Grapheme_Cluster_Break=Control}", - "$Extend=\\p{Grapheme_Cluster_Break=Extend}", - "$ZWJ=\\p{Grapheme_Cluster_Break=ZWJ}", - "$RI=\\p{Grapheme_Cluster_Break=Regional_Indicator}", - "$Prepend=\\p{Grapheme_Cluster_Break=Prepend}", - "$SpacingMark=\\p{Grapheme_Cluster_Break=SpacingMark}", - "$L=\\p{Grapheme_Cluster_Break=L}", - "$V=\\p{Grapheme_Cluster_Break=V}", - "$T=\\p{Grapheme_Cluster_Break=T}", - "$LV=\\p{Grapheme_Cluster_Break=LV}", - "$LVT=\\p{Grapheme_Cluster_Break=LVT}", - "$Virama=[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&\\p{Indic_Syllabic_Category=Virama}]", - "$LinkingConsonant=[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&\\p{Indic_Syllabic_Category=Consonant}]", - - // "$E_Base=\\p{Grapheme_Cluster_Break=E_Base}", - // "$E_Modifier=\\p{Grapheme_Cluster_Break=E_Modifier}", - - "$ExtPict=\\p{Extended_Pictographic}", - "$ExtCccZwj=[[$Extend-\\p{ccc=0}] $ZWJ]", - // "$EBG=\\p{Grapheme_Cluster_Break=E_Base_GAZ}", - // "$Glue_After_Zwj=\\p{Grapheme_Cluster_Break=Glue_After_Zwj}", - - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break between a CR and LF. 
Otherwise, break before and after controls.", - "3) $CR \u00D7 $LF", - "4) ( $Control | $CR | $LF ) \u00F7", - "5) \u00F7 ( $Control | $CR | $LF )", - "# Do not break Hangul syllable sequences.", - "6) $L \u00D7 ( $L | $V | $LV | $LVT )", - "7) ( $LV | $V ) \u00D7 ( $V | $T )", - "8) ( $LVT | $T) \u00D7 $T", - "# Do not break before extending characters or ZWJ.", - // "9) \u00D7 ($Extend | $ZWJ | $Virama)", - "9) \u00D7 ($Extend | $ZWJ)", - "# Only for extended grapheme clusters: Do not break before SpacingMarks, or after Prepend characters.", - "9.1) \u00D7 $SpacingMark", - "9.2) $Prepend \u00D7", - "9.3) $LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* \u00D7 $LinkingConsonant", - "# Do not break within emoji modifier sequences or emoji zwj sequences.", - // "10) $E_Base $Extend* × $E_Modifier", - "11) $ExtPict $Extend* $ZWJ × $ExtPict", - "# Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.", - "12) ^ ($RI $RI)* $RI × $RI", - "13) [^$RI] ($RI $RI)* $RI × $RI", - "# Otherwise, break everywhere.", - }, - { - "LineBreak", - "# Variables", - "$AI=\\p{Line_Break=Ambiguous}", - "$AL=\\p{Line_Break=Alphabetic}", - "$B2=\\p{Line_Break=Break_Both}", - "$BA=\\p{Line_Break=Break_After}", - "$BB=\\p{Line_Break=Break_Before}", - "$BK=\\p{Line_Break=Mandatory_Break}", - "$CB=\\p{Line_Break=Contingent_Break}", - "$CL=\\p{Line_Break=Close_Punctuation}", - "$CP=\\p{Line_Break=CP}", - "$CM1=\\p{Line_Break=Combining_Mark}", - "$CR=\\p{Line_Break=Carriage_Return}", - "$EX=\\p{Line_Break=Exclamation}", - "$GL=\\p{Line_Break=Glue}", - "$H2=\\p{Line_Break=H2}", - "$H3=\\p{Line_Break=H3}", - "$HL=\\p{Line_Break=HL}", - "$HY=\\p{Line_Break=Hyphen}", - "$ID=\\p{Line_Break=Ideographic}", - "$IN=\\p{Line_Break=Inseparable}", - "$IS=\\p{Line_Break=Infix_Numeric}", - "$JL=\\p{Line_Break=JL}", - "$JT=\\p{Line_Break=JT}", - "$JV=\\p{Line_Break=JV}", - 
"$LF=\\p{Line_Break=Line_Feed}", - "$NL=\\p{Line_Break=Next_Line}", - "$NS=\\p{Line_Break=Nonstarter}", - "$NU=\\p{Line_Break=Numeric}", - "$OP=\\p{Line_Break=Open_Punctuation}", - "$PO=\\p{Line_Break=Postfix_Numeric}", - "$PR=\\p{Line_Break=Prefix_Numeric}", - "$QU=\\p{Line_Break=Quotation}", - "$SA=\\p{Line_Break=Complex_Context}", - "$SG=\\p{Line_Break=Surrogate}", - "$SP=\\p{Line_Break=Space}", - "$SY=\\p{Line_Break=Break_Symbols}", - "$WJ=\\p{Line_Break=Word_Joiner}", - "$XX=\\p{Line_Break=Unknown}", - "$ZW=\\p{Line_Break=ZWSpace}", - "$CJ=\\p{Line_Break=Conditional_Japanese_Starter}", - "$RI=\\p{Line_Break=Regional_Indicator}", - "$EB=\\p{Line_Break=E_Base}", - "$EM=\\p{Line_Break=E_Modifier}", - "$ZWJ_O=\\p{Line_Break=ZWJ}", - "$ZWJ=\\p{Line_Break=ZWJ}", - "# Macros", - "$CM=[$CM1 $ZWJ]", - "# LB 1 Assign a line breaking class to each code point of the input. ", - "# Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm.", - "# NOTE: CB is ok to fall through, but must handle others here.", - // "show $AL", - "$AL=[$AI $AL $SG $XX $SA]", - "$NS=[$NS $CJ]", - // "show $AL", - // "$oldAL=$AL", // for debugging - "# WARNING: Fixes for Rule 9", - "# Treat X (CM|ZWJ* as if it were X.", - "# Where X is any line break class except SP, BK, CR, LF, NL or ZW.", - "$X=$CM*", - "# Macros", - "$Spec1_=[$SP $BK $CR $LF $NL $ZW]", - "$Spec2_=[^ $SP $BK $CR $LF $NL $ZW]", - "$Spec3a_=[^ $SP $BA $HY $CM]", - "$Spec3b_=[^ $BA $HY $CM]", - "$Spec4_=[^ $NU $CM]", - "$AI=($AI $X)", - "$AL=($AL $X)", - "$B2=($B2 $X)", - "$BA=($BA $X)", - "$BB=($BB $X)", - "$CB=($CB $X)", - "$CL=($CL $X)", - "$CP=($CP $X)", - "$CM=($CM $X)", - // "$CM=($CM $X)", - "$EX=($EX $X)", - "$GL=($GL $X)", - "$H2=($H2 $X)", - "$H3=($H3 $X)", - "$HL=($HL $X)", - "$HY=($HY $X)", - "$ID=($ID $X)", - "$IN=($IN $X)", - "$IS=($IS $X)", - "$JL=($JL $X)", - "$JT=($JT $X)", - "$JV=($JV $X)", - "$NS=($NS $X)", - "$NU=($NU $X)", - "$OP=($OP $X)", - 
"$PO=($PO $X)", - "$PR=($PR $X)", - "$QU=($QU $X)", - "$SA=($SA $X)", - "$SG=($SG $X)", - "$SY=($SY $X)", - "$WJ=($WJ $X)", - "$XX=($XX $X)", - "$RI=($RI $X)", - "$EB=($EB $X)", - "$EM=($EM $X)", - "$ZWJ=($ZWJ $X)", - "# OUT OF ORDER ON PURPOSE", - "# LB 10 Treat any remaining combining mark as AL.", - "$AL=($AL | ^ $CM | (?<=$Spec1_) $CM)", - "# Rules", - "# LB 4 Always break after hard line breaks (but never between CR and LF).", - "4) $BK \u00F7", - "# LB 5 Treat CR followed by LF, as well as CR, LF and NL as hard line breaks.", - "5.01) $CR \u00D7 $LF", - "5.02) $CR \u00F7", - "5.03) $LF \u00F7", - "5.04) $NL \u00F7", - "# LB 6 Do not break before hard line breaks.", - "6) \u00D7 ( $BK | $CR | $LF | $NL )", - "# LB 7 Do not break before spaces or zero-width space.", - "7.01) \u00D7 $SP", - "7.02) \u00D7 $ZW", - "# LB 8 Break before any character following a zero-width space, even if one or more spaces intervene.", - "8) $ZW $SP* \u00F7", - "# LB 8a Don't break between ZWJ and IDs (for use in Emoji ZWJ sequences)", - "8.1) $ZWJ_O \u00D7", - "# LB 9 Do not break a combining character sequence; treat it as if it has the LB class of the base character", - "# in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.)", - "9) $Spec2_ \u00D7 $CM", - "#WARNING: this is done by modifying the variable values for all but SP.... 
That is, $AL is really ($AI $CM*)!", - "# LB 11 Do not break before or after WORD JOINER and related characters.", - "11.01) \u00D7 $WJ", - "11.02) $WJ \u00D7", - "# LB 12 Do not break after NBSP and related characters.", - // "12.01) [^$SP] \u00D7 $GL", - "12) $GL \u00D7", - "12.1) $Spec3a_ \u00D7 $GL", - "12.2) $Spec3b_ $CM+ \u00D7 $GL", - "12.3) ^ $CM+ \u00D7 $GL", - "# LB 13 Do not break before \u2018]\u2019 or \u2018!\u2019 or \u2018;\u2019 or \u2018/\u2019, even after spaces.", - "# Using customization 7.", - "13.01) \u00D7 $EX", - "13.02) $Spec4_ \u00D7 ($CL | $CP | $IS | $SY)", - "13.03) $Spec4_ $CM+ \u00D7 ($CL | $CP | $IS | $SY)", - "13.04) ^ $CM+ \u00D7 ($CL | $CP | $IS | $SY)", - // "13.03) $Spec4_ \u00D7 $IS", - // "13.04) $Spec4_ \u00D7 $SY", - "#LB 14 Do not break after \u2018[\u2019, even after spaces.", - "14) $OP $SP* \u00D7", - "# LB 15 Do not break within \u2018\"[\u2019, even with intervening spaces.", - "15) $QU $SP* \u00D7 $OP", - "# LB 16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces.", - "16) ($CL | $CP) $SP* \u00D7 $NS", - "# LB 17 Do not break within \u2018\u2014\u2014\u2019, even with intervening spaces.", - "17) $B2 $SP* \u00D7 $B2", - "# LB 18 Break after spaces.", - "18) $SP \u00F7", - "# LB 19 Do not break before or after \u2018\"\u2019.", - "19.01) \u00D7 $QU", - "19.02) $QU \u00D7", - "# LB 20 Break before and after unresolved CB.", - "20.01) \u00F7 $CB", - "20.02) $CB \u00F7", - "# LB 21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents.", - "21.01) \u00D7 $BA", - "21.02) \u00D7 $HY", - "21.03) \u00D7 $NS", - "21.04) $BB \u00D7", - "# LB 21a Don't break after Hebrew + Hyphen.", - "21.1) $HL ($HY | $BA) \u00D7", - "# LB 21b Don’t break between Solidus and Hebrew letters.", - "21.2) $SY × $HL", - "# LB 22 Do not break between two ellipses, or between letters, numbers or exclamations and ellipsis.", - // 
"show $AL", - "22.01) ($AL | $HL) \u00D7 $IN", - "22.02) $EX \u00D7 $IN", - "22.03) ($ID | $EB | $EM) \u00D7 $IN", - "22.04) $IN \u00D7 $IN", - "22.05) $NU \u00D7 $IN", - "# LB 23 Do not break between digits and letters.", - // "23.01) ($ID | $EB | $EM) \u00D7 $PO", - "23.02) ($AL | $HL) \u00D7 $NU", - "23.03) $NU \u00D7 ($AL | $HL)", - "# LB 24 Do not break between prefix and letters or ideographs.", - "23.12) $PR \u00D7 ($ID | $EB | $EM)", - "23.13) ($ID | $EB | $EM) \u00D7 $PO", - "# LB24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix.", - "24.02) ($PR | $PO) \u00D7 ($AL | $HL)", - "24.03) ($AL | $HL) \u00D7 ($PR | $PO)", - "# Using customization 7", - "# LB Alternative: ( PR | PO) ? ( OP | HY ) ? NU (NU | SY | IS) * (CL | CP) ? ( PR | PO) ?", - "# Insert \u00D7 every place it could go. However, make sure that at least one thing is concrete, otherwise would cause $NU to not break before or after ", - "25.01) ($PR | $PO) \u00D7 ( $OP | $HY )? $NU", - "25.02) ( $OP | $HY ) \u00D7 $NU", - "25.03) $NU \u00D7 ($NU | $SY | $IS)", - "25.04) $NU ($NU | $SY | $IS)* \u00D7 ($NU | $SY | $IS | $CL | $CP)", - "25.05) $NU ($NU | $SY | $IS)* ($CL | $CP)? 
\u00D7 ($PO | $PR)", - "#LB 26 Do not break a Korean syllable.", - "26.01) $JL \u00D7 $JL | $JV | $H2 | $H3", - "26.02) $JV | $H2 \u00D7 $JV | $JT", - "26.03) $JT | $H3 \u00D7 $JT", - "# LB 27 Treat a Korean Syllable Block the same as ID.", - "27.01) $JL | $JV | $JT | $H2 | $H3 \u00D7 $PO", - "27.02) $PR \u00D7 $JL | $JV | $JT | $H2 | $H3", - "# LB 28 Do not break between alphabetics (\"at\").", - "28) ($AL | $HL) \u00D7 ($AL | $HL)", - "# LB 29 Do not break between numeric punctuation and alphabetics (\"e.g.\").", - "29) $IS \u00D7 ($AL | $HL)", - "# LB 30 Do not break between letters, numbers or ordinary symbols and opening or closing punctuation.", - "30.01) ($AL | $HL | $NU) \u00D7 $OP", - "30.02) $CP \u00D7 ($AL | $HL | $NU)", - "# LB 30a Break between two Regional Indicators if and only if there is an even number of them before the point being considered.", - "30.11) ^ ($RI $RI)* $RI × $RI", - "30.12) [^$RI] ($RI $RI)* $RI × $RI", - "30.13) $RI ÷ $RI", - "30.2) $EB × $EM", - }, - { - "SentenceBreak", - "$CR=\\p{Sentence_Break=CR}", - "$LF=\\p{Sentence_Break=LF}", - "$Extend=\\p{Sentence_Break=Extend}", - "$Format=\\p{Sentence_Break=Format}", - "$Sep=\\p{Sentence_Break=Sep}", - "$Sp=\\p{Sentence_Break=Sp}", - "$Lower=\\p{Sentence_Break=Lower}", - "$Upper=\\p{Sentence_Break=Upper}", - "$OLetter=\\p{Sentence_Break=OLetter}", - "$Numeric=\\p{Sentence_Break=Numeric}", - "$ATerm=\\p{Sentence_Break=ATerm}", - "$STerm=\\p{Sentence_Break=STerm}", - "$Close=\\p{Sentence_Break=Close}", - "$SContinue=\\p{Sentence_Break=SContinue}", - "$Any=.", - // "# subtract Format from Control, since we don't want to break before/after", - // "$Control=[$Control-$Format]", - "# Expresses the negation in rule 8; can't do this with normal regex, but works with UnicodeSet, which is all we need.", - // "$NotStuff=[^$OLetter $Upper $Lower $Sep]", - // "# $ATerm and $Sterm are temporary, to match ICU until UTC decides.", - - "# WARNING: For Rule 5, now add format and extend to everything 
but Sep, Format, and Extend", - "$FE=[$Format $Extend]", - "# Special rules", - "$NotPreLower_=[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]", - // "$NotSep_=[^ $Sep $CR $LF]", - - // "$FE=$Extend* $Format*", - "$Sp=($Sp $FE*)", - "$Lower=($Lower $FE*)", - "$Upper=($Upper $FE*)", - "$OLetter=($OLetter $FE*)", - "$Numeric=($Numeric $FE*)", - "$ATerm=($ATerm $FE*)", - "$STerm=($STerm $FE*)", - "$Close=($Close $FE*)", - "$SContinue=($SContinue $FE*)", - "# Macros", - "$ParaSep = ($Sep | $CR | $LF)", - "$SATerm = ($STerm | $ATerm)", - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break within CRLF.", - "3) $CR \u00D7 $LF", - "# Break after paragraph separators.", - "4) $ParaSep \u00F7", - // "3.4) ( $Control | $CR | $LF ) \u00F7", - // "3.5) \u00F7 ( $Control | $CR | $LF )", - "# Ignore Format and Extend characters, except after sot, ParaSep, and within CRLF. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend)", - "# WARNING: Implemented as don't break before format (except after linebreaks),", - "# AND add format and extend in all variables definitions that appear after this point!", - // "3.91) [^$Control | $CR | $LF] \u00D7 $Extend", - "5) \u00D7 [$Format $Extend]", - "# Do not break after full stop in certain contexts. 
[See note below.]", - "# Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter,", - "# is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase.", - "# For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence.", - "6) $ATerm \u00D7 $Numeric", - "7) ($Upper | $Lower) $ATerm \u00D7 $Upper", - "8) $ATerm $Close* $Sp* \u00D7 $NotPreLower_* $Lower", - "8.1) $SATerm $Close* $Sp* \u00D7 ($SContinue | $SATerm)", - "# Break after sentence terminators, but include closing punctuation, trailing spaces, and any paragraph separator. [See note below.] Include closing punctuation, trailing spaces, and (optionally) a paragraph separator.", - "9) $SATerm $Close* \u00D7 ( $Close | $Sp | $ParaSep )", - "# Note the fix to $Sp*, $Sep?", - "10) $SATerm $Close* $Sp* \u00D7 ( $Sp | $ParaSep )", - "11) $SATerm $Close* $Sp* $ParaSep? \u00F7", - "#Otherwise, do not break", - "998) \u00D7 $Any", - }, - { - "WordBreak", - "$CR=\\p{Word_Break=CR}", - "$LF=\\p{Word_Break=LF}", - "$Newline=\\p{Word_Break=Newline}", - // "$Control=\\p{Word_Break=Control}", - "$Extend=\\p{Word_Break=Extend}", - // "$NEWLINE=[$CR $LF \\u0085 \\u000B \\u000C \\u2028 \\u2029]", - // "$Sep=\\p{Sentence_Break=Sep}", - "# Now normal variables", - "$Format=\\p{Word_Break=Format}", - "$Katakana=\\p{Word_Break=Katakana}", - "$ALetter=\\p{Word_Break=ALetter}", - "$MidLetter=\\p{Word_Break=MidLetter}", - "$MidNum=\\p{Word_Break=MidNum}", - "$MidNumLet=\\p{Word_Break=MidNumLet}", - "$Numeric=\\p{Word_Break=Numeric}", - "$ExtendNumLet=\\p{Word_Break=ExtendNumLet}", - "$RI=\\p{Word_Break=Regional_Indicator}", - "$Hebrew_Letter=\\p{Word_Break=Hebrew_Letter}", - "$Double_Quote=\\p{Word_Break=Double_Quote}", - "$Single_Quote=\\p{Word_Break=Single_Quote}", - - // "$E_Base=\\p{Word_Break=E_Base}", - // "$E_Modifier=\\p{Word_Break=E_Modifier}", - "$ZWJ=\\p{Word_Break=ZWJ}", - 
"$ExtPict=\\p{Extended_Pictographic}", - - // "$EBG=\\p{Word_Break=E_Base_GAZ}", - // "$Glue_After_Zwj=\\p{Word_Break=Glue_After_Zwj}", - - "$WSegSpace=\\p{Word_Break=WSegSpace}", - "# Macros", - "$AHLetter=($ALetter | $Hebrew_Letter)", - "$MidNumLetQ=($MidNumLet | $Single_Quote)", - "# WARNING: For Rule 4: Fixes for GC, Format", - // "# Subtract Format from Control, since we don't want to break before/after", - // "$Control=[$Control-$Format]", - "# Add format and extend to everything", - "$FE=[$Format $Extend $ZWJ]", - "# Special rules", - "$NotBreak_=[^ $Newline $CR $LF ]", - // "$FE= ($Extend | $Format)*", - "$Katakana=($Katakana $FE*)", - "$ALetter=($ALetter $FE*)", - "$MidLetter=($MidLetter $FE*)", - "$MidNum=($MidNum $FE*)", - "$MidNumLet=($MidNumLet $FE*)", - "$Numeric=($Numeric $FE*)", - "$ExtendNumLet=($ExtendNumLet $FE*)", - "$RI=($RI $FE*)", - "$Hebrew_Letter=($Hebrew_Letter $FE*)", - "$Double_Quote=($Double_Quote $FE*)", - "$Single_Quote=($Single_Quote $FE*)", - - // "$E_Base=($E_Base $FE*)", - // "$E_Modifier=($E_Modifier $FE*)", - // "$ZWJ=($ZWJ $FE*)", don't do this one! - // "$Glue_After_Zwj=($Glue_After_Zwj $FE*)", - // "$EBG=($EBG $FE*)", - - "$AHLetter=($AHLetter $FE*)", - "$MidNumLetQ=($MidNumLetQ $FE*)", - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break within CRLF.", - "3) $CR \u00D7 $LF", - "# Otherwise break before and after Newlines (including CR and LF)", - "3.1) ($Newline | $CR | $LF) \u00F7", - "3.2) \u00F7 ($Newline | $CR | $LF)", - "# Do not break within emoji zwj sequences.", - "3.3) $ZWJ × $ExtPict", - "3.4) $WSegSpace × $WSegSpace", - - // "3.4) ( $Control | $CR | $LF ) \u00F7", - // "3.5) \u00F7 ( $Control | $CR | $LF )", - // "3.9) \u00D7 $Extend", - // "3.91) [^$Control | $CR | $LF] \u00D7 $Extend", - "# Ignore Format and Extend characters, except after sot, CR, LF, and Newline. (See Section 6.2, Replacing Ignore Rules.) 
This also has the effect of: Any × (Format | Extend)", - "# WARNING: Implemented as don't break before format (except after linebreaks),", - "# AND add format and extend in all variables definitions that appear after this point!", - // "4) \u00D7 [$Format $Extend]", - "4) $NotBreak_ \u00D7 [$Format $Extend $ZWJ]", - "# Vanilla rules", - "# Do not break between most letters.", - "5) $AHLetter \u00D7 $AHLetter", - "# Do not break letters across certain punctuation.", - "6) $AHLetter \u00D7 ($MidLetter | $MidNumLetQ) $AHLetter", - "7) $AHLetter ($MidLetter | $MidNumLetQ) \u00D7 $AHLetter", - "7.1) $Hebrew_Letter × $Single_Quote", - "7.2) $Hebrew_Letter × $Double_Quote $Hebrew_Letter", - "7.3) $Hebrew_Letter $Double_Quote × $Hebrew_Letter", - "# Do not break within sequences of digits, or digits adjacent to letters (“3a”, or “A3”).", - "8) $Numeric \u00D7 $Numeric", - "9) $AHLetter \u00D7 $Numeric", - "10) $Numeric \u00D7 $AHLetter", - "# Do not break within sequences, such as “3.2” or “3,456.789”.", - "11) $Numeric ($MidNum | $MidNumLetQ) \u00D7 $Numeric", - "12) $Numeric \u00D7 ($MidNum | $MidNumLetQ) $Numeric", - "# Do not break between Katakana.", - "13) $Katakana \u00D7 $Katakana", - "# Do not break from extenders.", - "13.1) ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) \u00D7 $ExtendNumLet", - "13.2) $ExtendNumLet \u00D7 ($AHLetter | $Numeric | $Katakana)", - - // "# Do not break within emoji modifier sequences.", - // "14) $E_Base × $E_Modifier", - - "# Do not break within emoji flag sequences. 
That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.", - "15) ^ ($RI $RI)* $RI × $RI", - "16) [^$RI] ($RI $RI)* $RI × $RI", - "# Otherwise, break everywhere (including around ideographs).", - } - }; - - public static void main(String[] args) throws IOException { - for (int i = 0; i < cannedRules.length; ++i) { - String type = cannedRules[i][0]; - boolean hadHash = false; - try (PrintWriter out = - FileUtilities.openUTF8Writer( - Settings.Output.GEN_DIR + "segmentation/", type + "Rules.txt")) { - out.println("# Segmentation rules for " + type); - out.println("#"); - out.println("# Character Classes"); - out.println("#"); - for (int j = 1; j < cannedRules[i].length; ++j) { - String cannedRule = cannedRules[i][j].trim(); - if (cannedRule.equals("#")) { - continue; - } - boolean hasHash = cannedRule.startsWith("#"); - if (hasHash && !hadHash) { - out.println("#"); - } - out.println(cannedRule); - if (hasHash) { - out.println("#"); - } - hadHash = hasHash; - } - } - } - - try (PrintWriter out = - FileUtilities.openUTF8Writer( - Settings.Output.GEN_DIR + "cldr/segmentation/", "rootAddon.xml")) { - out.println( - "\n" - + "\n" - + "\n" - + "\n" - + "\t\n" - + "\t\t\n" - + "\t\t\n" - + "\t\n" - + "\t"); - for (final String type : - new String[] { - "GraphemeClusterBreak", "LineBreak", "SentenceBreak", "WordBreak" - }) { - final Builder segBuilder = - Segmenter.make(ToolUnicodePropertySource.make(Default.ucdVersion()), type); - out.print(segBuilder.toString(type, "\t\t")); - if (type.equals("")) { - out.print( - "\t\t\t\n" - + "\t\t\t\t\n" - + "\t\t\t\n"); - } - } - out.println("\t\n" + ""); - } - } } diff --git a/unicodetools/src/main/java/org/unicode/tools/TestSegments.java b/unicodetools/src/main/java/org/unicode/tools/TestSegments.java index e748ff9c6..ead5331b7 100644 --- a/unicodetools/src/main/java/org/unicode/tools/TestSegments.java +++ 
b/unicodetools/src/main/java/org/unicode/tools/TestSegments.java @@ -22,7 +22,6 @@ import org.unicode.jsp.ICUPropertyFactory; import org.unicode.props.RandomStringGenerator; import org.unicode.props.UnicodeProperty; -import org.unicode.tools.Segmenter.Rule.Breaks; /** * Quick class for testing proposed syntax for Segments. TODO doesn't yet handle supplementaries. It @@ -133,7 +132,7 @@ public static void main(String[] args) throws IOException { } private static void debugRule(Segmenter.Builder rb) { - Segmenter.Rule rule = rb.make().get(16.01); + Segmenter.SegmentationRule rule = rb.make().get(16.01); String oldAL = (String) rb.getVariables().get("$oldAL"); UnicodeSet oldALSet = new UnicodeSet(oldAL); String testStr = "\uA80D/\u0745\u2026"; @@ -142,7 +141,7 @@ private static void debugRule(Segmenter.Builder rb) { System.out.println( k + ": " + inside + com.ibm.icu.impl.Utility.escape("" + testStr.charAt(k))); } - Breaks m = rule.matches(testStr, 3); + rule.applyAt(3, testStr, new Integer[] {0, 1, 2, 3}, null); } private static void doCompare(UnicodeProperty.Factory factory, Segmenter rl, String line) { diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt new file mode 100644 index 000000000..58662bbb8 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt @@ -0,0 +1,30 @@ +# ARABIC LETTER NOON WITH RING ABOVE (088F) +# https://github.com/unicode-org/utc-release-management/issues/118 + +Let $OldNoons := [ ن ڹ ں ڻ ڼ ڽ ݧ ݨ ݩ ࢉ ] + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. 
+Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring Block Unicode_1_Name: + +# « Another ن, propertywise like the others. ». + +# Differs from ڽ (with three dots above) in Joining_Group, +# ڽ being jg=Nya vs. jg=Noon for the others; see +# https://www.unicode.org/versions/latest/ch09.pdf#G39824. +Ignoring Joining_Group: +Propertywise [ $OldNoons \N{ARABIC LETTER NOON WITH RING ABOVE} ] AreAlike +end Ignoring; + +Propertywise [ $OldNoons - [ڽ] \N{ARABIC LETTER NOON WITH RING ABOVE} ] AreAlike + +end Ignoring; + +end Ignoring; + +end Ignoring; diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index d9de8380c..bf082107a 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -268,6 +268,7 @@ Sharada_Sup ; Sharada_Supplement Shavian ; Shavian Shorthand_Format_Controls ; Shorthand_Format_Controls Siddham ; Siddham +Sidetic ; Sidetic Sinhala ; Sinhala Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers Small_Forms ; Small_Form_Variants diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 570de2e90..4e4685029 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -700,6 +700,7 @@ Let $nonAlphabeticBindus := [] Let $nonAlphabeticDependentVowels := [ \N{ORIYA SIGN OVERLINE} + \p{Name=/^ORIYA SIGN (DOUBLE )?DOT ABOVE$/} # L2/24-106R, related to the overline. 
\N{THAI CHARACTER MAITAIKHU} \N{LIMBU SIGN KEMPHRENG} \N{SHARADA VOWEL MODIFIER MARK} @@ -1379,4 +1380,4 @@ Ignoring Unicode_1_Name Confusable_MA: end Ignoring; -end Ignoring; \ No newline at end of file +end Ignoring; diff --git a/unicodetools/src/main/resources/org/unicode/tools/SegmenterDefault.txt b/unicodetools/src/main/resources/org/unicode/tools/SegmenterDefault.txt index e7e6193bd..eb9b2b132 100644 --- a/unicodetools/src/main/resources/org/unicode/tools/SegmenterDefault.txt +++ b/unicodetools/src/main/resources/org/unicode/tools/SegmenterDefault.txt @@ -142,86 +142,10 @@ $CM=[$CM1 $ZWJ] ## show $AL $AL=[$AI $AL $SG $XX $SA] $NS=[$NS $CJ] -## show $AL -## $oldAL=$AL // for debugging -# WARNING: Fixes for Rule 9 -# Treat X (CM|ZWJ* as if it were X. -# Where X is any line break class except SP, BK, CR, LF, NL or ZW. -$X=$CM* # MACROS -$Spec1_=[$SP $BK $CR $LF $NL $ZW] -$Spec2_=[^ $SP $BK $CR $LF $NL $ZW] -$Spec3a_=[^ $SP $BA $HY $CM] -$Spec3b_=[^ $BA $HY $CM] -$Spec4_=[^ $NU $CM] - -# SPECIAL EXTENSIONS - -$AI=($AI $X) -$AK=($AK $X) -$AL=($AL $X) -$AP=($AP $X) -$AS=($AS $X) -$B2=($B2 $X) -$BA=($BA $X) -$BB=($BB $X) -$CB=($CB $X) -$CL=($CL $X) -$CP=($CP $X) -$CM=($CM $X) -## $CM=($CM $X) -$EX=($EX $X) -$GL=($GL $X) -$H2=($H2 $X) -$H3=($H3 $X) -$HL=($HL $X) -$HY=($HY $X) -$ID=($ID $X) -$IN=($IN $X) -$IS=($IS $X) -$JL=($JL $X) -$JT=($JT $X) -$JV=($JV $X) -$NS=($NS $X) -$NU=($NU $X) -$OP=($OP $X) -$PO=($PO $X) -$PR=($PR $X) -$QU=($QU $X) -$SA=($SA $X) -$SG=($SG $X) -$SY=($SY $X) -$VF=($VF $X) -$VI=($VI $X) -$WJ=($WJ $X) -$XX=($XX $X) -$RI=($RI $X) -$EB=($EB $X) -$EM=($EM $X) -$ZWJ=($ZWJ $X) - -$QU_Pi=($QU_Pi $X) -$QU_Pf=($QU_Pf $X) - -$QUmPi=($QUmPi $X) -$QUmPf=($QUmPf $X) - -$NotEastAsian=( $NotEastAsian | [$NotEastAsian - $Spec1_] $X) -$NonEastAsianBA=(NonEastAsianBA $X) - -$DottedCircle=($DottedCircle $X) -$Hyphen=($Hyphen $X) - -$CP30=($CP30 $X) -$OP30=($OP30 $X) - -# OUT OF ORDER ON PURPOSE - -# LB 10 Treat any remaining combining mark as AL and 
non-$EastAsian. -$AL=($AL | ^ $CM | (?<=$Spec1_) $CM) -$NotEastAsian=( $NotEastAsian | ^ $CM | (?<=$Spec1_) $CM ) +$Spec3a_=[^ $SP $BA $HY] # RULES @@ -241,19 +165,18 @@ $NotEastAsian=( $NotEastAsian | ^ $CM | (?<=$Spec1_) $CM ) 8) $ZW $SP* ÷ # LB 8a Don't break between ZWJ and IDs (for use in Emoji ZWJ sequences) 8.1) $ZWJ_O × -# LB 9 Do not break a combining character sequence; treat it as if it has the LB class of the base character -# in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.) -9) $Spec2_ × $CM -##WARNING: this is done by modifying the variable values for all but SP.... That is, $AL is really ($AI $CM*)! +# LB 9 Do not break a combining character sequence; treat it as if it has the line breaking class +# of the base character in all of the following rules. Treat ZWJ as if it were CM. +9) (?<X>[^$BK $CR $LF $NL $SP $ZW]) ( $CM | $ZWJ )* → ${X} +# LB10 Treat any remaining combining mark or ZWJ as AL. +10) ( $CM | $ZWJ ) → A ## LB 11 Do not break before or after WORD JOINER and related characters. 11.01) × $WJ 11.02) $WJ × # LB 12 Do not break after NBSP and related characters. -## 12.01) [^$SP] × $GL 12) $GL × +# LB 12a Do not break before NBSP and related characters, except after spaces and hyphens. 12.1) $Spec3a_ × $GL -12.2) $Spec3b_ $CM+ × $GL -12.3) ^ $CM+ × $GL # LB 13 Do not break before \u2018]\u2019 or \u2018!\u2019 or \u2018;\u2019 or \u2018/\u2019, even after spaces. 
13.01) × $EX 13.02) × $CL diff --git a/unicodetools/src/test/java/org/unicode/test/TestSegment.java b/unicodetools/src/test/java/org/unicode/test/TestSegment.java index baf52f539..719f3cf14 100644 --- a/unicodetools/src/test/java/org/unicode/test/TestSegment.java +++ b/unicodetools/src/test/java/org/unicode/test/TestSegment.java @@ -31,7 +31,7 @@ import org.unicode.text.utility.Utility; import org.unicode.tools.Segmenter; import org.unicode.tools.Segmenter.Builder; -import org.unicode.tools.Segmenter.Rule; +import org.unicode.tools.Segmenter.SegmentationRule; public class TestSegment { @@ -398,8 +398,8 @@ private static void checkExemplars() { } private static void getExemplarStrings(UnicodeMap exemplars, Builder segmenter) { - Map srules = segmenter.getProcessedRules(); - for (Entry entry : srules.entrySet()) { + Map srules = segmenter.getProcessedRules(); + for (Entry entry : srules.entrySet()) { System.out.println(entry.getKey() + "\t\t" + entry.getValue()); } }