diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index c818d6948..a3a12dc65 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -275,6 +275,22 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + set +e + diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$?" 
>> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -316,6 +332,10 @@ jobs: - name: Run command - UCA - collation validity log run: | cd unicodetools/mine/src + echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then set +e + fi # invoke main() in class ...UCA.Main mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION # check for output file @@ -333,6 +353,22 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + set +e + diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$?" 
>> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -372,6 +408,16 @@ jobs: - name: Run invariant tests run: | cd unicodetools/mine/src - MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS + echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then ERROR="::notice" + else ERROR="::error" + fi + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS 2>&1 | sed "s/^::error/$ERROR/" + STATUS=${PIPESTATUS[0]} + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then exit 0 + else exit $STATUS + fi env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/pipeline.md b/docs/pipeline.md index 695f659ec..69f2f4b88 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -49,7 +49,7 @@ Indic scripts only: - [ ] Commit --- -- [ ] PropsList.txt — Add Other_Alphabetic, Diacritic, and Extender to satisfy invariants, or to taste +- [ ] PropsList.txt — Add Other_Alphabetic, Other_Lowercase, Diacritic, and Extender to satisfy invariants, or to taste - [ ] Commit --- @@ -67,8 +67,13 @@ PR preparation: - [ ] PR button — Set to DRAFT pull request - unless approved for the upcoming version - [ ] PR button — Press - - The **Check UCA data** CI check might fail; many character additions need separate 
handling there, - but that is out of scope for the PAG work of preparing `data-for-new`. This will get resolved later. + - The **Check UCA data** and **Check security data invariants** CI checks are + suppressed; many character additions need separate handling there, + but that is out of scope for the PAG work of preparing `data-for-new`, + so reporting those failures could distract from real issues + in the UCD invariants. + UCA and security data issues are addressed later in the process, + before the start of β review. ## Scripts diff --git a/docs/security.md b/docs/security.md index 3229c35a9..01bb8955c 100644 --- a/docs/security.md +++ b/docs/security.md @@ -9,8 +9,8 @@ machine-generated, then tweaked. They have names like source/confusables-winFonts.txt. The main file is confusables-source.txt. ***There is fairly complex processing for the confusables, so carefully diff the -results. Sometimes you may get an unexpected union of two equivalence sets. Look -at Testing below for help.*** +results. Sometimes you may get an unexpected union of two equivalence sets. +Look at Testing below for help.*** Look at the following spreadsheets / bugs to see if there are any additional suggestions. @@ -19,17 +19,38 @@ suggestions. Suggestions](https://docs.google.com/spreadsheet/ccc?key=0ArRWBHdd5mx-dHRXelRVbXRYSVp2QTNDdTBlV1I5X1E&usp=drive_web#gid=0)** * **[Identifier Restriction Suggestions](https://docs.google.com/spreadsheet/ccc?key=0ArRWBHdd5mx-dEJJWkdzZzk4cDRYbEVLTmhraGN0Q3c&usp=drive_web#gid=0)** -* *[Unicode - Bugs](http://www.unicode.org/edcom/bugtrack/query?status=accepted&status=assigned&status=new&status=reopened&group=component&order=priority&col=id&col=summary&col=status&col=type&col=priority&col=milestone&col=component&owner=mark&report=10) - (under TR #36/39)*\ - :construction: **TODO**: That Trac instance is gone. - Markus thinks we decided that there was nothing useful in it, - and deleted it without saving data. Check with Mark. 
+* *[Sample PRs](https://github.com/unicode-org/unicodetools/pull/841)* If so, assess and add to unicodetools/data/security/{version}/data/source/confusables-source.txt — *if needed.* - Then in the spreadsheets, move the "new stuff" line to the end. +### File Format +There is a brief description of the file format at the top. +Each line represents a mapping from a code point or set of code points to a sequence of one or more code points. + +For example: +``` +0021 ; 01C3 # ( ! → ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK +``` + +The ordering of characters doesn't matter. +So it doesn't matter whether you have the above line, or +``` +01C3 ; 0021 # ( ǃ → !) LATIN LETTER RETROFLEX CLICK → EXCLAMATION MARK +``` +It also doesn't matter if you have identical lines; the second one will be a NOOP. + +The mappings are used to generate equivalence classes. +From each equivalence class, one representative member will be chosen, +and in the resulting data file, all the other characters will map to that representative. +Because of transitivity, the equivalence class will tend to be somewhat looser than expected. + +We've discussed possible future enhancements: +- Have a second, narrower mapping that is more exact. +- Allow for mappings from sequences to sequences (instead of just code points to sequences). +- Provide for context, perhaps like the Transform rules. + E.g., [x { a } y → A](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3Aarabic_type%3A%5D&g=&i=) + ## Before generating First, in CLDR, update the script metadata: @@ -51,13 +72,10 @@ Run GenerateConfusables -c -b to generate the files. They will appear in two pla * reformatted source, log * $UNICODETOOLS_DIR/data/security/11.0.0/* *including log.txt* -**Run TestSecurity to verify that the confusable mappings are idempotent!** +The TestSecurity.java test is part of the unit test suite, run by GitHub CI. +It verifies that the confusable mappings are idempotent. 
-With the same VM arguments as the generator. -Starting in 2021q3, TestSecurity needs to be run as a JUnit test. -It is also now part of the unit test suite and run on GitHub CI. - -Copy the following from the output directory to the top level of the revision directory: +Copy the following from the output directory to the top level of the revision directory, and check in. * confusables.txt * confusablesSummary.txt @@ -66,6 +84,12 @@ Copy the following from the output directory to the top level of the revision di * ReadMe.txt * xidmodifications.txt +### Review + +Review the mappings to make sure that there are no surprises. +The biggest issue is if two equivalence classes are mistakenly joined. +For example, if you map b to d, then that will join the equivalence class for b with that of d. + ### IdentifierStatus.txt & IdentifierType.txt Markus 2020-feb-07 for Unicode 13.0: diff --git a/unicodetools/data/emoji/dev/emoji-test.txt b/unicodetools/data/emoji/dev/emoji-test.txt index e81fe0b19..95c5d5431 100644 --- a/unicodetools/data/emoji/dev/emoji-test.txt +++ b/unicodetools/data/emoji/dev/emoji-test.txt @@ -1,5 +1,5 @@ # emoji-test.txt -# Date: 2024-05-01, 21:25:24 GMT +# Date: 2024-06-04, 16:46:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1751,12 +1751,12 @@ 1F936 1F3FD ; fully-qualified # 🤶🏽 E3.0 Mrs. Claus: medium skin tone 1F936 1F3FE ; fully-qualified # 🤶🏾 E3.0 Mrs. Claus: medium-dark skin tone 1F936 1F3FF ; fully-qualified # 🤶🏿 E3.0 Mrs. 
Claus: dark skin tone -1F9D1 200D 1F384 ; fully-qualified # 🧑🎄 E13.0 mx claus -1F9D1 1F3FB 200D 1F384 ; fully-qualified # 🧑🏻🎄 E13.0 mx claus: light skin tone -1F9D1 1F3FC 200D 1F384 ; fully-qualified # 🧑🏼🎄 E13.0 mx claus: medium-light skin tone -1F9D1 1F3FD 200D 1F384 ; fully-qualified # 🧑🏽🎄 E13.0 mx claus: medium skin tone -1F9D1 1F3FE 200D 1F384 ; fully-qualified # 🧑🏾🎄 E13.0 mx claus: medium-dark skin tone -1F9D1 1F3FF 200D 1F384 ; fully-qualified # 🧑🏿🎄 E13.0 mx claus: dark skin tone +1F9D1 200D 1F384 ; fully-qualified # 🧑🎄 E13.0 Mx claus +1F9D1 1F3FB 200D 1F384 ; fully-qualified # 🧑🏻🎄 E13.0 Mx claus: light skin tone +1F9D1 1F3FC 200D 1F384 ; fully-qualified # 🧑🏼🎄 E13.0 Mx claus: medium-light skin tone +1F9D1 1F3FD 200D 1F384 ; fully-qualified # 🧑🏽🎄 E13.0 Mx claus: medium skin tone +1F9D1 1F3FE 200D 1F384 ; fully-qualified # 🧑🏾🎄 E13.0 Mx claus: medium-dark skin tone +1F9D1 1F3FF 200D 1F384 ; fully-qualified # 🧑🏿🎄 E13.0 Mx claus: dark skin tone 1F9B8 ; fully-qualified # 🦸 E11.0 superhero 1F9B8 1F3FB ; fully-qualified # 🦸🏻 E11.0 superhero: light skin tone 1F9B8 1F3FC ; fully-qualified # 🦸🏼 E11.0 superhero: medium-light skin tone @@ -3721,6 +3721,11 @@ 1F41A ; fully-qualified # 🐚 E0.6 spiral shell 1FAB8 ; fully-qualified # 🪸 E14.0 coral 1FABC ; fully-qualified # 🪼 E15.0 jellyfish +1F980 ; fully-qualified # 🦀 E1.0 crab +1F99E ; fully-qualified # 🦞 E11.0 lobster +1F990 ; fully-qualified # 🦐 E3.0 shrimp +1F991 ; fully-qualified # 🦑 E3.0 squid +1F9AA ; fully-qualified # 🦪 E12.0 oyster # subgroup: animal-bug 1F40C ; fully-qualified # 🐌 E0.6 snail @@ -3777,8 +3782,8 @@ 1F344 ; fully-qualified # 🍄 E0.6 mushroom 1FABE ; fully-qualified # E16.0 leafless tree -# Animals & Nature subtotal: 161 -# Animals & Nature subtotal: 161 w/o modifiers +# Animals & Nature subtotal: 166 +# Animals & Nature subtotal: 166 w/o modifiers # group: Food & Drink @@ -3881,13 +3886,6 @@ 1F960 ; fully-qualified # 🥠 E5.0 fortune cookie 1F961 ; fully-qualified # 🥡 E5.0 takeout box -# subgroup: 
food-marine -1F980 ; fully-qualified # 🦀 E1.0 crab -1F99E ; fully-qualified # 🦞 E11.0 lobster -1F990 ; fully-qualified # 🦐 E3.0 shrimp -1F991 ; fully-qualified # 🦑 E3.0 squid -1F9AA ; fully-qualified # 🦪 E12.0 oyster - # subgroup: food-sweet 1F366 ; fully-qualified # 🍦 E0.6 soft ice cream 1F367 ; fully-qualified # 🍧 E0.6 shaved ice @@ -3936,8 +3934,8 @@ 1FAD9 ; fully-qualified # 🫙 E14.0 jar 1F3FA ; fully-qualified # 🏺 E1.0 amphora -# Food & Drink subtotal: 138 -# Food & Drink subtotal: 138 w/o modifiers +# Food & Drink subtotal: 133 +# Food & Drink subtotal: 133 w/o modifiers # group: Travel & Places diff --git a/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt b/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt index e471645bf..ce31f22ce 100644 --- a/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt +++ b/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt @@ -1,5 +1,5 @@ # emoji-zwj-sequences.txt -# Date: 2024-05-01, 21:25:24 GMT +# Date: 2024-06-04, 16:46:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -665,7 +665,7 @@ 1F9D1 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer # E12.1 [1] (🧑🌾) 1F9D1 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook # E12.1 [1] (🧑🍳) 1F9D1 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby # E13.0 [1] (🧑🍼) -1F9D1 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus # E13.0 [1] (🧑🎄) +1F9D1 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus # E13.0 [1] (🧑🎄) 1F9D1 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student # E12.1 [1] (🧑🎓) 1F9D1 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer # E12.1 [1] (🧑🎤) 1F9D1 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist # E12.1 [1] (🧑🎨) @@ -689,7 +689,7 @@ 1F9D1 1F3FB 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: light skin tone # E12.1 [1] (🧑🏻🌾) 1F9D1 1F3FB 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: light skin tone # E12.1 [1] (🧑🏻🍳) 1F9D1 1F3FB 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: light skin tone # E13.0 [1] (🧑🏻🍼) -1F9D1 1F3FB 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: light skin tone # E13.0 [1] (🧑🏻🎄) +1F9D1 1F3FB 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: light skin tone # E13.0 [1] (🧑🏻🎄) 1F9D1 1F3FB 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: light skin tone # E12.1 [1] (🧑🏻🎓) 1F9D1 1F3FB 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: light skin tone # E12.1 [1] (🧑🏻🎤) 1F9D1 1F3FB 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: light skin tone # E12.1 [1] (🧑🏻🎨) @@ -713,7 +713,7 @@ 1F9D1 1F3FC 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium-light skin tone # E12.1 [1] (🧑🏼🌾) 1F9D1 1F3FC 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium-light skin tone # E12.1 [1] (🧑🏼🍳) 1F9D1 1F3FC 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium-light skin tone # E13.0 [1] (🧑🏼🍼) -1F9D1 1F3FC 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium-light skin tone # E13.0 [1] (🧑🏼🎄) +1F9D1 1F3FC 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium-light skin tone # E13.0 [1] (🧑🏼🎄) 1F9D1 1F3FC 200D 1F393 ; 
RGI_Emoji_ZWJ_Sequence ; student: medium-light skin tone # E12.1 [1] (🧑🏼🎓) 1F9D1 1F3FC 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium-light skin tone # E12.1 [1] (🧑🏼🎤) 1F9D1 1F3FC 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium-light skin tone # E12.1 [1] (🧑🏼🎨) @@ -737,7 +737,7 @@ 1F9D1 1F3FD 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium skin tone # E12.1 [1] (🧑🏽🌾) 1F9D1 1F3FD 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium skin tone # E12.1 [1] (🧑🏽🍳) 1F9D1 1F3FD 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium skin tone # E13.0 [1] (🧑🏽🍼) -1F9D1 1F3FD 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium skin tone # E13.0 [1] (🧑🏽🎄) +1F9D1 1F3FD 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium skin tone # E13.0 [1] (🧑🏽🎄) 1F9D1 1F3FD 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium skin tone # E12.1 [1] (🧑🏽🎓) 1F9D1 1F3FD 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium skin tone # E12.1 [1] (🧑🏽🎤) 1F9D1 1F3FD 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium skin tone # E12.1 [1] (🧑🏽🎨) @@ -761,7 +761,7 @@ 1F9D1 1F3FE 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium-dark skin tone # E12.1 [1] (🧑🏾🌾) 1F9D1 1F3FE 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium-dark skin tone # E12.1 [1] (🧑🏾🍳) 1F9D1 1F3FE 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium-dark skin tone # E13.0 [1] (🧑🏾🍼) -1F9D1 1F3FE 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium-dark skin tone # E13.0 [1] (🧑🏾🎄) +1F9D1 1F3FE 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium-dark skin tone # E13.0 [1] (🧑🏾🎄) 1F9D1 1F3FE 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium-dark skin tone # E12.1 [1] (🧑🏾🎓) 1F9D1 1F3FE 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium-dark skin tone # E12.1 [1] (🧑🏾🎤) 1F9D1 1F3FE 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium-dark skin tone # E12.1 [1] (🧑🏾🎨) @@ -785,7 +785,7 @@ 1F9D1 1F3FF 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: dark skin tone # E12.1 [1] (🧑🏿🌾) 1F9D1 1F3FF 
200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: dark skin tone # E12.1 [1] (🧑🏿🍳) 1F9D1 1F3FF 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: dark skin tone # E13.0 [1] (🧑🏿🍼) -1F9D1 1F3FF 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: dark skin tone # E13.0 [1] (🧑🏿🎄) +1F9D1 1F3FF 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: dark skin tone # E13.0 [1] (🧑🏿🎄) 1F9D1 1F3FF 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: dark skin tone # E12.1 [1] (🧑🏿🎓) 1F9D1 1F3FF 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: dark skin tone # E12.1 [1] (🧑🏿🎤) 1F9D1 1F3FF 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: dark skin tone # E12.1 [1] (🧑🏿🎨) diff --git a/unicodetools/data/emoji/dev/internal/emoji-proposals.txt b/unicodetools/data/emoji/dev/internal/emoji-proposals.txt index edc931dbb..8769e7477 100644 --- a/unicodetools/data/emoji/dev/internal/emoji-proposals.txt +++ b/unicodetools/data/emoji/dev/internal/emoji-proposals.txt @@ -1925,7 +1925,7 @@ 1F9CF 1F3FF; L2/18-229, L2/14-173 # 2019 (🧏🏿) deaf person: dark skin tone # L2/19-231 -1F9D1 200D 1F384; L2/19-231 # 2020 (🧑🎄) mx claus +1F9D1 200D 1F384; L2/19-231 # 2020 (🧑🎄) Mx claus # L2/19-275, L2/18-223, L2/18-228, L2/19-021, L2/18-340 1F9D1 200D 1F91D 200D 1F9D1; L2/19-275, L2/18-223, L2/18-228, L2/19-021, L2/18-340 # 2019 (🧑🤝🧑) people holding hands diff --git a/unicodetools/data/security/dev/confusables.txt b/unicodetools/data/security/dev/confusables.txt index 531fd2a7f..ecbd58c23 100644 --- a/unicodetools/data/security/dev/confusables.txt +++ b/unicodetools/data/security/dev/confusables.txt @@ -1,5 +1,5 @@ # confusables.txt -# Date: 2024-05-03, 03:26:41 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -577,10 +577,10 @@ FF07 ; 0027 ; MA #* ( ' → ' ) FULLWIDTH APOSTROPHE → APOSTROPHE # →’ 2018 ; 0027 ; MA #* ( ‘ → ' ) LEFT SINGLE QUOTATION MARK → APOSTROPHE # 2019 ; 0027 ; MA #* ( ’ → ' ) RIGHT SINGLE QUOTATION MARK → APOSTROPHE # 201B ; 0027 ; MA #* ( ‛ → ' ) SINGLE HIGH-REVERSED-9 QUOTATION MARK → APOSTROPHE # →′→ +05F3 ; 0027 ; MA #* ( ׳ → ' ) HEBREW PUNCTUATION GERESH → APOSTROPHE # 2032 ; 0027 ; MA #* ( ′ → ' ) PRIME → APOSTROPHE # 2035 ; 0027 ; MA #* ( ‵ → ' ) REVERSED PRIME → APOSTROPHE # →ʽ→→‘→ 055A ; 0027 ; MA #* ( ՚ → ' ) ARMENIAN APOSTROPHE → APOSTROPHE # →’→ -05F3 ; 0027 ; MA #* ( ׳ → ' ) HEBREW PUNCTUATION GERESH → APOSTROPHE # 0060 ; 0027 ; MA #* ( ` → ' ) GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→ 1FEF ; 0027 ; MA #* ( ` → ' ) GREEK VARIA → APOSTROPHE # →ˋ→→`→→‘→ FF40 ; 0027 ; MA #* ( ` → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘→ @@ -593,7 +593,7 @@ FF40 ; 0027 ; MA #* ( ` → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘ 02B9 ; 0027 ; MA # ( ʹ → ' ) MODIFIER LETTER PRIME → APOSTROPHE # 0374 ; 0027 ; MA # ( ʹ → ' ) GREEK NUMERAL SIGN → APOSTROPHE # →′→ 02C8 ; 0027 ; MA # ( ˈ → ' ) MODIFIER LETTER VERTICAL LINE → APOSTROPHE # -02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →ʹ→→′→ +02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →΄→→ʹ→ 02CB ; 0027 ; MA # ( ˋ → ' ) MODIFIER LETTER GRAVE ACCENT → APOSTROPHE # →`→→‘→ 02F4 ; 0027 ; MA #* ( ˴ → ' ) MODIFIER LETTER MIDDLE GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→ 02BB ; 0027 ; MA # ( ʻ → ' ) MODIFIER LETTER TURNED COMMA → APOSTROPHE # →‘→ @@ -615,10 +615,10 @@ FF02 ; 0027 0027 ; MA #* ( " → '' ) FULLWIDTH QUOTATION MARK → APOSTROPHE, 201C ; 0027 0027 ; MA #* ( “ → '' ) LEFT DOUBLE QUOTATION MARK → APOSTROPHE, APOSTROPHE # →"→ 201D ; 0027 0027 ; MA #* ( ” → '' ) RIGHT DOUBLE QUOTATION MARK → APOSTROPHE, APOSTROPHE # →"→ 201F ; 0027 0027 ; MA #* ( ‟ → '' ) DOUBLE HIGH-REVERSED-9 
QUOTATION MARK → APOSTROPHE, APOSTROPHE # →“→→"→ +05F4 ; 0027 0027 ; MA #* ( ״ → '' ) HEBREW PUNCTUATION GERSHAYIM → APOSTROPHE, APOSTROPHE # →"→ 2033 ; 0027 0027 ; MA #* ( ″ → '' ) DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →"→ 2036 ; 0027 0027 ; MA #* ( ‶ → '' ) REVERSED DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →‵‵→ 3003 ; 0027 0027 ; MA #* ( 〃 → '' ) DITTO MARK → APOSTROPHE, APOSTROPHE # →″→→"→ -05F4 ; 0027 0027 ; MA #* ( ״ → '' ) HEBREW PUNCTUATION GERSHAYIM → APOSTROPHE, APOSTROPHE # →"→ 02DD ; 0027 0027 ; MA #* ( ˝ → '' ) DOUBLE ACUTE ACCENT → APOSTROPHE, APOSTROPHE # →"→ 02BA ; 0027 0027 ; MA # ( ʺ → '' ) MODIFIER LETTER DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →"→ 02F6 ; 0027 0027 ; MA #* ( ˶ → '' ) MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT → APOSTROPHE, APOSTROPHE # →˝→→"→ @@ -1417,6 +1417,7 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO 23E8 ; 2081 2080 ; MA #* ( ⏨ → ₁₀ ) DECIMAL EXPONENT SYMBOL → SUBSCRIPT ONE, SUBSCRIPT ZERO # +1CCF2 ; 0032 ; MA # ( → 2 ) OUTLINED DIGIT TWO → DIGIT TWO # 1D7D0 ; 0032 ; MA # ( 𝟐 → 2 ) MATHEMATICAL BOLD DIGIT TWO → DIGIT TWO # 1D7DA ; 0032 ; MA # ( 𝟚 → 2 ) MATHEMATICAL DOUBLE-STRUCK DIGIT TWO → DIGIT TWO # 1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO # @@ -1429,7 +1430,6 @@ A75A ; 0032 ; MA # ( Ꝛ → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO # A644 ; 0032 ; MA # ( Ꙅ → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→ 14BF ; 0032 ; MA # ( ᒿ → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO # A6EF ; 0032 ; MA # ( ꛯ → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→ -1CCF2 ; 0032 ; MA # ( → 2 ) OUTLINED DIGIT TWO → DIGIT TWO # A9CF ; 0662 ; MA # ( ꧏ → ٢ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO # 06F2 ; 0662 ; MA # ( ۲ → ٢ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO # @@ -1491,6 +1491,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ٢ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI 335A ; 0032 70B9 ; MA #* ( ㍚ → 2点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWO → DIGIT TWO, CJK 
UNIFIED IDEOGRAPH-70B9 # 1D206 ; 0033 ; MA #* ( 𝈆 → 3 ) GREEK VOCAL NOTATION SYMBOL-7 → DIGIT THREE # +1CCF3 ; 0033 ; MA # ( → 3 ) OUTLINED DIGIT THREE → DIGIT THREE # 1D7D1 ; 0033 ; MA # ( 𝟑 → 3 ) MATHEMATICAL BOLD DIGIT THREE → DIGIT THREE # 1D7DB ; 0033 ; MA # ( 𝟛 → 3 ) MATHEMATICAL DOUBLE-STRUCK DIGIT THREE → DIGIT THREE # 1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE # @@ -1506,7 +1507,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→ 16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→ 118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE # -1CCF3 ; 0033 ; MA # ( → 3 ) OUTLINED DIGIT THREE → DIGIT THREE # 06F3 ; 0663 ; MA # ( ۳ → ٣ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE # 1E8C9 ; 0663 ; MA #* ( 𞣉 → ٣ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE # @@ -1531,6 +1531,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335B ; 0033 70B9 ; MA #* ( ㍛ → 3点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THREE → DIGIT THREE, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF4 ; 0034 ; MA # ( → 4 ) OUTLINED DIGIT FOUR → DIGIT FOUR # 1D7D2 ; 0034 ; MA # ( 𝟒 → 4 ) MATHEMATICAL BOLD DIGIT FOUR → DIGIT FOUR # 1D7DC ; 0034 ; MA # ( 𝟜 → 4 ) MATHEMATICAL DOUBLE-STRUCK DIGIT FOUR → DIGIT FOUR # 1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR # @@ -1539,7 +1540,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR # 13CE ; 0034 ; MA # ( Ꮞ → 4 ) CHEROKEE LETTER SE → DIGIT FOUR # 118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR # -1CCF4 ; 0034 ; MA # ( → 4 ) OUTLINED DIGIT FOUR → DIGIT FOUR # 06F4 ; 0664 ; MA # ( ۴ → ٤ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR # @@ -1559,6 +1559,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN 
CAPITAL LETTER ET → DIGIT THREE # 335C ; 0034 70B9 ; MA #* ( ㍜ → 4点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOUR → DIGIT FOUR, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF5 ; 0035 ; MA # ( → 5 ) OUTLINED DIGIT FIVE → DIGIT FIVE # 1D7D3 ; 0035 ; MA # ( 𝟓 → 5 ) MATHEMATICAL BOLD DIGIT FIVE → DIGIT FIVE # 1D7DD ; 0035 ; MA # ( 𝟝 → 5 ) MATHEMATICAL DOUBLE-STRUCK DIGIT FIVE → DIGIT FIVE # 1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE # @@ -1567,7 +1568,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE # 01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE # 118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE # -1CCF5 ; 0035 ; MA # ( → 5 ) OUTLINED DIGIT FIVE → DIGIT FIVE # 2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE # @@ -1581,6 +1581,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335D ; 0035 70B9 ; MA #* ( ㍝ → 5点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIVE → DIGIT FIVE, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF6 ; 0036 ; MA # ( → 6 ) OUTLINED DIGIT SIX → DIGIT SIX # 1D7D4 ; 0036 ; MA # ( 𝟔 → 6 ) MATHEMATICAL BOLD DIGIT SIX → DIGIT SIX # 1D7DE ; 0036 ; MA # ( 𝟞 → 6 ) MATHEMATICAL DOUBLE-STRUCK DIGIT SIX → DIGIT SIX # 1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX # @@ -1591,7 +1592,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX # 13EE ; 0036 ; MA # ( Ꮾ → 6 ) CHEROKEE LETTER WV → DIGIT SIX # 118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX # -1CCF6 ; 0036 ; MA # ( → 6 ) OUTLINED DIGIT SIX → DIGIT SIX # 06F6 ; 0666 ; MA # ( ۶ → ٦ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX # @@ -1610,6 +1610,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335E ; 0036 70B9 ; MA #* ( ㍞ → 6点 ) 
IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIX → DIGIT SIX, CJK UNIFIED IDEOGRAPH-70B9 # 1D212 ; 0037 ; MA #* ( 𝈒 → 7 ) GREEK VOCAL NOTATION SYMBOL-19 → DIGIT SEVEN # +1CCF7 ; 0037 ; MA # ( → 7 ) OUTLINED DIGIT SEVEN → DIGIT SEVEN # 1D7D5 ; 0037 ; MA # ( 𝟕 → 7 ) MATHEMATICAL BOLD DIGIT SEVEN → DIGIT SEVEN # 1D7DF ; 0037 ; MA # ( 𝟟 → 7 ) MATHEMATICAL DOUBLE-STRUCK DIGIT SEVEN → DIGIT SEVEN # 1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN # @@ -1618,7 +1619,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN # 104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN # 118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN # -1CCF7 ; 0037 ; MA # ( → 7 ) OUTLINED DIGIT SEVEN → DIGIT SEVEN # 2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN # @@ -1636,6 +1636,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 09EA ; 0038 ; MA # ( ৪ → 8 ) BENGALI DIGIT FOUR → DIGIT EIGHT # 0A6A ; 0038 ; MA # ( ੪ → 8 ) GURMUKHI DIGIT FOUR → DIGIT EIGHT # 1E8CB ; 0038 ; MA #* ( 𞣋 → 8 ) MENDE KIKAKUI DIGIT FIVE → DIGIT EIGHT # +1CCF8 ; 0038 ; MA # ( → 8 ) OUTLINED DIGIT EIGHT → DIGIT EIGHT # 1D7D6 ; 0038 ; MA # ( 𝟖 → 8 ) MATHEMATICAL BOLD DIGIT EIGHT → DIGIT EIGHT # 1D7E0 ; 0038 ; MA # ( 𝟠 → 8 ) MATHEMATICAL DOUBLE-STRUCK DIGIT EIGHT → DIGIT EIGHT # 1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT # @@ -1645,7 +1646,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT # 0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT # 1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT # -1CCF8 ; 0038 ; MA # ( → 8 ) OUTLINED DIGIT EIGHT → DIGIT EIGHT # 0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT # @@ -1665,6 +1665,7 @@ A76A ; 
0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0B68 ; 0039 ; MA # ( ୨ → 9 ) ORIYA DIGIT TWO → DIGIT NINE # 09ED ; 0039 ; MA # ( ৭ → 9 ) BENGALI DIGIT SEVEN → DIGIT NINE # 0D6D ; 0039 ; MA # ( ൭ → 9 ) MALAYALAM DIGIT SEVEN → DIGIT NINE # +1CCF9 ; 0039 ; MA # ( → 9 ) OUTLINED DIGIT NINE → DIGIT NINE # 1D7D7 ; 0039 ; MA # ( 𝟗 → 9 ) MATHEMATICAL BOLD DIGIT NINE → DIGIT NINE # 1D7E1 ; 0039 ; MA # ( 𝟡 → 9 ) MATHEMATICAL DOUBLE-STRUCK DIGIT NINE → DIGIT NINE # 1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE # @@ -1676,7 +1677,6 @@ A76E ; 0039 ; MA # ( Ꝯ → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE # 118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE # 118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE # 118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE # -1CCF9 ; 0039 ; MA # ( → 9 ) OUTLINED DIGIT NINE → DIGIT NINE # 0967 ; 0669 ; MA # ( १ → ٩ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE # 118E4 ; 0669 ; MA # ( 𑣤 → ٩ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE # @@ -1723,6 +1723,7 @@ FF41 ; 0061 ; MA # ( a → a ) FULLWIDTH LATIN SMALL LETTER A → LATIN SMALL 2DF6 ; 0363 ; MA # ( ⷶ → ͣ ) COMBINING CYRILLIC LETTER A → COMBINING LATIN SMALL LETTER A # FF21 ; 0041 ; MA # ( A → A ) FULLWIDTH LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # →А→ +1CCD6 ; 0041 ; MA #* ( → A ) OUTLINED LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # 1D400 ; 0041 ; MA # ( 𝐀 → A ) MATHEMATICAL BOLD CAPITAL A → LATIN CAPITAL LETTER A # 1D434 ; 0041 ; MA # ( 𝐴 → A ) MATHEMATICAL ITALIC CAPITAL A → LATIN CAPITAL LETTER A # 1D468 ; 0041 ; MA # ( 𝑨 → A ) MATHEMATICAL BOLD ITALIC CAPITAL A → LATIN CAPITAL LETTER A # @@ -1748,7 +1749,6 @@ FF21 ; 0041 ; MA # ( A → A ) FULLWIDTH LATIN CAPITAL LETTER A → LATIN CAPI A4EE ; 0041 ; MA # ( ꓮ → A ) LISU LETTER A → LATIN CAPITAL LETTER A # 16F40 ; 0041 ; MA # ( 𖽀 → A ) MIAO LETTER ZZYA → LATIN CAPITAL LETTER A # 102A0 ; 0041 ; MA # ( 𐊠 → A ) CARIAN 
LETTER A → LATIN CAPITAL LETTER A # -1CCD6 ; 0041 ; MA #* ( → A ) OUTLINED LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # 2376 ; 0061 0332 ; MA #* ( ⍶ → a̲ ) APL FUNCTIONAL SYMBOL ALPHA UNDERBAR → LATIN SMALL LETTER A, COMBINING LOW LINE # →α̲→→ɑ̲→ @@ -1826,6 +1826,7 @@ A4EF ; 2C6F ; MA # ( ꓯ → Ɐ ) LISU LETTER AE → LATIN CAPITAL LETTER TURNE FF22 ; 0042 ; MA # ( B → B ) FULLWIDTH LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # →Β→ 212C ; 0042 ; MA # ( ℬ → B ) SCRIPT CAPITAL B → LATIN CAPITAL LETTER B # +1CCD7 ; 0042 ; MA #* ( → B ) OUTLINED LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # 1D401 ; 0042 ; MA # ( 𝐁 → B ) MATHEMATICAL BOLD CAPITAL B → LATIN CAPITAL LETTER B # 1D435 ; 0042 ; MA # ( 𝐵 → B ) MATHEMATICAL ITALIC CAPITAL B → LATIN CAPITAL LETTER B # 1D469 ; 0042 ; MA # ( 𝑩 → B ) MATHEMATICAL BOLD ITALIC CAPITAL B → LATIN CAPITAL LETTER B # @@ -1852,7 +1853,6 @@ A4D0 ; 0042 ; MA # ( ꓐ → B ) LISU LETTER BA → LATIN CAPITAL LETTER B # 10282 ; 0042 ; MA # ( 𐊂 → B ) LYCIAN LETTER B → LATIN CAPITAL LETTER B # 102A1 ; 0042 ; MA # ( 𐊡 → B ) CARIAN LETTER P2 → LATIN CAPITAL LETTER B # 10301 ; 0042 ; MA # ( 𐌁 → B ) OLD ITALIC LETTER BE → LATIN CAPITAL LETTER B # -1CCD7 ; 0042 ; MA #* ( → B ) OUTLINED LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # 0253 ; 0062 0314 ; MA # ( ɓ → b̔ ) LATIN SMALL LETTER B WITH HOOK → LATIN SMALL LETTER B, COMBINING REVERSED COMMA ABOVE # @@ -1910,6 +1910,7 @@ FF23 ; 0043 ; MA # ( C → C ) FULLWIDTH LATIN CAPITAL LETTER C → LATIN CAPI 216D ; 0043 ; MA # ( Ⅽ → C ) ROMAN NUMERAL ONE HUNDRED → LATIN CAPITAL LETTER C # 2102 ; 0043 ; MA # ( ℂ → C ) DOUBLE-STRUCK CAPITAL C → LATIN CAPITAL LETTER C # 212D ; 0043 ; MA # ( ℭ → C ) BLACK-LETTER CAPITAL C → LATIN CAPITAL LETTER C # +1CCD8 ; 0043 ; MA #* ( → C ) OUTLINED LATIN CAPITAL LETTER C → LATIN CAPITAL LETTER C # 1D402 ; 0043 ; MA # ( 𝐂 → C ) MATHEMATICAL BOLD CAPITAL C → LATIN CAPITAL LETTER C # 1D436 ; 0043 ; MA # ( 𝐶 → C ) MATHEMATICAL ITALIC CAPITAL C → LATIN CAPITAL LETTER C # 
1D46A ; 0043 ; MA # ( 𝑪 → C ) MATHEMATICAL BOLD ITALIC CAPITAL C → LATIN CAPITAL LETTER C # @@ -1930,7 +1931,6 @@ A4DA ; 0043 ; MA # ( ꓚ → C ) LISU LETTER CA → LATIN CAPITAL LETTER C # 10302 ; 0043 ; MA # ( 𐌂 → C ) OLD ITALIC LETTER KE → LATIN CAPITAL LETTER C # 10415 ; 0043 ; MA # ( 𐐕 → C ) DESERET CAPITAL LETTER CHEE → LATIN CAPITAL LETTER C # 1051C ; 0043 ; MA # ( 𐔜 → C ) ELBASAN LETTER SHE → LATIN CAPITAL LETTER C # -1CCD8 ; 0043 ; MA #* ( → C ) OUTLINED LATIN CAPITAL LETTER C → LATIN CAPITAL LETTER C # 00A2 ; 0063 0338 ; MA #* ( ¢ → c̸ ) CENT SIGN → LATIN SMALL LETTER C, COMBINING LONG SOLIDUS OVERLAY # 023C ; 0063 0338 ; MA # ( ȼ → c̸ ) LATIN SMALL LETTER C WITH STROKE → LATIN SMALL LETTER C, COMBINING LONG SOLIDUS OVERLAY # →¢→ @@ -2006,6 +2006,7 @@ A4D2 ; 0064 ; MA # ( ꓒ → d ) LISU LETTER PHA → LATIN SMALL LETTER D # 216E ; 0044 ; MA # ( Ⅾ → D ) ROMAN NUMERAL FIVE HUNDRED → LATIN CAPITAL LETTER D # 2145 ; 0044 ; MA # ( ⅅ → D ) DOUBLE-STRUCK ITALIC CAPITAL D → LATIN CAPITAL LETTER D # +1CCD9 ; 0044 ; MA #* ( → D ) OUTLINED LATIN CAPITAL LETTER D → LATIN CAPITAL LETTER D # 1D403 ; 0044 ; MA # ( 𝐃 → D ) MATHEMATICAL BOLD CAPITAL D → LATIN CAPITAL LETTER D # 1D437 ; 0044 ; MA # ( 𝐷 → D ) MATHEMATICAL ITALIC CAPITAL D → LATIN CAPITAL LETTER D # 1D46B ; 0044 ; MA # ( 𝑫 → D ) MATHEMATICAL BOLD ITALIC CAPITAL D → LATIN CAPITAL LETTER D # @@ -2023,7 +2024,6 @@ A4D2 ; 0064 ; MA # ( ꓒ → d ) LISU LETTER PHA → LATIN SMALL LETTER D # 15DE ; 0044 ; MA # ( ᗞ → D ) CANADIAN SYLLABICS CARRIER THE → LATIN CAPITAL LETTER D # 15EA ; 0044 ; MA # ( ᗪ → D ) CANADIAN SYLLABICS CARRIER PE → LATIN CAPITAL LETTER D # →ᗞ→ A4D3 ; 0044 ; MA # ( ꓓ → D ) LISU LETTER DA → LATIN CAPITAL LETTER D # -1CCD9 ; 0044 ; MA #* ( → D ) OUTLINED LATIN CAPITAL LETTER D → LATIN CAPITAL LETTER D # 0257 ; 0064 0314 ; MA # ( ɗ → d̔ ) LATIN SMALL LETTER D WITH HOOK → LATIN SMALL LETTER D, COMBINING REVERSED COMMA ABOVE # @@ -2099,6 +2099,7 @@ AB32 ; 0065 ; MA # ( ꬲ → e ) LATIN SMALL LETTER BLACKLETTER E → 
LATIN SMAL 22FF ; 0045 ; MA #* ( ⋿ → E ) Z NOTATION BAG MEMBERSHIP → LATIN CAPITAL LETTER E # FF25 ; 0045 ; MA # ( E → E ) FULLWIDTH LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # →Ε→ 2130 ; 0045 ; MA # ( ℰ → E ) SCRIPT CAPITAL E → LATIN CAPITAL LETTER E # +1CCDA ; 0045 ; MA #* ( → E ) OUTLINED LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # 1D404 ; 0045 ; MA # ( 𝐄 → E ) MATHEMATICAL BOLD CAPITAL E → LATIN CAPITAL LETTER E # 1D438 ; 0045 ; MA # ( 𝐸 → E ) MATHEMATICAL ITALIC CAPITAL E → LATIN CAPITAL LETTER E # 1D46C ; 0045 ; MA # ( 𝑬 → E ) MATHEMATICAL BOLD ITALIC CAPITAL E → LATIN CAPITAL LETTER E # @@ -2124,7 +2125,6 @@ A4F0 ; 0045 ; MA # ( ꓰ → E ) LISU LETTER E → LATIN CAPITAL LETTER E # 118A6 ; 0045 ; MA # ( 𑢦 → E ) WARANG CITI CAPITAL LETTER II → LATIN CAPITAL LETTER E # 118AE ; 0045 ; MA # ( 𑢮 → E ) WARANG CITI CAPITAL LETTER YUJ → LATIN CAPITAL LETTER E # 10286 ; 0045 ; MA # ( 𐊆 → E ) LYCIAN LETTER I → LATIN CAPITAL LETTER E # -1CCDA ; 0045 ; MA #* ( → E ) OUTLINED LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # 011B ; 0115 ; MA # ( ě → ĕ ) LATIN SMALL LETTER E WITH CARON → LATIN SMALL LETTER E WITH BREVE # @@ -2195,6 +2195,7 @@ A799 ; 0066 ; MA # ( ꞙ → f ) LATIN SMALL LETTER F WITH STROKE → LATIN SMAL 1D213 ; 0046 ; MA #* ( 𝈓 → F ) GREEK VOCAL NOTATION SYMBOL-20 → LATIN CAPITAL LETTER F # →Ϝ→ 2131 ; 0046 ; MA # ( ℱ → F ) SCRIPT CAPITAL F → LATIN CAPITAL LETTER F # +1CCDB ; 0046 ; MA #* ( → F ) OUTLINED LATIN CAPITAL LETTER F → LATIN CAPITAL LETTER F # 1D405 ; 0046 ; MA # ( 𝐅 → F ) MATHEMATICAL BOLD CAPITAL F → LATIN CAPITAL LETTER F # 1D439 ; 0046 ; MA # ( 𝐹 → F ) MATHEMATICAL ITALIC CAPITAL F → LATIN CAPITAL LETTER F # 1D46D ; 0046 ; MA # ( 𝑭 → F ) MATHEMATICAL BOLD ITALIC CAPITAL F → LATIN CAPITAL LETTER F # @@ -2217,7 +2218,6 @@ A4DD ; 0046 ; MA # ( ꓝ → F ) LISU LETTER TSA → LATIN CAPITAL LETTER F # 10287 ; 0046 ; MA # ( 𐊇 → F ) LYCIAN LETTER W → LATIN CAPITAL LETTER F # 102A5 ; 0046 ; MA # ( 𐊥 → F ) CARIAN LETTER R → LATIN CAPITAL LETTER F # 
10525 ; 0046 ; MA # ( 𐔥 → F ) ELBASAN LETTER GHE → LATIN CAPITAL LETTER F # -1CCDB ; 0046 ; MA #* ( → F ) OUTLINED LATIN CAPITAL LETTER F → LATIN CAPITAL LETTER F # 0192 ; 0066 0326 ; MA # ( ƒ → f̦ ) LATIN SMALL LETTER F WITH HOOK → LATIN SMALL LETTER F, COMBINING COMMA BELOW # →f̡→ @@ -2264,6 +2264,7 @@ FF47 ; 0067 ; MA # ( g → g ) FULLWIDTH LATIN SMALL LETTER G → LATIN SMALL 018D ; 0067 ; MA # ( ƍ → g ) LATIN SMALL LETTER TURNED DELTA → LATIN SMALL LETTER G # 0581 ; 0067 ; MA # ( ց → g ) ARMENIAN SMALL LETTER CO → LATIN SMALL LETTER G # +1CCDC ; 0047 ; MA #* ( → G ) OUTLINED LATIN CAPITAL LETTER G → LATIN CAPITAL LETTER G # 1D406 ; 0047 ; MA # ( 𝐆 → G ) MATHEMATICAL BOLD CAPITAL G → LATIN CAPITAL LETTER G # 1D43A ; 0047 ; MA # ( 𝐺 → G ) MATHEMATICAL ITALIC CAPITAL G → LATIN CAPITAL LETTER G # 1D46E ; 0047 ; MA # ( 𝑮 → G ) MATHEMATICAL BOLD ITALIC CAPITAL G → LATIN CAPITAL LETTER G # @@ -2281,7 +2282,6 @@ FF47 ; 0067 ; MA # ( g → g ) FULLWIDTH LATIN SMALL LETTER G → LATIN SMALL 13C0 ; 0047 ; MA # ( Ꮐ → G ) CHEROKEE LETTER NAH → LATIN CAPITAL LETTER G # 13F3 ; 0047 ; MA # ( Ᏻ → G ) CHEROKEE LETTER YU → LATIN CAPITAL LETTER G # A4D6 ; 0047 ; MA # ( ꓖ → G ) LISU LETTER GA → LATIN CAPITAL LETTER G # -1CCDC ; 0047 ; MA #* ( → G ) OUTLINED LATIN CAPITAL LETTER G → LATIN CAPITAL LETTER G # 1DA2 ; 1D4D ; MA # ( ᶢ → ᵍ ) MODIFIER LETTER SMALL SCRIPT G → MODIFIER LETTER SMALL G # @@ -2325,6 +2325,7 @@ FF28 ; 0048 ; MA # ( H → H ) FULLWIDTH LATIN CAPITAL LETTER H → LATIN CAPI 210B ; 0048 ; MA # ( ℋ → H ) SCRIPT CAPITAL H → LATIN CAPITAL LETTER H # 210C ; 0048 ; MA # ( ℌ → H ) BLACK-LETTER CAPITAL H → LATIN CAPITAL LETTER H # 210D ; 0048 ; MA # ( ℍ → H ) DOUBLE-STRUCK CAPITAL H → LATIN CAPITAL LETTER H # +1CCDD ; 0048 ; MA #* ( → H ) OUTLINED LATIN CAPITAL LETTER H → LATIN CAPITAL LETTER H # 1D407 ; 0048 ; MA # ( 𝐇 → H ) MATHEMATICAL BOLD CAPITAL H → LATIN CAPITAL LETTER H # 1D43B ; 0048 ; MA # ( 𝐻 → H ) MATHEMATICAL ITALIC CAPITAL H → LATIN CAPITAL LETTER H # 1D46F ; 0048 ; 
MA # ( 𝑯 → H ) MATHEMATICAL BOLD ITALIC CAPITAL H → LATIN CAPITAL LETTER H # @@ -2347,7 +2348,6 @@ FF28 ; 0048 ; MA # ( H → H ) FULLWIDTH LATIN CAPITAL LETTER H → LATIN CAPI 157C ; 0048 ; MA # ( ᕼ → H ) CANADIAN SYLLABICS NUNAVUT H → LATIN CAPITAL LETTER H # A4E7 ; 0048 ; MA # ( ꓧ → H ) LISU LETTER XA → LATIN CAPITAL LETTER H # 102CF ; 0048 ; MA # ( 𐋏 → H ) CARIAN LETTER E2 → LATIN CAPITAL LETTER H # -1CCDD ; 0048 ; MA #* ( → H ) OUTLINED LATIN CAPITAL LETTER H → LATIN CAPITAL LETTER H # 1D78 ; 1D34 ; MA # ( ᵸ → ᴴ ) MODIFIER LETTER CYRILLIC EN → MODIFIER LETTER CAPITAL H # @@ -2465,6 +2465,7 @@ FF4A ; 006A ; MA # ( j → j ) FULLWIDTH LATIN SMALL LETTER J → LATIN SMALL 0458 ; 006A ; MA # ( ј → j ) CYRILLIC SMALL LETTER JE → LATIN SMALL LETTER J # FF2A ; 004A ; MA # ( J → J ) FULLWIDTH LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # →Ј→ +1CCDF ; 004A ; MA #* ( → J ) OUTLINED LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # 1D409 ; 004A ; MA # ( 𝐉 → J ) MATHEMATICAL BOLD CAPITAL J → LATIN CAPITAL LETTER J # 1D43D ; 004A ; MA # ( 𝐽 → J ) MATHEMATICAL ITALIC CAPITAL J → LATIN CAPITAL LETTER J # 1D471 ; 004A ; MA # ( 𝑱 → J ) MATHEMATICAL BOLD ITALIC CAPITAL J → LATIN CAPITAL LETTER J # @@ -2484,7 +2485,6 @@ A7B2 ; 004A ; MA # ( Ʝ → J ) LATIN CAPITAL LETTER J WITH CROSSED-TAIL → LA 13AB ; 004A ; MA # ( Ꭻ → J ) CHEROKEE LETTER GU → LATIN CAPITAL LETTER J # 148D ; 004A ; MA # ( ᒍ → J ) CANADIAN SYLLABICS CO → LATIN CAPITAL LETTER J # A4D9 ; 004A ; MA # ( ꓙ → J ) LISU LETTER JA → LATIN CAPITAL LETTER J # -1CCDF ; 004A ; MA #* ( → J ) OUTLINED LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # 0249 ; 006A 0335 ; MA # ( ɉ → j̵ ) LATIN SMALL LETTER J WITH STROKE → LATIN SMALL LETTER J, COMBINING SHORT STROKE OVERLAY # @@ -2513,6 +2513,7 @@ AB7B ; 1D0A ; MA # ( ꭻ → ᴊ ) CHEROKEE SMALL LETTER GU → LATIN LETTER SMA 212A ; 004B ; MA # ( K → K ) KELVIN SIGN → LATIN CAPITAL LETTER K # FF2B ; 004B ; MA # ( K → K ) FULLWIDTH LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # →Κ→ +1CCE0 
; 004B ; MA #* ( → K ) OUTLINED LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # 1D40A ; 004B ; MA # ( 𝐊 → K ) MATHEMATICAL BOLD CAPITAL K → LATIN CAPITAL LETTER K # 1D43E ; 004B ; MA # ( 𝐾 → K ) MATHEMATICAL ITALIC CAPITAL K → LATIN CAPITAL LETTER K # 1D472 ; 004B ; MA # ( 𝑲 → K ) MATHEMATICAL BOLD ITALIC CAPITAL K → LATIN CAPITAL LETTER K # @@ -2538,7 +2539,6 @@ FF2B ; 004B ; MA # ( K → K ) FULLWIDTH LATIN CAPITAL LETTER K → LATIN CAPI 16D5 ; 004B ; MA # ( ᛕ → K ) RUNIC LETTER OPEN-P → LATIN CAPITAL LETTER K # A4D7 ; 004B ; MA # ( ꓗ → K ) LISU LETTER KA → LATIN CAPITAL LETTER K # 10518 ; 004B ; MA # ( 𐔘 → K ) ELBASAN LETTER QE → LATIN CAPITAL LETTER K # -1CCE0 ; 004B ; MA #* ( → K ) OUTLINED LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # 0199 ; 006B 0314 ; MA # ( ƙ → k̔ ) LATIN SMALL LETTER K WITH HOOK → LATIN SMALL LETTER K, COMBINING REVERSED COMMA ABOVE # @@ -2561,6 +2561,7 @@ FFE8 ; 006C ; MA #* ( │ → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL 06F1 ; 006C ; MA # ( ۱ → l ) EXTENDED ARABIC-INDIC DIGIT ONE → LATIN SMALL LETTER L # →1→ 10320 ; 006C ; MA #* ( 𐌠 → l ) OLD ITALIC NUMERAL ONE → LATIN SMALL LETTER L # →𐌉→→I→ 1E8C7 ; 006C ; MA #* ( 𞣇 → l ) MENDE KIKAKUI DIGIT ONE → LATIN SMALL LETTER L # +1CCF1 ; 006C ; MA # ( → l ) OUTLINED DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7CF ; 006C ; MA # ( 𝟏 → l ) MATHEMATICAL BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7D9 ; 006C ; MA # ( 𝟙 → l ) MATHEMATICAL DOUBLE-STRUCK DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→ @@ -2572,6 +2573,7 @@ FF29 ; 006C ; MA # ( I → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMAL 2160 ; 006C ; MA # ( Ⅰ → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→ 2110 ; 006C ; MA # ( ℐ → l ) SCRIPT CAPITAL I → LATIN SMALL LETTER L # →I→ 2111 ; 006C ; MA # ( ℑ → l ) BLACK-LETTER CAPITAL I → LATIN SMALL LETTER L # →I→ +1CCDE ; 006C ; MA #* ( → l ) OUTLINED LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # 
→I→ 1D408 ; 006C ; MA # ( 𝐈 → l ) MATHEMATICAL BOLD CAPITAL I → LATIN SMALL LETTER L # →I→ 1D43C ; 006C ; MA # ( 𝐼 → l ) MATHEMATICAL ITALIC CAPITAL I → LATIN SMALL LETTER L # →I→ 1D470 ; 006C ; MA # ( 𝑰 → l ) MATHEMATICAL BOLD ITALIC CAPITAL I → LATIN SMALL LETTER L # →I→ @@ -2624,12 +2626,11 @@ A4F2 ; 006C ; MA # ( ꓲ → l ) LISU LETTER I → LATIN SMALL LETTER L # →I 16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→ 1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→ 10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→ -1CCDE ; 006C ; MA #* ( → l ) OUTLINED LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →I→ -1CCF1 ; 006C ; MA # ( → l ) OUTLINED DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L # 216C ; 004C ; MA # ( Ⅼ → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L # 2112 ; 004C ; MA # ( ℒ → L ) SCRIPT CAPITAL L → LATIN CAPITAL LETTER L # +1CCE1 ; 004C ; MA #* ( → L ) OUTLINED LATIN CAPITAL LETTER L → LATIN CAPITAL LETTER L # 1D40B ; 004C ; MA # ( 𝐋 → L ) MATHEMATICAL BOLD CAPITAL L → LATIN CAPITAL LETTER L # 1D43F ; 004C ; MA # ( 𝐿 → L ) MATHEMATICAL ITALIC CAPITAL L → LATIN CAPITAL LETTER L # 1D473 ; 004C ; MA # ( 𝑳 → L ) MATHEMATICAL BOLD ITALIC CAPITAL L → LATIN CAPITAL LETTER L # @@ -2651,7 +2652,6 @@ A4E1 ; 004C ; MA # ( ꓡ → L ) LISU LETTER LA → LATIN CAPITAL LETTER L # 118B2 ; 004C ; MA # ( 𑢲 → L ) WARANG CITI CAPITAL LETTER TTE → LATIN CAPITAL LETTER L # 1041B ; 004C ; MA # ( 𐐛 → L ) DESERET CAPITAL LETTER ETH → LATIN CAPITAL LETTER L # 10526 ; 004C ; MA # ( 𐔦 → L ) ELBASAN LETTER GHAMMA → LATIN CAPITAL LETTER L # -1CCE1 ; 004C ; MA #* ( → L ) OUTLINED LATIN CAPITAL LETTER L → LATIN CAPITAL LETTER L # FD3C ; 006C 030B ; MA # ( ﴼ → l̋ ) ARABIC LIGATURE ALEF WITH FATHATAN FINAL FORM → LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →اً→ FD3D ; 006C 030B ; MA # ( ﴽ → l̋ ) ARABIC LIGATURE ALEF WITH 
FATHATAN ISOLATED FORM → LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →اً→ @@ -2805,6 +2805,7 @@ ABAE ; 029F ; MA # ( ꮮ → ʟ ) CHEROKEE SMALL LETTER TLE → LATIN LETTER SMA FF2D ; 004D ; MA # ( M → M ) FULLWIDTH LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # →Μ→ 216F ; 004D ; MA # ( Ⅿ → M ) ROMAN NUMERAL ONE THOUSAND → LATIN CAPITAL LETTER M # 2133 ; 004D ; MA # ( ℳ → M ) SCRIPT CAPITAL M → LATIN CAPITAL LETTER M # +1CCE2 ; 004D ; MA #* ( → M ) OUTLINED LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # 1D40C ; 004D ; MA # ( 𝐌 → M ) MATHEMATICAL BOLD CAPITAL M → LATIN CAPITAL LETTER M # 1D440 ; 004D ; MA # ( 𝑀 → M ) MATHEMATICAL ITALIC CAPITAL M → LATIN CAPITAL LETTER M # 1D474 ; 004D ; MA # ( 𝑴 → M ) MATHEMATICAL BOLD ITALIC CAPITAL M → LATIN CAPITAL LETTER M # @@ -2832,7 +2833,6 @@ FF2D ; 004D ; MA # ( M → M ) FULLWIDTH LATIN CAPITAL LETTER M → LATIN CAPI A4DF ; 004D ; MA # ( ꓟ → M ) LISU LETTER MA → LATIN CAPITAL LETTER M # 102B0 ; 004D ; MA # ( 𐊰 → M ) CARIAN LETTER S → LATIN CAPITAL LETTER M # 10311 ; 004D ; MA # ( 𐌑 → M ) OLD ITALIC LETTER SHE → LATIN CAPITAL LETTER M # -1CCE2 ; 004D ; MA #* ( → M ) OUTLINED LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # 04CD ; 004D 0326 ; MA # ( Ӎ → M̦ ) CYRILLIC CAPITAL LETTER EM WITH TAIL → LATIN CAPITAL LETTER M, COMBINING COMMA BELOW # →М̡→ @@ -2858,6 +2858,7 @@ A4DF ; 004D ; MA # ( ꓟ → M ) LISU LETTER MA → LATIN CAPITAL LETTER M # FF2E ; 004E ; MA # ( N → N ) FULLWIDTH LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # →Ν→ 2115 ; 004E ; MA # ( ℕ → N ) DOUBLE-STRUCK CAPITAL N → LATIN CAPITAL LETTER N # +1CCE3 ; 004E ; MA #* ( → N ) OUTLINED LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # 1D40D ; 004E ; MA # ( 𝐍 → N ) MATHEMATICAL BOLD CAPITAL N → LATIN CAPITAL LETTER N # 1D441 ; 004E ; MA # ( 𝑁 → N ) MATHEMATICAL ITALIC CAPITAL N → LATIN CAPITAL LETTER N # 1D475 ; 004E ; MA # ( 𝑵 → N ) MATHEMATICAL BOLD ITALIC CAPITAL N → LATIN CAPITAL LETTER N # @@ -2879,7 +2880,6 @@ FF2E ; 004E ; MA # ( N → N ) FULLWIDTH 
LATIN CAPITAL LETTER N → LATIN CAPI 2C9A ; 004E ; MA # ( Ⲛ → N ) COPTIC CAPITAL LETTER NI → LATIN CAPITAL LETTER N # A4E0 ; 004E ; MA # ( ꓠ → N ) LISU LETTER NA → LATIN CAPITAL LETTER N # 10513 ; 004E ; MA # ( 𐔓 → N ) ELBASAN LETTER NE → LATIN CAPITAL LETTER N # -1CCE3 ; 004E ; MA #* ( → N ) OUTLINED LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # 1018E ; 004E 030A ; MA #* ( 𐆎 → N̊ ) NOMISMA SIGN → LATIN CAPITAL LETTER N, COMBINING RING ABOVE # →Νͦ→ @@ -2994,6 +2994,7 @@ FBA6 ; 006F ; MA # ( ﮦ → o ) ARABIC LETTER HEH GOAL ISOLATED FORM → 3007 ; 004F ; MA # ( 〇 → O ) IDEOGRAPHIC NUMBER ZERO → LATIN CAPITAL LETTER O # 114D0 ; 004F ; MA # ( 𑓐 → O ) TIRHUTA DIGIT ZERO → LATIN CAPITAL LETTER O # →০→→0→ 118E0 ; 004F ; MA # ( 𑣠 → O ) WARANG CITI DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ +1CCF0 ; 004F ; MA # ( → O ) OUTLINED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7CE ; 004F ; MA # ( 𝟎 → O ) MATHEMATICAL BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7D8 ; 004F ; MA # ( 𝟘 → O ) MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ @@ -3001,6 +3002,7 @@ FBA6 ; 006F ; MA # ( ﮦ → o ) ARABIC LETTER HEH GOAL ISOLATED FORM → 1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ FF2F ; 004F ; MA # ( O → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→ +1CCE4 ; 004F ; MA #* ( → O ) OUTLINED LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # 1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O # 1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O # 1D476 ; 004F ; MA # ( 𝑶 → O ) MATHEMATICAL BOLD ITALIC CAPITAL O → LATIN CAPITAL LETTER O # @@ -3033,8 +3035,6 @@ A4F3 ; 004F ; MA # ( ꓳ → O ) LISU LETTER O → LATIN CAPITAL LETTER O # 102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN 
LETTER O → LATIN CAPITAL LETTER O # 10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O # 10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O # -1CCE4 ; 004F ; MA #* ( → O ) OUTLINED LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # -1CCF0 ; 004F ; MA # ( → O ) OUTLINED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR # 1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→ @@ -3202,6 +3202,7 @@ FF50 ; 0070 ; MA # ( p → p ) FULLWIDTH LATIN SMALL LETTER P → LATIN SMALL FF30 ; 0050 ; MA # ( P → P ) FULLWIDTH LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # →Р→ 2119 ; 0050 ; MA # ( ℙ → P ) DOUBLE-STRUCK CAPITAL P → LATIN CAPITAL LETTER P # +1CCE5 ; 0050 ; MA #* ( → P ) OUTLINED LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # 1D40F ; 0050 ; MA # ( 𝐏 → P ) MATHEMATICAL BOLD CAPITAL P → LATIN CAPITAL LETTER P # 1D443 ; 0050 ; MA # ( 𝑃 → P ) MATHEMATICAL ITALIC CAPITAL P → LATIN CAPITAL LETTER P # 1D477 ; 0050 ; MA # ( 𝑷 → P ) MATHEMATICAL BOLD ITALIC CAPITAL P → LATIN CAPITAL LETTER P # @@ -3226,7 +3227,6 @@ FF30 ; 0050 ; MA # ( P → P ) FULLWIDTH LATIN CAPITAL LETTER P → LATIN CAPI 146D ; 0050 ; MA # ( ᑭ → P ) CANADIAN SYLLABICS KI → LATIN CAPITAL LETTER P # A4D1 ; 0050 ; MA # ( ꓑ → P ) LISU LETTER PA → LATIN CAPITAL LETTER P # 10295 ; 0050 ; MA # ( 𐊕 → P ) LYCIAN LETTER R → LATIN CAPITAL LETTER P # -1CCE5 ; 0050 ; MA #* ( → P ) OUTLINED LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # 01A5 ; 0070 0314 ; MA # ( ƥ → p̔ ) LATIN SMALL LETTER P WITH HOOK → LATIN SMALL LETTER P, COMBINING REVERSED COMMA ABOVE # @@ -3272,6 +3272,7 @@ ABB2 ; 1D18 ; MA # ( ꮲ → ᴘ ) CHEROKEE SMALL LETTER TLV → LATIN LETTER SM 0566 ; 0071 ; MA # ( զ → q ) ARMENIAN SMALL LETTER ZA → LATIN SMALL LETTER Q # 211A ; 0051 ; MA # ( ℚ → Q ) DOUBLE-STRUCK CAPITAL Q → LATIN CAPITAL LETTER Q # +1CCE6 ; 0051 ; MA #* ( → Q ) OUTLINED LATIN CAPITAL 
LETTER Q → LATIN CAPITAL LETTER Q # 1D410 ; 0051 ; MA # ( 𝐐 → Q ) MATHEMATICAL BOLD CAPITAL Q → LATIN CAPITAL LETTER Q # 1D444 ; 0051 ; MA # ( 𝑄 → Q ) MATHEMATICAL ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # 1D478 ; 0051 ; MA # ( 𝑸 → Q ) MATHEMATICAL BOLD ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # @@ -3285,7 +3286,6 @@ ABB2 ; 1D18 ; MA # ( ꮲ → ᴘ ) CHEROKEE SMALL LETTER TLV → LATIN LETTER SM 1D64C ; 0051 ; MA # ( 𝙌 → Q ) MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # 1D680 ; 0051 ; MA # ( 𝚀 → Q ) MATHEMATICAL MONOSPACE CAPITAL Q → LATIN CAPITAL LETTER Q # 2D55 ; 0051 ; MA # ( ⵕ → Q ) TIFINAGH LETTER YARR → LATIN CAPITAL LETTER Q # -1CCE6 ; 0051 ; MA #* ( → Q ) OUTLINED LATIN CAPITAL LETTER Q → LATIN CAPITAL LETTER Q # 02A0 ; 0071 0314 ; MA # ( ʠ → q̔ ) LATIN SMALL LETTER Q WITH HOOK → LATIN SMALL LETTER Q, COMBINING REVERSED COMMA ABOVE # @@ -3338,6 +3338,7 @@ AB81 ; 0072 ; MA # ( ꮁ → r ) CHEROKEE SMALL LETTER HU → LATIN SMALL LETTER 211B ; 0052 ; MA # ( ℛ → R ) SCRIPT CAPITAL R → LATIN CAPITAL LETTER R # 211C ; 0052 ; MA # ( ℜ → R ) BLACK-LETTER CAPITAL R → LATIN CAPITAL LETTER R # 211D ; 0052 ; MA # ( ℝ → R ) DOUBLE-STRUCK CAPITAL R → LATIN CAPITAL LETTER R # +1CCE7 ; 0052 ; MA #* ( → R ) OUTLINED LATIN CAPITAL LETTER R → LATIN CAPITAL LETTER R # 1D411 ; 0052 ; MA # ( 𝐑 → R ) MATHEMATICAL BOLD CAPITAL R → LATIN CAPITAL LETTER R # 1D445 ; 0052 ; MA # ( 𝑅 → R ) MATHEMATICAL ITALIC CAPITAL R → LATIN CAPITAL LETTER R # 1D479 ; 0052 ; MA # ( 𝑹 → R ) MATHEMATICAL BOLD ITALIC CAPITAL R → LATIN CAPITAL LETTER R # @@ -3355,7 +3356,6 @@ AB81 ; 0072 ; MA # ( ꮁ → r ) CHEROKEE SMALL LETTER HU → LATIN SMALL LETTER 1587 ; 0052 ; MA # ( ᖇ → R ) CANADIAN SYLLABICS TLHI → LATIN CAPITAL LETTER R # A4E3 ; 0052 ; MA # ( ꓣ → R ) LISU LETTER ZHA → LATIN CAPITAL LETTER R # 16F35 ; 0052 ; MA # ( 𖼵 → R ) MIAO LETTER ZHA → LATIN CAPITAL LETTER R # -1CCE7 ; 0052 ; MA #* ( → R ) OUTLINED LATIN CAPITAL LETTER R → LATIN CAPITAL LETTER R # 027D ; 0072 0328 ; MA # ( ɽ → 
r̨ ) LATIN SMALL LETTER R WITH TAIL → LATIN SMALL LETTER R, COMBINING OGONEK # @@ -3425,6 +3425,7 @@ ABAA ; 0073 ; MA # ( ꮪ → s ) CHEROKEE SMALL LETTER DU → LATIN SMALL LETTER 10448 ; 0073 ; MA # ( 𐑈 → s ) DESERET SMALL LETTER ZHEE → LATIN SMALL LETTER S # FF33 ; 0053 ; MA # ( S → S ) FULLWIDTH LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # →Ѕ→ +1CCE8 ; 0053 ; MA #* ( → S ) OUTLINED LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # 1D412 ; 0053 ; MA # ( 𝐒 → S ) MATHEMATICAL BOLD CAPITAL S → LATIN CAPITAL LETTER S # 1D446 ; 0053 ; MA # ( 𝑆 → S ) MATHEMATICAL ITALIC CAPITAL S → LATIN CAPITAL LETTER S # 1D47A ; 0053 ; MA # ( 𝑺 → S ) MATHEMATICAL BOLD ITALIC CAPITAL S → LATIN CAPITAL LETTER S # @@ -3446,13 +3447,14 @@ A4E2 ; 0053 ; MA # ( ꓢ → S ) LISU LETTER SA → LATIN CAPITAL LETTER S # 16F3A ; 0053 ; MA # ( 𖼺 → S ) MIAO LETTER SA → LATIN CAPITAL LETTER S # 10296 ; 0053 ; MA # ( 𐊖 → S ) LYCIAN LETTER S → LATIN CAPITAL LETTER S # 10420 ; 0053 ; MA # ( 𐐠 → S ) DESERET CAPITAL LETTER ZHEE → LATIN CAPITAL LETTER S # -1CCE8 ; 0053 ; MA #* ( → S ) OUTLINED LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # 0282 ; 0073 0328 ; MA # ( ʂ → s̨ ) LATIN SMALL LETTER S WITH HOOK → LATIN SMALL LETTER S, COMBINING OGONEK # 1D74 ; 0073 0334 ; MA # ( ᵴ → s̴ ) LATIN SMALL LETTER S WITH MIDDLE TILDE → LATIN SMALL LETTER S, COMBINING TILDE OVERLAY # A7B5 ; 00DF ; MA # ( ꞵ → ß ) LATIN SMALL LETTER BETA → LATIN SMALL LETTER SHARP S # →β→ +1E9E ; 00DF ; MA # ( ẞ → ß ) LATIN CAPITAL LETTER SHARP S → LATIN SMALL LETTER SHARP S # +A7D6 ; 00DF ; MA # ( Ꟗ → ß ) LATIN CAPITAL LETTER MIDDLE SCOTS S → LATIN SMALL LETTER SHARP S # →β→ 03B2 ; 00DF ; MA # ( β → ß ) GREEK SMALL LETTER BETA → LATIN SMALL LETTER SHARP S # 03D0 ; 00DF ; MA # ( ϐ → ß ) GREEK BETA SYMBOL → LATIN SMALL LETTER SHARP S # →β→ 1D6C3 ; 00DF ; MA # ( 𝛃 → ß ) MATHEMATICAL BOLD SMALL BETA → LATIN SMALL LETTER SHARP S # →β→ @@ -3503,6 +3505,7 @@ AB4D ; 0283 ; MA # ( ꭍ → ʃ ) LATIN SMALL LETTER BASELINE ESH → LATIN SMAL 27D9 ; 0054 ; 
MA #* ( ⟙ → T ) LARGE DOWN TACK → LATIN CAPITAL LETTER T # 1F768 ; 0054 ; MA #* ( 🝨 → T ) ALCHEMICAL SYMBOL FOR CRUCIBLE-4 → LATIN CAPITAL LETTER T # FF34 ; 0054 ; MA # ( T → T ) FULLWIDTH LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # →Т→ +1CCE9 ; 0054 ; MA #* ( → T ) OUTLINED LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # 1D413 ; 0054 ; MA # ( 𝐓 → T ) MATHEMATICAL BOLD CAPITAL T → LATIN CAPITAL LETTER T # 1D447 ; 0054 ; MA # ( 𝑇 → T ) MATHEMATICAL ITALIC CAPITAL T → LATIN CAPITAL LETTER T # 1D47B ; 0054 ; MA # ( 𝑻 → T ) MATHEMATICAL BOLD ITALIC CAPITAL T → LATIN CAPITAL LETTER T # @@ -3531,7 +3534,6 @@ A4D4 ; 0054 ; MA # ( ꓔ → T ) LISU LETTER TA → LATIN CAPITAL LETTER T # 10297 ; 0054 ; MA # ( 𐊗 → T ) LYCIAN LETTER T → LATIN CAPITAL LETTER T # 102B1 ; 0054 ; MA # ( 𐊱 → T ) CARIAN LETTER C-18 → LATIN CAPITAL LETTER T # 10315 ; 0054 ; MA # ( 𐌕 → T ) OLD ITALIC LETTER TE → LATIN CAPITAL LETTER T # -1CCE9 ; 0054 ; MA #* ( → T ) OUTLINED LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # 01AD ; 0074 0314 ; MA # ( ƭ → t̔ ) LATIN SMALL LETTER T WITH HOOK → LATIN SMALL LETTER T, COMBINING REVERSED COMMA ABOVE # @@ -3614,6 +3616,7 @@ AB52 ; 0075 ; MA # ( ꭒ → u ) LATIN SMALL LETTER U WITH LEFT HOOK → LATIN S 222A ; 0055 ; MA #* ( ∪ → U ) UNION → LATIN CAPITAL LETTER U # →ᑌ→ 22C3 ; 0055 ; MA #* ( ⋃ → U ) N-ARY UNION → LATIN CAPITAL LETTER U # →∪→→ᑌ→ +1CCEA ; 0055 ; MA #* ( → U ) OUTLINED LATIN CAPITAL LETTER U → LATIN CAPITAL LETTER U # 1D414 ; 0055 ; MA # ( 𝐔 → U ) MATHEMATICAL BOLD CAPITAL U → LATIN CAPITAL LETTER U # 1D448 ; 0055 ; MA # ( 𝑈 → U ) MATHEMATICAL ITALIC CAPITAL U → LATIN CAPITAL LETTER U # 1D47C ; 0055 ; MA # ( 𝑼 → U ) MATHEMATICAL BOLD ITALIC CAPITAL U → LATIN CAPITAL LETTER U # @@ -3634,7 +3637,6 @@ AB52 ; 0075 ; MA # ( ꭒ → u ) LATIN SMALL LETTER U WITH LEFT HOOK → LATIN S A4F4 ; 0055 ; MA # ( ꓴ → U ) LISU LETTER U → LATIN CAPITAL LETTER U # 16F42 ; 0055 ; MA # ( 𖽂 → U ) MIAO LETTER WA → LATIN CAPITAL LETTER U # 118B8 ; 0055 ; MA # ( 𑢸 → U ) WARANG 
CITI CAPITAL LETTER PU → LATIN CAPITAL LETTER U # -1CCEA ; 0055 ; MA #* ( → U ) OUTLINED LATIN CAPITAL LETTER U → LATIN CAPITAL LETTER U # 01D4 ; 016D ; MA # ( ǔ → ŭ ) LATIN SMALL LETTER U WITH CARON → LATIN SMALL LETTER U WITH BREVE # @@ -3699,6 +3701,7 @@ ABA9 ; 0076 ; MA # ( ꮩ → v ) CHEROKEE SMALL LETTER DO → LATIN SMALL LETTER 0667 ; 0056 ; MA # ( ٧ → V ) ARABIC-INDIC DIGIT SEVEN → LATIN CAPITAL LETTER V # 06F7 ; 0056 ; MA # ( ۷ → V ) EXTENDED ARABIC-INDIC DIGIT SEVEN → LATIN CAPITAL LETTER V # →٧→ 2164 ; 0056 ; MA # ( Ⅴ → V ) ROMAN NUMERAL FIVE → LATIN CAPITAL LETTER V # +1CCEB ; 0056 ; MA #* ( → V ) OUTLINED LATIN CAPITAL LETTER V → LATIN CAPITAL LETTER V # 1D415 ; 0056 ; MA # ( 𝐕 → V ) MATHEMATICAL BOLD CAPITAL V → LATIN CAPITAL LETTER V # 1D449 ; 0056 ; MA # ( 𝑉 → V ) MATHEMATICAL ITALIC CAPITAL V → LATIN CAPITAL LETTER V # 1D47D ; 0056 ; MA # ( 𝑽 → V ) MATHEMATICAL BOLD ITALIC CAPITAL V → LATIN CAPITAL LETTER V # @@ -3721,7 +3724,6 @@ A4E6 ; 0056 ; MA # ( ꓦ → V ) LISU LETTER HA → LATIN CAPITAL LETTER V # 16F08 ; 0056 ; MA # ( 𖼈 → V ) MIAO LETTER VA → LATIN CAPITAL LETTER V # 118A0 ; 0056 ; MA # ( 𑢠 → V ) WARANG CITI CAPITAL LETTER NGAA → LATIN CAPITAL LETTER V # 1051D ; 0056 ; MA # ( 𐔝 → V ) ELBASAN LETTER TE → LATIN CAPITAL LETTER V # -1CCEB ; 0056 ; MA #* ( → V ) OUTLINED LATIN CAPITAL LETTER V → LATIN CAPITAL LETTER V # 10197 ; 0056 0335 ; MA #* ( 𐆗 → V̵ ) ROMAN QUINARIUS SIGN → LATIN CAPITAL LETTER V, COMBINING SHORT STROKE OVERLAY # →V̶→ @@ -3748,6 +3750,7 @@ A4E6 ; 0056 ; MA # ( ꓦ → V ) LISU LETTER HA → LATIN CAPITAL LETTER V # 0668 ; 0245 ; MA # ( ٨ → Ʌ ) ARABIC-INDIC DIGIT EIGHT → LATIN CAPITAL LETTER TURNED V # →Λ→ 06F8 ; 0245 ; MA # ( ۸ → Ʌ ) EXTENDED ARABIC-INDIC DIGIT EIGHT → LATIN CAPITAL LETTER TURNED V # →٨→→Λ→ +A7DA ; 0245 ; MA # ( → Ʌ ) LATIN CAPITAL LETTER LAMBDA → LATIN CAPITAL LETTER TURNED V # →Λ→ 039B ; 0245 ; MA # ( Λ → Ʌ ) GREEK CAPITAL LETTER LAMDA → LATIN CAPITAL LETTER TURNED V # 1D6B2 ; 0245 ; MA # ( 𝚲 → Ʌ ) MATHEMATICAL BOLD 
CAPITAL LAMDA → LATIN CAPITAL LETTER TURNED V # →Λ→ 1D6EC ; 0245 ; MA # ( 𝛬 → Ʌ ) MATHEMATICAL ITALIC CAPITAL LAMDA → LATIN CAPITAL LETTER TURNED V # →Λ→ @@ -3763,6 +3766,8 @@ A4E5 ; 0245 ; MA # ( ꓥ → Ʌ ) LISU LETTER NGA → LATIN CAPITAL LETTER TURNE 16F3D ; 0245 ; MA # ( 𖼽 → Ʌ ) MIAO LETTER ZZA → LATIN CAPITAL LETTER TURNED V # 1028D ; 0245 ; MA # ( 𐊍 → Ʌ ) LYCIAN LETTER L → LATIN CAPITAL LETTER TURNED V # →Λ→ +A7DC ; 0245 0338 ; MA # ( → Ʌ̸ ) LATIN CAPITAL LETTER LAMBDA WITH STROKE → LATIN CAPITAL LETTER TURNED V, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ + 04C5 ; 0245 0326 ; MA # ( Ӆ → Ʌ̦ ) CYRILLIC CAPITAL LETTER EL WITH TAIL → LATIN CAPITAL LETTER TURNED V, COMBINING COMMA BELOW # →Л̡→ 143D ; 0245 00B7 ; MA # ( ᐽ → Ʌ· ) CANADIAN SYLLABICS WEST-CREE PWI → LATIN CAPITAL LETTER TURNED V, MIDDLE DOT # →ᐱᐧ→→ᐱ·→ @@ -3792,6 +3797,7 @@ AB83 ; 0077 ; MA # ( ꮃ → w ) CHEROKEE SMALL LETTER LA → LATIN SMALL LETTER 118EF ; 0057 ; MA #* ( 𑣯 → W ) WARANG CITI NUMBER SIXTY → LATIN CAPITAL LETTER W # 118E6 ; 0057 ; MA # ( 𑣦 → W ) WARANG CITI DIGIT SIX → LATIN CAPITAL LETTER W # +1CCEC ; 0057 ; MA #* ( → W ) OUTLINED LATIN CAPITAL LETTER W → LATIN CAPITAL LETTER W # 1D416 ; 0057 ; MA # ( 𝐖 → W ) MATHEMATICAL BOLD CAPITAL W → LATIN CAPITAL LETTER W # 1D44A ; 0057 ; MA # ( 𝑊 → W ) MATHEMATICAL ITALIC CAPITAL W → LATIN CAPITAL LETTER W # 1D47E ; 0057 ; MA # ( 𝑾 → W ) MATHEMATICAL BOLD ITALIC CAPITAL W → LATIN CAPITAL LETTER W # @@ -3809,7 +3815,6 @@ AB83 ; 0077 ; MA # ( ꮃ → w ) CHEROKEE SMALL LETTER LA → LATIN SMALL LETTER 13B3 ; 0057 ; MA # ( Ꮃ → W ) CHEROKEE LETTER LA → LATIN CAPITAL LETTER W # 13D4 ; 0057 ; MA # ( Ꮤ → W ) CHEROKEE LETTER TA → LATIN CAPITAL LETTER W # A4EA ; 0057 ; MA # ( ꓪ → W ) LISU LETTER WA → LATIN CAPITAL LETTER W # -1CCEC ; 0057 ; MA #* ( → W ) OUTLINED LATIN CAPITAL LETTER W → LATIN CAPITAL LETTER W # 047D ; 0077 0486 0487 ; MA # ( ѽ → w҆҇ ) CYRILLIC SMALL LETTER OMEGA WITH TITLO → LATIN SMALL LETTER W, COMBINING CYRILLIC PSILI PNEUMATA, COMBINING CYRILLIC 
POKRYTIE # →ѡ҆҇→ @@ -3857,6 +3862,7 @@ FF58 ; 0078 ; MA # ( x → x ) FULLWIDTH LATIN SMALL LETTER X → LATIN SMALL 118EC ; 0058 ; MA #* ( 𑣬 → X ) WARANG CITI NUMBER THIRTY → LATIN CAPITAL LETTER X # FF38 ; 0058 ; MA # ( X → X ) FULLWIDTH LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # →Х→ 2169 ; 0058 ; MA # ( Ⅹ → X ) ROMAN NUMERAL TEN → LATIN CAPITAL LETTER X # +1CCED ; 0058 ; MA #* ( → X ) OUTLINED LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # 1D417 ; 0058 ; MA # ( 𝐗 → X ) MATHEMATICAL BOLD CAPITAL X → LATIN CAPITAL LETTER X # 1D44B ; 0058 ; MA # ( 𝑋 → X ) MATHEMATICAL ITALIC CAPITAL X → LATIN CAPITAL LETTER X # 1D47F ; 0058 ; MA # ( 𝑿 → X ) MATHEMATICAL BOLD ITALIC CAPITAL X → LATIN CAPITAL LETTER X # @@ -3886,7 +3892,6 @@ A4EB ; 0058 ; MA # ( ꓫ → X ) LISU LETTER SHA → LATIN CAPITAL LETTER X # 102B4 ; 0058 ; MA # ( 𐊴 → X ) CARIAN LETTER X → LATIN CAPITAL LETTER X # 10317 ; 0058 ; MA # ( 𐌗 → X ) OLD ITALIC LETTER EKS → LATIN CAPITAL LETTER X # 10527 ; 0058 ; MA # ( 𐔧 → X ) ELBASAN LETTER KHE → LATIN CAPITAL LETTER X # -1CCED ; 0058 ; MA #* ( → X ) OUTLINED LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # 2A30 ; 0078 0307 ; MA #* ( ⨰ → ẋ ) MULTIPLICATION SIGN WITH DOT ABOVE → LATIN SMALL LETTER X, COMBINING DOT ABOVE # →×̇→ @@ -3934,6 +3939,7 @@ AB5A ; 0079 ; MA # ( ꭚ → y ) LATIN SMALL LETTER Y WITH SHORT RIGHT LEG → L 118DC ; 0079 ; MA # ( 𑣜 → y ) WARANG CITI SMALL LETTER HAR → LATIN SMALL LETTER Y # →ɣ→→γ→ FF39 ; 0059 ; MA # ( Y → Y ) FULLWIDTH LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # →Υ→ +1CCEE ; 0059 ; MA #* ( → Y ) OUTLINED LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # 1D418 ; 0059 ; MA # ( 𝐘 → Y ) MATHEMATICAL BOLD CAPITAL Y → LATIN CAPITAL LETTER Y # 1D44C ; 0059 ; MA # ( 𝑌 → Y ) MATHEMATICAL ITALIC CAPITAL Y → LATIN CAPITAL LETTER Y # 1D480 ; 0059 ; MA # ( 𝒀 → Y ) MATHEMATICAL BOLD ITALIC CAPITAL Y → LATIN CAPITAL LETTER Y # @@ -3963,7 +3969,6 @@ A4EC ; 0059 ; MA # ( ꓬ → Y ) LISU LETTER YA → LATIN CAPITAL LETTER Y # 16F43 ; 0059 ; MA # ( 𖽃 
→ Y ) MIAO LETTER AH → LATIN CAPITAL LETTER Y # 118A4 ; 0059 ; MA # ( 𑢤 → Y ) WARANG CITI CAPITAL LETTER YA → LATIN CAPITAL LETTER Y # 102B2 ; 0059 ; MA # ( 𐊲 → Y ) CARIAN LETTER U → LATIN CAPITAL LETTER Y # -1CCEE ; 0059 ; MA #* ( → Y ) OUTLINED LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # 01B4 ; 0079 0314 ; MA # ( ƴ → y̔ ) LATIN SMALL LETTER Y WITH HOOK → LATIN SMALL LETTER Y, COMBINING REVERSED COMMA ABOVE # @@ -4002,6 +4007,7 @@ AB93 ; 007A ; MA # ( ꮓ → z ) CHEROKEE SMALL LETTER NO → LATIN SMALL LETTER FF3A ; 005A ; MA # ( Z → Z ) FULLWIDTH LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # →Ζ→ 2124 ; 005A ; MA # ( ℤ → Z ) DOUBLE-STRUCK CAPITAL Z → LATIN CAPITAL LETTER Z # 2128 ; 005A ; MA # ( ℨ → Z ) BLACK-LETTER CAPITAL Z → LATIN CAPITAL LETTER Z # +1CCEF ; 005A ; MA #* ( → Z ) OUTLINED LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # 1D419 ; 005A ; MA # ( 𝐙 → Z ) MATHEMATICAL BOLD CAPITAL Z → LATIN CAPITAL LETTER Z # 1D44D ; 005A ; MA # ( 𝑍 → Z ) MATHEMATICAL ITALIC CAPITAL Z → LATIN CAPITAL LETTER Z # 1D481 ; 005A ; MA # ( 𝒁 → Z ) MATHEMATICAL BOLD ITALIC CAPITAL Z → LATIN CAPITAL LETTER Z # @@ -4022,7 +4028,6 @@ FF3A ; 005A ; MA # ( Z → Z ) FULLWIDTH LATIN CAPITAL LETTER Z → LATIN CAPI 13C3 ; 005A ; MA # ( Ꮓ → Z ) CHEROKEE LETTER NO → LATIN CAPITAL LETTER Z # A4DC ; 005A ; MA # ( ꓜ → Z ) LISU LETTER DZA → LATIN CAPITAL LETTER Z # 118A9 ; 005A ; MA # ( 𑢩 → Z ) WARANG CITI CAPITAL LETTER O → LATIN CAPITAL LETTER Z # -1CCEF ; 005A ; MA #* ( → Z ) OUTLINED LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # 0290 ; 007A 0328 ; MA # ( ʐ → z̨ ) LATIN SMALL LETTER Z WITH RETROFLEX HOOK → LATIN SMALL LETTER Z, COMBINING OGONEK # →z̢→ @@ -4115,6 +4120,7 @@ A668 ; 0298 ; MA # ( Ꙩ → ʘ ) CYRILLIC CAPITAL LETTER MONOCULAR O → LATIN 2CE4 ; 03D7 ; MA # ( ⳤ → ϗ ) COPTIC SYMBOL KAI → GREEK KAI SYMBOL # +A7DB ; 03BB ; MA # ( → λ ) LATIN SMALL LETTER LAMBDA → GREEK SMALL LETTER LAMDA # 1D6CC ; 03BB ; MA # ( 𝛌 → λ ) MATHEMATICAL BOLD SMALL LAMDA → GREEK SMALL LETTER LAMDA # 
1D706 ; 03BB ; MA # ( 𝜆 → λ ) MATHEMATICAL ITALIC SMALL LAMDA → GREEK SMALL LETTER LAMDA # 1D740 ; 03BB ; MA # ( 𝝀 → λ ) MATHEMATICAL BOLD ITALIC SMALL LAMDA → GREEK SMALL LETTER LAMDA # @@ -4123,6 +4129,8 @@ A668 ; 0298 ; MA # ( Ꙩ → ʘ ) CYRILLIC CAPITAL LETTER MONOCULAR O → LATIN 2C96 ; 03BB ; MA # ( Ⲗ → λ ) COPTIC CAPITAL LETTER LAULA → GREEK SMALL LETTER LAMDA # 104DB ; 03BB ; MA # ( 𐓛 → λ ) OSAGE SMALL LETTER AH → GREEK SMALL LETTER LAMDA # +019B ; 03BB 0338 ; MA # ( ƛ → λ̸ ) LATIN SMALL LETTER LAMBDA WITH STROKE → GREEK SMALL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →λ̷→ + 00B5 ; 03BC ; MA # ( µ → μ ) MICRO SIGN → GREEK SMALL LETTER MU # 1D6CD ; 03BC ; MA # ( 𝛍 → μ ) MATHEMATICAL BOLD SMALL MU → GREEK SMALL LETTER MU # 1D707 ; 03BC ; MA # ( 𝜇 → μ ) MATHEMATICAL ITALIC SMALL MU → GREEK SMALL LETTER MU # @@ -5982,6 +5990,8 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL 11CB2 ; 11CAA ; MA # ( 𑲲 → 𑲪 ) MARCHEN VOWEL SIGN U → MARCHEN SUBJOINED LETTER RA # +1734 ; 1715 ; MA # ( ᜴ → ᜕ ) HANUNOO SIGN PAMUDPOD → TAGALOG SIGN PAMUDPOD # + 1081 ; 1002 103E ; MA # ( ႁ → ဂှ ) MYANMAR LETTER SHAN HA → MYANMAR LETTER GA, MYANMAR CONSONANT SIGN MEDIAL HA # 1000 ; 1002 102C ; MA # ( က → ဂာ ) MYANMAR LETTER KA → MYANMAR LETTER GA, MYANMAR VOWEL SIGN AA # @@ -7221,6 +7231,7 @@ FA31 ; 50E7 ; MA # ( 僧 → 僧 ) CJK COMPATIBILITY IDEOGRAPH-FA31 → CJK UNIF 2F80C ; 349E ; MA # ( 㒞 → 㒞 ) CJK COMPATIBILITY IDEOGRAPH-2F80C → CJK UNIFIED IDEOGRAPH-349E # +3126 ; 513F ; MA # ( ㄦ → 儿 ) BOPOMOFO LETTER ER → CJK UNIFIED IDEOGRAPH-513F # 2F09 ; 513F ; MA #* ( ⼉ → 儿 ) KANGXI RADICAL LEGS → CJK UNIFIED IDEOGRAPH-513F # FA0C ; 5140 ; MA # ( 兀 → 兀 ) CJK COMPATIBILITY IDEOGRAPH-FA0C → CJK UNIFIED IDEOGRAPH-5140 # @@ -7936,16 +7947,16 @@ FA8D ; 63C4 ; MA # ( 揄 → 揄 ) CJK COMPATIBILITY IDEOGRAPH-FA8D → CJK UNIF 2F8BD ; 63E4 ; MA # ( 揤 → 揤 ) CJK COMPATIBILITY IDEOGRAPH-2F8BD → CJK UNIFIED IDEOGRAPH-63E4 # -FA8F ; 6452 ; MA # ( 摒 → 摒 ) CJK COMPATIBILITY IDEOGRAPH-FA8F → 
CJK UNIFIED IDEOGRAPH-6452 # +FA8E ; 641C ; MA # ( 搜 → 搜 ) CJK COMPATIBILITY IDEOGRAPH-FA8E → CJK UNIFIED IDEOGRAPH-641C # 2F8BE ; 22BF1 ; MA # ( 𢯱 → 𢯱 ) CJK COMPATIBILITY IDEOGRAPH-2F8BE → CJK UNIFIED IDEOGRAPH-22BF1 # -FA8E ; 641C ; MA # ( 搜 → 搜 ) CJK COMPATIBILITY IDEOGRAPH-FA8E → CJK UNIFIED IDEOGRAPH-641C # - 2F8BF ; 6422 ; MA # ( 搢 → 搢 ) CJK COMPATIBILITY IDEOGRAPH-2F8BF → CJK UNIFIED IDEOGRAPH-6422 # 2F8C0 ; 63C5 ; MA # ( 揅 → 揅 ) CJK COMPATIBILITY IDEOGRAPH-2F8C0 → CJK UNIFIED IDEOGRAPH-63C5 # +FA8F ; 6452 ; MA # ( 摒 → 摒 ) CJK COMPATIBILITY IDEOGRAPH-FA8F → CJK UNIFIED IDEOGRAPH-6452 # + 2F8C3 ; 6469 ; MA # ( 摩 → 摩 ) CJK COMPATIBILITY IDEOGRAPH-2F8C3 → CJK UNIFIED IDEOGRAPH-6469 # 2F8C6 ; 6477 ; MA # ( 摷 → 摷 ) CJK COMPATIBILITY IDEOGRAPH-2F8C6 → CJK UNIFIED IDEOGRAPH-6477 # @@ -9670,5 +9681,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF 2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 # -# total: 6347 +# total: 6355 diff --git a/unicodetools/data/security/dev/confusablesSummary.txt b/unicodetools/data/security/dev/confusablesSummary.txt index a35691149..093100c4b 100644 --- a/unicodetools/data/security/dev/confusablesSummary.txt +++ b/unicodetools/data/security/dev/confusablesSummary.txt @@ -1,5 +1,5 @@ # confusablesSummary.txt -# Date: 2024-05-03, 03:26:41 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -91,7 +91,7 @@ ← ( ʽ ) 02BD MODIFIER LETTER REVERSED COMMA # →‘→ ← ( ʾ ) 02BE MODIFIER LETTER RIGHT HALF RING # →ʼ→→′→ ← ( ˈ ) 02C8 MODIFIER LETTER VERTICAL LINE -← ( ˊ ) 02CA MODIFIER LETTER ACUTE ACCENT # →ʹ→→′→ +← ( ˊ ) 02CA MODIFIER LETTER ACUTE ACCENT # →΄→→ʹ→ ← ( ˋ ) 02CB MODIFIER LETTER GRAVE ACCENT # →`→→‘→ ← ( ߴ ) 07F4 NKO HIGH TONE APOSTROPHE # →’→ ← ( ߵ ) 07F5 NKO LOW TONE APOSTROPHE # →‘→ @@ -4925,8 +4925,10 @@ ← ( Ϸ ) 03F7 GREEK CAPITAL LETTER SHO ← ( 𐓄 ) 104C4 OSAGE CAPITAL LETTER PA -# ß β Ᏸ ꞵ ϐ 𝛃 𝛽 𝜷 𝝱 𝞫 +# ß Ꟗ ẞ β Ᏸ ꞵ ϐ 𝛃 𝛽 𝜷 𝝱 𝞫 ( ß ) 00DF LATIN SMALL LETTER SHARP S +← ( Ꟗ ) A7D6 LATIN CAPITAL LETTER MIDDLE SCOTS S # →β→ +← ( ẞ ) 1E9E LATIN CAPITAL LETTER SHARP S ← ( β ) 03B2 GREEK SMALL LETTER BETA ← ( Ᏸ ) 13F0 CHEROKEE LETTER YE # →β→ ← ( ꞵ ) A7B5 LATIN SMALL LETTER BETA # →β→ @@ -5095,6 +5097,11 @@ ← ( 𝈡 ) 1D221 GREEK INSTRUMENTAL NOTATION SYMBOL-7 ← ( ℇ ) 2107 EULER CONSTANT +# λ̸ λ̷ ƛ + ( ƛ ) 019B LATIN SMALL LETTER LAMBDA WITH STROKE +← ( λ̸ ) 03BB 0338 GREEK SMALL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →λ̷→ +← ( λ̷ ) 03BB 0337 GREEK SMALL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY + # ƨ ᴤ ϩ ꙅ ( ƨ ) 01A8 LATIN SMALL LETTER TONE TWO ← ( ᴤ ) 1D24 LATIN LETTER VOICED LARYNGEAL SPIRANT @@ -5165,8 +5172,9 @@ ( ɂ ) 0242 LATIN SMALL LETTER GLOTTAL STOP ← ( ꭾ ) AB7E CHEROKEE SMALL LETTER HE -# Ʌ ٨ ۸ Λ Л ᐱ ⴷ ꓥ ꛎ 𐊍 𖼽 𐒰 𝚲 𝛬 𝜦 𝝠 𝞚 +# Ʌ ٨ ۸ Λ Л ᐱ ⴷ ꓥ ꛎ 𐊍 𖼽 𐒰 𝚲 𝛬 𝜦 𝝠 𝞚 ( Ʌ ) 0245 LATIN CAPITAL LETTER TURNED V +← ( ) A7DA LATIN CAPITAL LETTER LAMBDA # →Λ→ ← ( ٨ ) 0668 ARABIC-INDIC DIGIT EIGHT # →Λ→ ← ( ۸ ) 06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT # →٨→→Λ→ ← ( Λ ) 039B GREEK CAPITAL LETTER LAMDA @@ -5197,6 +5205,13 @@ ← ( Л̡ ) 041B 0321 CYRILLIC CAPITAL LETTER EL, COMBINING PALATALIZED HOOK BELOW ← ( Ӆ ) 04C5 CYRILLIC CAPITAL LETTER EL WITH TAIL # →Л̡→ +# ̸ Ʌ̸ Λ̸ Λ̷ + ( Ʌ̸ ) 0245 0338 LATIN CAPITAL LETTER TURNED V, COMBINING LONG SOLIDUS OVERLAY +← ( ̸ ) A7DA 0338 
LATIN CAPITAL LETTER LAMBDA, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ +← ( Λ̸ ) 039B 0338 GREEK CAPITAL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ +← ( Λ̷ ) 039B 0337 GREEK CAPITAL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY +← ( ) A7DC LATIN CAPITAL LETTER LAMBDA WITH STROKE # →Λ̷→ + # ɋ ᶐ ( ɋ ) 024B LATIN SMALL LETTER Q WITH HOOK TAIL ← ( ᶐ ) 1D90 LATIN SMALL LETTER ALPHA WITH RETROFLEX HOOK @@ -5868,8 +5883,9 @@ ← ( 𝝵 ) 1D775 MATHEMATICAL SANS-SERIF BOLD SMALL ZETA ← ( 𝞯 ) 1D7AF MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ZETA -# λ Ⲗ 𐓛 𝛌 𝜆 𝝀 𝝺 𝞴 +# λ Ⲗ 𐓛 𝛌 𝜆 𝝀 𝝺 𝞴 ( λ ) 03BB GREEK SMALL LETTER LAMDA +← ( ) A7DB LATIN SMALL LETTER LAMBDA ← ( Ⲗ ) 2C96 COPTIC CAPITAL LETTER LAULA ← ( 𐓛 ) 104DB OSAGE SMALL LETTER AH ← ( 𝛌 ) 1D6CC MATHEMATICAL BOLD SMALL LAMDA @@ -11373,6 +11389,10 @@ ( ᛯ ) 16EF RUNIC TVIMADUR SYMBOL ← ( ⵣ ) 2D63 TIFINAGH LETTER YAZ +# ᜕ ᜴ + ( ᜕ ) 1715 TAGALOG SIGN PAMUDPOD +← ( ᜴ ) 1734 HANUNOO SIGN PAMUDPOD + # អ ឣ ( អ ) 17A2 KHMER LETTER QA ← ( ឣ ) 17A3 KHMER INDEPENDENT VOWEL QAQ @@ -12322,9 +12342,10 @@ ( ⼈ ) 2F08 KANGXI RADICAL MAN ← ( 人 ) 4EBA CJK UNIFIED IDEOGRAPH-4EBA -# 儿 ⼉ +# 儿 ㄦ ⼉ ( ⼉ ) 2F09 KANGXI RADICAL LEGS ← ( 儿 ) 513F CJK UNIFIED IDEOGRAPH-513F +← ( ㄦ ) 3126 BOPOMOFO LETTER ER # →儿→ # 入 ⼊ ( ⼊ ) 2F0A KANGXI RADICAL ENTER @@ -17245,5 +17266,5 @@ ( 𪘀 ) 2A600 CJK UNIFIED IDEOGRAPH-2A600 ← ( 𪘀 ) 2FA1D CJK COMPATIBILITY IDEOGRAPH-2FA1D -# total : 7290 +# total : 7302 diff --git a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt index 5435b8bff..6bd3611df 100644 --- a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt +++ b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt @@ -1,5 +1,5 @@ # confusablesSummaryIdentifier.txt -# Date: 2024-05-04, 21:31:06 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. 
and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -542,8 +542,10 @@ ( Ö ) 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS ← ( Ő ) 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -# ß β +# ß Ꟗ ẞ β ( ß ) 00DF LATIN SMALL LETTER SHARP S +← ( Ꟗ ) A7D6 LATIN CAPITAL LETTER MIDDLE SCOTS S # →β→ +← ( ẞ ) 1E9E LATIN CAPITAL LETTER SHARP S ← ( β ) 03B2 GREEK SMALL LETTER BETA # å ȧ @@ -1618,6 +1620,10 @@ ( 二 ) 4E8C CJK UNIFIED IDEOGRAPH-4E8C ← ( ニ ) 30CB KATAKANA LETTER NI +# 儿 ㄦ + ( 儿 ) 513F CJK UNIFIED IDEOGRAPH-513F +← ( ㄦ ) 3126 BOPOMOFO LETTER ER + # 八 ハ ( 八 ) 516B CJK UNIFIED IDEOGRAPH-516B ← ( ハ ) 30CF KATAKANA LETTER HA @@ -1839,5 +1845,5 @@ ( 鹂 ) 9E42 CJK UNIFIED IDEOGRAPH-9E42 ← ( 鹃 ) 9E43 CJK UNIFIED IDEOGRAPH-9E43 -# total : 635 +# total : 638 diff --git a/unicodetools/data/security/dev/data/source/confusables-source.txt b/unicodetools/data/security/dev/data/source/confusables-source.txt index 65804747e..e4c47a779 100644 --- a/unicodetools/data/security/dev/data/source/confusables-source.txt +++ b/unicodetools/data/security/dev/data/source/confusables-source.txt @@ -1,4 +1,17 @@ -0021 ; 01C3 # ( ! → ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK +# See https://github.com/unicode-org/unicodetools/blob/main/docs/security.md for how to use this file. +# The format is +# Source ; Target ; comments # comments +# Source is: +# - a hex code point +# - a literal character +# - a range of the above with .. (need to check this) +# - a UnicodeSet +# Target is: +# - a hex code point +# - a literal character +# - a sequence of hex code points and or literal characters (they can be mixed) +####### +0021 ; 01C3 # ( ! 
→ ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK 0022 ; 02BA # ( " → ʺ) QUOTATION MARK → MODIFIER LETTER DOUBLE PRIME 0022 ; 0027 0027 0022 ; 05F4 # ( " → ״) QUOTATION MARK → HEBREW PUNCTUATION GERSHAYIM @@ -5437,4 +5450,26 @@ ABBB; 0473; V8_0; ꮻ => ѳ; CHEROKEE SMALL LETTER WI => CYRILLIC SMALL LETTER F 1F16E ; C 20E0 ; V11_0 ; CIRCLED C WITH OVERLAID BACKSLASH # 1F16F ; 🚹 ; V11_0 ; CIRCLED HUMAN FIGURE +# 178-A76 — Section 21 of document L2/24-012 +513F ; 儿 # V16.0 ; U+513F ︎➡︎ U+16FF2 +16FF3 ; 兒 # V16.0 ; U+5152 ➡ U+16FF3 +ㄦ ; 儿 # V16.0 ; U+3126 ㄦ BOPOMOFO LETTER ER ➡ 儿 + +# 176-A116 — Section 2a of L2/23-164 + +A7DA ; Λ # V16.0 ; U+A7DA LATIN CAPITAL LETTER LAMBDA ➡ greek equiv +A7DB ; λ # V16.0 ; U+A7DB LATIN SMALL LETTER LAMBDA ➡ greek equiv +A7DC ; Λ 0337 # V16.0 ; U+A7DC LATIN CAPITAL LETTER LAMBDA WITH STROKE ➡ greek equiv +ƛ ; λ 0337 # V16.0 ; existing Latin variant + +# 165-A37 — L2/20-272 + +1715 ; 1734 # V16.0 ; U+1715 TAGALOG SIGN PAMUDPOD ➡ 1734, Hanunoo Sign Pamudpod + +# 166-A55 — Section 3n of L2/21-016R + +ß ; β # sharp S with beta +ẞ ; ß # sharp S upper/lower +A7D6 ; β # Middle Scots S, uppercase +A7D6 ; β # Middle Scots S, lowercase diff --git a/unicodetools/data/security/dev/data/source/formatted-source.txt b/unicodetools/data/security/dev/data/source/formatted-source.txt index b7475ec78..216c1689e 100644 --- a/unicodetools/data/security/dev/data/source/formatted-source.txt +++ b/unicodetools/data/security/dev/data/source/formatted-source.txt @@ -1,5 +1,5 @@ # formatted-source.txt -# Date: 2024-05-03, 03:26:38 GMT +# Date: 2024-05-31, 21:12:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -899,6 +899,9 @@ 00DE ; 104C4 # ( Þ ~ 𐓄 ) LATIN CAPITAL LETTER THORN ~ OSAGE CAPITAL LETTER PA +00DF ; 1E9E # ( ß ~ ẞ ) LATIN SMALL LETTER SHARP S ~ LATIN CAPITAL LETTER SHARP S +00DF ; 03B2 # ( ß ~ β ) LATIN SMALL LETTER SHARP S ~ GREEK SMALL LETTER BETA + 00E5 ; 0227 # ( å ~ ȧ ) LATIN SMALL LETTER A WITH RING ABOVE ~ LATIN SMALL LETTER A WITH DOT ABOVE 00F0 ; 1E8CD # ( ð ~ 𞣍 ) LATIN SMALL LETTER ETH ~ MENDE KIKAKUI DIGIT SEVEN @@ -1295,6 +1298,8 @@ 039B ; A6CE # ( Λ ~ ꛎ ) GREEK CAPITAL LETTER LAMDA ~ BAMUM LETTER MI 039B ; 1028D # ( Λ ~ 𐊍 ) GREEK CAPITAL LETTER LAMDA ~ LYCIAN LETTER L +039B 0337 ; A7DC # ( Λ̷ ~ ) GREEK CAPITAL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY ~ LATIN CAPITAL LETTER LAMBDA WITH STROKE + 039C ; 041C # ( Μ ~ М ) GREEK CAPITAL LETTER MU ~ CYRILLIC CAPITAL LETTER EM 039C ; 216F # ( Μ ~ Ⅿ ) GREEK CAPITAL LETTER MU ~ ROMAN NUMERAL ONE THOUSAND @@ -1354,8 +1359,11 @@ 03BA ; 043A # ( κ ~ к ) GREEK SMALL LETTER KAPPA ~ CYRILLIC SMALL LETTER KA +03BB ; A7DB # ( λ ~ ) GREEK SMALL LETTER LAMDA ~ LATIN SMALL LETTER LAMBDA 03BB ; 104DB # ( λ ~ 𐓛 ) GREEK SMALL LETTER LAMDA ~ OSAGE SMALL LETTER AH +03BB 0337 ; 019B # ( λ̷ ~ ƛ ) GREEK SMALL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY ~ LATIN SMALL LETTER LAMBDA WITH STROKE + 03BD ; 2174 # ( ν ~ ⅴ ) GREEK SMALL LETTER NU ~ SMALL ROMAN NUMERAL FIVE 03BF ; 043E # ( ο ~ о ) GREEK SMALL LETTER OMICRON ~ CYRILLIC SMALL LETTER O @@ -3599,6 +3607,8 @@ 16EF ; 2D63 # ( ᛯ ~ ⵣ ) RUNIC TVIMADUR SYMBOL ~ TIFINAGH LETTER YAZ +1715 ; 1734 # ( ᜕ ~ ᜴ ) TAGALOG SIGN PAMUDPOD ~ HANUNOO SIGN PAMUDPOD + 17A2 ; 17A3 # ( អ ~ ឣ ) KHMER LETTER QA ~ KHMER INDEPENDENT VOWEL QAQ 185C ; 1896 # ( ᡜ ~ ᢖ ) MONGOLIAN LETTER TODO DZA ~ MONGOLIAN LETTER ALI GALI ZA @@ -4030,6 +4040,8 @@ 5024 ; 503C # ( 値 ~ 值 ) CJK UNIFIED IDEOGRAPH-5024 ~ CJK UNIFIED IDEOGRAPH-503C +513F ; 3126 # ( 儿 ~ ㄦ ) CJK UNIFIED IDEOGRAPH-513F ~ BOPOMOFO LETTER ER + 5553 ; 555F # ( 
啓 ~ 啟 ) CJK UNIFIED IDEOGRAPH-5553 ~ CJK UNIFIED IDEOGRAPH-555F 5861 ; 586B # ( 塡 ~ 填 ) CJK UNIFIED IDEOGRAPH-5861 ~ CJK UNIFIED IDEOGRAPH-586B @@ -4158,6 +4170,10 @@ A792 ; 0404 # ( Ꞓ ~ Є ) LATIN CAPITAL LETTER C WITH BAR ~ CYRILLIC CAPITAL LE A793 ; 0454 # ( ꞓ ~ є ) LATIN SMALL LETTER C WITH BAR ~ CYRILLIC SMALL LETTER UKRAINIAN IE +A7D6 ; 03B2 # ( Ꟗ ~ β ) LATIN CAPITAL LETTER MIDDLE SCOTS S ~ GREEK SMALL LETTER BETA + +A7DA ; 039B # ( ~ Λ ) LATIN CAPITAL LETTER LAMBDA ~ GREEK CAPITAL LETTER LAMDA + A7FB ; 15B7 # ( ꟻ ~ ᖷ ) LATIN EPIGRAPHIC LETTER REVERSED F ~ CANADIAN SYLLABICS BLACKFOOT WA A7FB ; 1D230 # ( ꟻ ~ 𝈰 ) LATIN EPIGRAPHIC LETTER REVERSED F ~ GREEK INSTRUMENTAL NOTATION SYMBOL-30 diff --git a/unicodetools/data/uca/dev/CollationTest.html b/unicodetools/data/uca/dev/CollationTest.html index f204d01c4..82e72214a 100644 --- a/unicodetools/data/uca/dev/CollationTest.html +++ b/unicodetools/data/uca/dev/CollationTest.html @@ -91,7 +91,7 @@
If there are any errors, then the UCA implementation is not compliant.
These files contain test cases that include ill-formed strings, with surrogate code points. Implementations that do not weight surrogate code points the same way as reserved code points - may filter out such lines lines in the test cases, before testing for conformance.
+ may filter out such lines in the test cases, before testing for conformance.© COPY_YEAR Unicode, Inc. All Rights Reserved. diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt index 96274a852..4de1b642a 100644 --- a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt +++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt @@ -1,5 +1,5 @@ # CollationTest_NON_IGNORABLE.txt -# Date: 2024-05-02, 01:46:26 GMT +# Date: 2024-06-05, 18:49:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -677,8 +677,10 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 |] 10D26 0334; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 004A 0033 | 0002 0002 |] 0334 10D27; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 |] 10D27 0334; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 |] -10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] -10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |] +0334 10D6B; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] +10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] +0334 10D6D; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |] +10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |] 0334 10F48; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] 10F48 0334; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |] 0334 10F49; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 |] @@ -693,6 +695,7 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 |] 10F84 0334; # (𐾄) OLD UYGHUR COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 |] 0334 1E2AE; # (𞊮) TOTO SIGN 
RISING TONE [| 004A 0033 | 0002 0002 |] 1E2AE 0334; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 |] +0334 1E5EE; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 |] 1E5EE 0334; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 |] 0316 0334; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 |] 0334 0316; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 |] @@ -840,6 +843,7 @@ FE27 0334; # (︧) COMBINING LIGATURE LEFT HALF BELOW [| 004A 0034 | 0002 0002 | 10F83 0334; # (𐾃) OLD UYGHUR COMBINING DOT BELOW [| 004A 0034 | 0002 0002 |] 0334 10F85; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 |] 10F85 0334; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 |] +0334 1E5EF; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 |] 1E5EF 0334; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 |] 0334 3099; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 |] 3099 0334; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 |] @@ -1022,7 +1026,8 @@ FB1E 0334; # (ﬞ) HEBREW POINT JUDEO-SPANISH VARIKA [| 004A 0061 | 0002 0002 |] 089F 0334; # (࢟) ARABIC HALF MADDA OVER MADDA [| 004A 0082 | 0002 0002 |] 0334 10EAC; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 |] 10EAC 0334; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 |] -0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |] +0334 0897; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |] +0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |] 0334 0654; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 |] 0654 0334; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 |] 0334 10EAB; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 004A 0083 | 0002 0002 |] @@ -1167,7 +1172,8 @@ A6F1 0334; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 004A 00B7 | 0002 0002 |] 1C37 0334; # (᰷) LEPCHA SIGN NUKTA [| 004A 00C2 | 0002 0002 |] 0334 A9B3; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 |] A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU 
[| 004A 00C2 | 0002 0002 |] -10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |] +0334 10D6C; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |] +10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |] 0334 110BA; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 |] 110BA 0334; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 |] 0334 11173; # (𑅳) MAHAJANI SIGN NUKTA [| 004A 00C2 | 0002 0002 |] @@ -1218,7 +1224,8 @@ A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 |] 10A38 0334; # (𐨸) KHAROSHTHI SIGN BAR ABOVE [| 004A 00CD | 0002 0002 |] 0334 10A3A; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 |] 10A3A 0334; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 |] -10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |] +0334 10D6A; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |] +10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |] 0334 0E48; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 |] 0E48 0334; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 |] 0334 0E49; # (้) THAI CHARACTER MAI THO [| 004A 00D8 | 0002 0002 |] @@ -1594,11 +1601,11 @@ FE63 0062; # (﹣) SMALL HYPHEN-MINUS [020D 239A | 0020 0020 | 000F 0002 |] 1807 0061; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 2380 | 0020 0020 | 0002 0002 |] 1807 0041; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 2380 | 0020 0020 | 0002 0008 |] 1807 0062; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 239A | 0020 0020 | 0002 0002 |] -10D6E 0021; # () GARAY HYPHEN [0213 0269 | 0020 0020 | 0002 0002 |] -10D6E 003F; # () GARAY HYPHEN [0213 0270 | 0020 0020 | 0002 0002 |] -10D6E 0061; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0002 |] -10D6E 0041; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0008 |] -10D6E 0062; # () GARAY HYPHEN [0213 239A | 0020 0020 | 0002 0002 |] +10D6E 0021; # () GARAY HYPHEN [0213 0269 | 0020 0020 | 0002 0002 |] 
+10D6E 003F; # () GARAY HYPHEN [0213 0270 | 0020 0020 | 0002 0002 |] +10D6E 0061; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0002 |] +10D6E 0041; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0008 |] +10D6E 0062; # () GARAY HYPHEN [0213 239A | 0020 0020 | 0002 0002 |] 2010 0021; # (‐) HYPHEN [0214 0269 | 0020 0020 | 0002 0002 |] 2011 0021; # (‑) NON-BREAKING HYPHEN [0214 0269 | 0020 0020 | 001B 0002 |] 2010 003F; # (‐) HYPHEN [0214 0270 | 0020 0020 | 0002 0002 |] @@ -3184,8 +3191,8 @@ A67D 0021; # (꙽) COMBINING CYRILLIC PAYEROK [0269 | 0033 0020 | 0002 0002 |] 10D25 0021; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [0269 | 0033 0020 | 0002 0002 |] 10D26 0021; # (𐴦) HANIFI ROHINGYA SIGN TANA [0269 | 0033 0020 | 0002 0002 |] 10D27 0021; # (𐴧) HANIFI ROHINGYA SIGN TASSI [0269 | 0033 0020 | 0002 0002 |] -10D6B 0021; # () GARAY COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |] -10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [0269 | 0033 0020 | 0002 0002 |] +10D6B 0021; # () GARAY COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |] +10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [0269 | 0033 0020 | 0002 0002 |] 10F48 0021; # (𐽈) SOGDIAN COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |] 10F49 0021; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [0269 | 0033 0020 | 0002 0002 |] 10F4A 0021; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [0269 | 0033 0020 | 0002 0002 |] @@ -3410,7 +3417,7 @@ FE7E 0021; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0269 | 0081 0020 | 00 089F 0021; # (࢟) ARABIC HALF MADDA OVER MADDA [0269 | 0082 0020 | 0002 0002 |] 0AFC 0021; # (ૼ) GUJARATI SIGN MADDAH [0269 | 0082 0020 | 0002 0002 |] 10EAC 0021; # (𐺬) YEZIDI COMBINING MADDA MARK [0269 | 0082 0020 | 0002 0002 |] -0897 0021; # () ARABIC PEPET [0269 | 0082 0020 | 0004 0002 |] +0897 0021; # () ARABIC PEPET [0269 | 0082 0020 | 0004 0002 |] 0654 0021; # (ٔ) ARABIC HAMZA ABOVE [0269 | 0083 0020 | 0002 0002 |] 10EAB 0021; # (𐺫) YEZIDI COMBINING HAMZA MARK [0269 | 0083 0020 | 0002 0002 |] 0655 0021; # (ٕ) ARABIC HAMZA 
BELOW [0269 | 0084 0020 | 0002 0002 |] @@ -3434,7 +3441,7 @@ FE7E 0021; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0269 | 0081 0020 | 00 08F9 0021; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [0269 | 0096 0020 | 0002 0002 |] 08FA 0021; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [0269 | 0097 0020 | 0002 0002 |] 0670 0021; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [0269 | 0098 0020 | 0002 0002 |] -10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [0269 | 0098 0020 | 0002 0002 |] +10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [0269 | 0098 0020 | 0002 0002 |] 0711 0021; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [0269 | 0099 0020 | 0002 0002 |] 0730 0021; # (ܰ) SYRIAC PTHAHA ABOVE [0269 | 009A 0020 | 0002 0002 |] 0731 0021; # (ܱ) SYRIAC PTHAHA BELOW [0269 | 009B 0020 | 0002 0002 |] @@ -3492,7 +3499,7 @@ A6F1 0021; # (꛱) BAMUM COMBINING MARK TUKWENTIS [0269 | 00B7 0020 | 0002 0002 1BE6 0021; # (᯦) BATAK SIGN TOMPI [0269 | 00C2 0020 | 0002 0002 |] 1C37 0021; # (᰷) LEPCHA SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |] A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [0269 | 00C2 0020 | 0002 0002 |] -10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [0269 | 00C2 0020 | 0002 0002 |] +10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [0269 | 00C2 0020 | 0002 0002 |] 110BA 0021; # (𑂺) KAITHI SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |] 11173 0021; # (𑅳) MAHAJANI SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |] 111CA 0021; # (𑇊) SHARADA SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |] @@ -3647,7 +3654,7 @@ ABEC 0021; # (꯬) MEETEI MAYEK LUM IYEK [0269 | 00CC 0020 | 0002 0002 |] 111CB 0021; # (𑇋) SHARADA VOWEL MODIFIER MARK [0269 | 00D0 0020 | 0002 0002 |] 111CC 0021; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [0269 | 00D1 0020 | 0002 0002 |] 11A98 0021; # (𑪘) SOYOMBO GEMINATION MARK [0269 | 00D2 0020 | 0002 0002 |] -10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [0269 | 00D3 0020 | 0002 0002 |] +10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [0269 | 00D3 0020 | 0002 0002 |] 113D2 0021; # () TULU-TIGALARI GEMINATION MARK [0269 | 00D4 0020 
| 0002 0002 |] 0E4E 0021; # (๎) THAI CHARACTER YAMAKKAN [0269 | 00D5 0020 | 0002 0002 |] 0E47 0021; # (็) THAI CHARACTER MAITAIKHU [0269 | 00D6 0020 | 0002 0002 |] @@ -4855,8 +4862,8 @@ A67D 003F; # (꙽) COMBINING CYRILLIC PAYEROK [0270 | 0033 0020 | 0002 0002 |] 10D25 003F; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [0270 | 0033 0020 | 0002 0002 |] 10D26 003F; # (𐴦) HANIFI ROHINGYA SIGN TANA [0270 | 0033 0020 | 0002 0002 |] 10D27 003F; # (𐴧) HANIFI ROHINGYA SIGN TASSI [0270 | 0033 0020 | 0002 0002 |] -10D6B 003F; # () GARAY COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |] -10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [0270 | 0033 0020 | 0002 0002 |] +10D6B 003F; # () GARAY COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |] +10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [0270 | 0033 0020 | 0002 0002 |] 10F48 003F; # (𐽈) SOGDIAN COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |] 10F49 003F; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [0270 | 0033 0020 | 0002 0002 |] 10F4A 003F; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [0270 | 0033 0020 | 0002 0002 |] @@ -5081,7 +5088,7 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0270 | 0081 0020 | 00 089F 003F; # (࢟) ARABIC HALF MADDA OVER MADDA [0270 | 0082 0020 | 0002 0002 |] 0AFC 003F; # (ૼ) GUJARATI SIGN MADDAH [0270 | 0082 0020 | 0002 0002 |] 10EAC 003F; # (𐺬) YEZIDI COMBINING MADDA MARK [0270 | 0082 0020 | 0002 0002 |] -0897 003F; # () ARABIC PEPET [0270 | 0082 0020 | 0004 0002 |] +0897 003F; # () ARABIC PEPET [0270 | 0082 0020 | 0004 0002 |] 0654 003F; # (ٔ) ARABIC HAMZA ABOVE [0270 | 0083 0020 | 0002 0002 |] 10EAB 003F; # (𐺫) YEZIDI COMBINING HAMZA MARK [0270 | 0083 0020 | 0002 0002 |] 0655 003F; # (ٕ) ARABIC HAMZA BELOW [0270 | 0084 0020 | 0002 0002 |] @@ -5105,7 +5112,7 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0270 | 0081 0020 | 00 08F9 003F; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [0270 | 0096 0020 | 0002 0002 |] 08FA 003F; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [0270 | 0097 0020 | 0002 0002 |] 0670 003F; # (ٰ) ARABIC 
LETTER SUPERSCRIPT ALEF [0270 | 0098 0020 | 0002 0002 |] -10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [0270 | 0098 0020 | 0002 0002 |] +10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [0270 | 0098 0020 | 0002 0002 |] 0711 003F; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [0270 | 0099 0020 | 0002 0002 |] 0730 003F; # (ܰ) SYRIAC PTHAHA ABOVE [0270 | 009A 0020 | 0002 0002 |] 0731 003F; # (ܱ) SYRIAC PTHAHA BELOW [0270 | 009B 0020 | 0002 0002 |] @@ -5163,7 +5170,7 @@ A6F1 003F; # (꛱) BAMUM COMBINING MARK TUKWENTIS [0270 | 00B7 0020 | 0002 0002 1BE6 003F; # (᯦) BATAK SIGN TOMPI [0270 | 00C2 0020 | 0002 0002 |] 1C37 003F; # (᰷) LEPCHA SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |] A9B3 003F; # (꦳) JAVANESE SIGN CECAK TELU [0270 | 00C2 0020 | 0002 0002 |] -10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [0270 | 00C2 0020 | 0002 0002 |] +10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [0270 | 00C2 0020 | 0002 0002 |] 110BA 003F; # (𑂺) KAITHI SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |] 11173 003F; # (𑅳) MAHAJANI SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |] 111CA 003F; # (𑇊) SHARADA SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |] @@ -5318,7 +5325,7 @@ ABEC 003F; # (꯬) MEETEI MAYEK LUM IYEK [0270 | 00CC 0020 | 0002 0002 |] 111CB 003F; # (𑇋) SHARADA VOWEL MODIFIER MARK [0270 | 00D0 0020 | 0002 0002 |] 111CC 003F; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [0270 | 00D1 0020 | 0002 0002 |] 11A98 003F; # (𑪘) SOYOMBO GEMINATION MARK [0270 | 00D2 0020 | 0002 0002 |] -10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [0270 | 00D3 0020 | 0002 0002 |] +10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [0270 | 00D3 0020 | 0002 0002 |] 113D2 003F; # () TULU-TIGALARI GEMINATION MARK [0270 | 00D4 0020 | 0002 0002 |] 0E4E 003F; # (๎) THAI CHARACTER YAMAKKAN [0270 | 00D5 0020 | 0002 0002 |] 0E47 003F; # (็) THAI CHARACTER MAITAIKHU [0270 | 00D6 0020 | 0002 0002 |] @@ -58942,8 +58949,8 @@ A67D 0061; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0002 |] 10D25 0061; # (𐴥) HANIFI ROHINGYA SIGN 
TAHALA [2380 | 0033 0020 | 0002 0002 |] 10D26 0061; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0002 |] 10D27 0061; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0002 |] -10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |] -10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 |] +10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |] +10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 |] 10F48 0061; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |] 10F49 0061; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0002 |] 10F4A 0061; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0002 |] @@ -59029,8 +59036,8 @@ A67D 0041; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0008 |] 10D25 0041; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0008 |] 10D26 0041; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0008 |] 10D27 0041; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0008 |] -10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |] -10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 |] +10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |] +10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 |] 10F48 0041; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |] 10F49 0041; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0008 |] 10F4A 0041; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0008 |] @@ -59476,8 +59483,8 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00 089F 0041; # (࢟) ARABIC HALF MADDA OVER MADDA [2380 | 0082 0020 | 0002 0008 |] 0AFC 0041; # (ૼ) GUJARATI SIGN MADDAH [2380 | 0082 0020 | 0002 0008 |] 10EAC 0041; # (𐺬) YEZIDI COMBINING MADDA MARK [2380 | 0082 0020 | 0002 0008 |] -0897 0061; # 
() ARABIC PEPET [2380 | 0082 0020 | 0004 0002 |] -0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 |] +0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 |] +0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 |] 0654 0061; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0002 |] 10EAB 0061; # (𐺫) YEZIDI COMBINING HAMZA MARK [2380 | 0083 0020 | 0002 0002 |] 0654 0041; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0008 |] @@ -59523,9 +59530,9 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00 08FA 0061; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0002 |] 08FA 0041; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0008 |] 0670 0061; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0002 |] -10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 |] +10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 |] 0670 0041; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0008 |] -10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 |] +10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 |] 0711 0061; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0002 |] 0711 0041; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0008 |] 0730 0061; # (ܰ) SYRIAC PTHAHA ABOVE [2380 | 009A 0020 | 0002 0002 |] @@ -59626,7 +59633,7 @@ A6F1 0041; # (꛱) BAMUM COMBINING MARK TUKWENTIS [2380 | 00B7 0020 | 0002 0008 1BE6 0061; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0002 |] 1C37 0061; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |] A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 |] -10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 |] +10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 |] 110BA 0061; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |] 11173 0061; # (𑅳) MAHAJANI 
SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |] 111CA 0061; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |] @@ -59657,7 +59664,7 @@ A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 |] 1BE6 0041; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0008 |] 1C37 0041; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |] A9B3 0041; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0008 |] -10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 |] +10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 |] 110BA 0041; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |] 11173 0041; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |] 111CA 0041; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |] @@ -59950,8 +59957,8 @@ ABEC 0041; # (꯬) MEETEI MAYEK LUM IYEK [2380 | 00CC 0020 | 0002 0008 |] 111CC 0041; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [2380 | 00D1 0020 | 0002 0008 |] 11A98 0061; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0002 |] 11A98 0041; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0008 |] -10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 |] -10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 |] +10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 |] +10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 |] 113D2 0061; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0002 |] 113D2 0041; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0008 |] 0E4E 0061; # (๎) THAI CHARACTER YAMAKKAN [2380 | 00D5 0020 | 0002 0002 |] @@ -62026,8 +62033,8 @@ A67D 0062; # (꙽) COMBINING CYRILLIC PAYEROK [239A | 0033 0020 | 0002 0002 |] 10D25 0062; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [239A | 0033 0020 | 0002 0002 |] 10D26 0062; # (𐴦) HANIFI ROHINGYA SIGN TANA [239A | 0033 0020 | 0002 0002 |] 10D27 0062; # (𐴧) HANIFI ROHINGYA SIGN TASSI 
[239A | 0033 0020 | 0002 0002 |] -10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |] -10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 |] +10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |] +10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 |] 10F48 0062; # (𐽈) SOGDIAN COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |] 10F49 0062; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [239A | 0033 0020 | 0002 0002 |] 10F4A 0062; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [239A | 0033 0020 | 0002 0002 |] @@ -62254,7 +62261,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00 089F 0062; # (࢟) ARABIC HALF MADDA OVER MADDA [239A | 0082 0020 | 0002 0002 |] 0AFC 0062; # (ૼ) GUJARATI SIGN MADDAH [239A | 0082 0020 | 0002 0002 |] 10EAC 0062; # (𐺬) YEZIDI COMBINING MADDA MARK [239A | 0082 0020 | 0002 0002 |] -0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 |] +0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 |] 0654 0062; # (ٔ) ARABIC HAMZA ABOVE [239A | 0083 0020 | 0002 0002 |] 10EAB 0062; # (𐺫) YEZIDI COMBINING HAMZA MARK [239A | 0083 0020 | 0002 0002 |] 0655 0062; # (ٕ) ARABIC HAMZA BELOW [239A | 0084 0020 | 0002 0002 |] @@ -62278,7 +62285,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00 08F9 0062; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [239A | 0096 0020 | 0002 0002 |] 08FA 0062; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [239A | 0097 0020 | 0002 0002 |] 0670 0062; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [239A | 0098 0020 | 0002 0002 |] -10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 |] +10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 |] 0711 0062; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [239A | 0099 0020 | 0002 0002 |] 0730 0062; # (ܰ) SYRIAC PTHAHA ABOVE [239A | 009A 0020 | 0002 0002 |] 0731 0062; # (ܱ) SYRIAC PTHAHA BELOW [239A | 009B 0020 | 0002 0002 |] @@ -62336,7 +62343,7 
@@ A6F1 0062; # (꛱) BAMUM COMBINING MARK TUKWENTIS [239A | 00B7 0020 | 0002 0002 1BE6 0062; # (᯦) BATAK SIGN TOMPI [239A | 00C2 0020 | 0002 0002 |] 1C37 0062; # (᰷) LEPCHA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |] A9B3 0062; # (꦳) JAVANESE SIGN CECAK TELU [239A | 00C2 0020 | 0002 0002 |] -10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 |] +10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 |] 110BA 0062; # (𑂺) KAITHI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |] 11173 0062; # (𑅳) MAHAJANI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |] 111CA 0062; # (𑇊) SHARADA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |] @@ -62491,7 +62498,7 @@ ABEC 0062; # (꯬) MEETEI MAYEK LUM IYEK [239A | 00CC 0020 | 0002 0002 |] 111CB 0062; # (𑇋) SHARADA VOWEL MODIFIER MARK [239A | 00D0 0020 | 0002 0002 |] 111CC 0062; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [239A | 00D1 0020 | 0002 0002 |] 11A98 0062; # (𑪘) SOYOMBO GEMINATION MARK [239A | 00D2 0020 | 0002 0002 |] -10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 |] +10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 |] 113D2 0062; # () TULU-TIGALARI GEMINATION MARK [239A | 00D4 0020 | 0002 0002 |] 0E4E 0062; # (๎) THAI CHARACTER YAMAKKAN [239A | 00D5 0020 | 0002 0002 |] 0E47 0062; # (็) THAI CHARACTER MAITAIKHU [239A | 00D6 0020 | 0002 0002 |] @@ -102854,7 +102861,9 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 |] 113C8 0041; # () TULU-TIGALARI VOWEL SIGN AU [3329 2380 | 0020 0020 | 0002 0008 |] 113C2 113C9 0062; # () TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI AU LENGTH MARK [3329 239A | 0020 0020 | 0002 0002 |] 113C8 0062; # () TULU-TIGALARI VOWEL SIGN AU [3329 239A | 0020 0020 | 0002 0002 |] +0334 113CE; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 |] 113CE 0334; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 |] +0334 113CF; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 
004A 0020 | 0002 0004 |] 113CF 0334; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 |] 113CE 0021; # () TULU-TIGALARI SIGN VIRAMA [332A 0269 | 0020 0020 | 0002 0002 |] 113CF 0021; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 0269 | 0020 0020 | 0004 0002 |] @@ -102866,6 +102875,7 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 |] 113CF 0041; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 2380 | 0020 0020 | 0004 0008 |] 113CE 0062; # () TULU-TIGALARI SIGN VIRAMA [332A 239A | 0020 0020 | 0002 0002 |] 113CF 0062; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 239A | 0020 0020 | 0004 0002 |] +0334 113D0; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 |] 113D0 0334; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 |] 113D0 0021; # () TULU-TIGALARI CONJOINER [332B 0269 | 0020 0020 | 0002 0002 |] 113D0 003F; # () TULU-TIGALARI CONJOINER [332B 0270 | 0020 0020 | 0002 0002 |] @@ -136655,12 +136665,13 @@ A6EF 0062; # (ꛯ) BAMUM LETTER KOGHOM [4304 239A | 0020 0020 | 0002 0002 |] 10D4F 0061; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0002 |] 10D4F 0041; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0008 |] 10D4F 0062; # () GARAY SUKUN [4646 239A | 0020 0020 | 0002 0002 |] -10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |] -10D69 0021; # () GARAY VOWEL SIGN E [4647 0269 | 0020 0020 | 0002 0002 |] -10D69 003F; # () GARAY VOWEL SIGN E [4647 0270 | 0020 0020 | 0002 0002 |] -10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 |] -10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 |] -10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 |] +0334 10D69; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |] +10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |] +10D69 0021; # () GARAY VOWEL SIGN E [4647 0269 | 0020 0020 | 0002 0002 |] +10D69 003F; # () GARAY VOWEL SIGN E [4647 0270 | 0020 0020 | 0002 0002 |] +10D69 0061; # 
() GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 |] +10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 |] +10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 |] 10D70 0021; # () GARAY SMALL LETTER A [4648 0269 | 0020 0020 | 0002 0002 |] 10D50 0021; # () GARAY CAPITAL LETTER A [4648 0269 | 0020 0020 | 0008 0002 |] 10D70 003F; # () GARAY SMALL LETTER A [4648 0270 | 0020 0020 | 0002 0002 |] @@ -155814,6 +155825,8 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 |] 105C8 0041; # () TODHRI LETTER DHA [5236 2380 | 0020 0020 | 0002 0008 |] 105C8 0062; # () TODHRI LETTER DHA [5236 239A | 0020 0020 | 0002 0002 |] 105C9 0334; # () TODHRI LETTER EI [5237 | 0020 004A | 0002 0002 |] +105D2 0307 0334; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 004A | 0002 0002 |] +105D2 0334 0307; # (̴) TODHRI LETTER I, COMBINING TILDE OVERLAY [5237 | 0020 004A | 0002 0002 |] 105C9 0021; # () TODHRI LETTER EI [5237 0269 | 0020 0020 | 0002 0002 |] 105D2 0307 0021; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 0269 | 0020 0020 | 0002 0002 |] 105C9 003F; # () TODHRI LETTER EI [5237 0270 | 0020 0020 | 0002 0002 |] @@ -155958,6 +155971,8 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 |] 105E3 0061; # () TODHRI LETTER THA [5251 2380 | 0020 0020 | 0002 0002 |] 105E3 0041; # () TODHRI LETTER THA [5251 2380 | 0020 0020 | 0002 0008 |] 105E3 0062; # () TODHRI LETTER THA [5251 239A | 0020 0020 | 0002 0002 |] +105DA 0307 0334; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 004A | 0002 0002 |] +105DA 0334 0307; # (̴) TODHRI LETTER O, COMBINING TILDE OVERLAY [5252 | 0020 004A | 0002 0002 |] 105E4 0334; # () TODHRI LETTER U [5252 | 0020 004A | 0002 0002 |] 105DA 0307 0021; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 0269 | 0020 0020 | 0002 0002 |] 105E4 0021; # () TODHRI LETTER U [5252 0269 | 0020 0020 | 0002 0002 |] @@ -157213,6 +157228,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 
0020 | 0002 0002 |] 1612E 0061; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 2380 | 0020 0020 | 0002 0002 |] 1612E 0041; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 2380 | 0020 0020 | 0002 0008 |] 1612E 0062; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 239A | 0020 0020 | 0002 0002 |] +0334 1612F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 |] 1612F 0334; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 |] 1612F 0021; # () GURUNG KHEMA SIGN THOLHOMA [5338 0269 | 0020 0020 | 0002 0002 |] 1612F 003F; # () GURUNG KHEMA SIGN THOLHOMA [5338 0270 | 0020 0020 | 0002 0002 |] diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt index f236ee7be..d03e4ee81 100644 --- a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt +++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt @@ -1,5 +1,5 @@ # CollationTest_NON_IGNORABLE_SHORT.txt -# Date: 2024-05-02, 01:46:28 GMT +# Date: 2024-06-05, 18:49:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -677,7 +677,9 @@ A67D 0334 10D26 0334 0334 10D27 10D27 0334 +0334 10D6B 10D6B 0334 +0334 10D6D 10D6D 0334 0334 10F48 10F48 0334 @@ -693,6 +695,7 @@ A67D 0334 10F84 0334 0334 1E2AE 1E2AE 0334 +0334 1E5EE 1E5EE 0334 0316 0334 0334 0316 @@ -840,6 +843,7 @@ FE27 0334 10F83 0334 0334 10F85 10F85 0334 +0334 1E5EF 1E5EF 0334 0334 3099 3099 0334 @@ -1022,6 +1026,7 @@ FB1E 0334 089F 0334 0334 10EAC 10EAC 0334 +0334 0897 0897 0334 0334 0654 0654 0334 @@ -1167,6 +1172,7 @@ A6F1 0334 1C37 0334 0334 A9B3 A9B3 0334 +0334 10D6C 10D6C 0334 0334 110BA 110BA 0334 @@ -1218,6 +1224,7 @@ A9B3 0334 10A38 0334 0334 10A3A 10A3A 0334 +0334 10D6A 10D6A 0334 0334 0E48 0E48 0334 @@ -102854,7 +102861,9 @@ A8C4 0062 113C8 0041 113C2 113C9 0062 113C8 0062 +0334 113CE 113CE 0334 +0334 113CF 113CF 0334 113CE 0021 113CF 0021 @@ -102866,6 +102875,7 @@ A8C4 0062 113CF 0041 113CE 0062 113CF 0062 +0334 113D0 113D0 0334 113D0 0021 113D0 003F @@ -136655,6 +136665,7 @@ A6EF 0062 10D4F 0061 10D4F 0041 10D4F 0062 +0334 10D69 10D69 0334 10D69 0021 10D69 003F @@ -155814,6 +155825,8 @@ A4F7 0062 105C8 0041 105C8 0062 105C9 0334 +105D2 0307 0334 +105D2 0334 0307 105C9 0021 105D2 0307 0021 105C9 003F @@ -155958,6 +155971,8 @@ A4F7 0062 105E3 0061 105E3 0041 105E3 0062 +105DA 0307 0334 +105DA 0334 0307 105E4 0334 105DA 0307 0021 105E4 0021 @@ -157213,6 +157228,7 @@ A4F7 0062 1612E 0061 1612E 0041 1612E 0062 +0334 1612F 1612F 0334 1612F 0021 1612F 003F diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt index 8aeee7c00..5ba2ce5ae 100644 --- a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt +++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt @@ -1,5 +1,5 @@ # CollationTest_SHIFTED.txt -# Date: 2024-05-02, 01:46:28 GMT +# Date: 2024-06-05, 18:49:40 GMT # © 2024 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -99,9 +99,9 @@ FF0D 003F; # (-) FULLWIDTH HYPHEN-MINUS [| | | 020D 0270 |] 1806 003F; # (᠆) MONGOLIAN TODO SOFT HYPHEN [| | | 0211 0270 |] 1807 0021; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [| | | 0212 0269 |] 1807 003F; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [| | | 0212 0270 |] -10D6E 0334; # () GARAY HYPHEN [| | | 0213 |] -10D6E 0021; # () GARAY HYPHEN [| | | 0213 0269 |] -10D6E 003F; # () GARAY HYPHEN [| | | 0213 0270 |] +10D6E 0334; # () GARAY HYPHEN [| | | 0213 |] +10D6E 0021; # () GARAY HYPHEN [| | | 0213 0269 |] +10D6E 003F; # () GARAY HYPHEN [| | | 0213 0270 |] 2010 0021; # (‐) HYPHEN [| | | 0214 0269 |] 2011 0021; # (‑) NON-BREAKING HYPHEN [| | | 0214 0269 |] 2010 003F; # (‐) HYPHEN [| | | 0214 0270 |] @@ -23365,8 +23365,8 @@ A67D 0021; # (꙽) COMBINING CYRILLIC PAYEROK [| 0033 | 0002 | FFFF 0269 |] 10D25 0021; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [| 0033 | 0002 | FFFF 0269 |] 10D26 0021; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 0033 | 0002 | FFFF 0269 |] 10D27 0021; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 0033 | 0002 | FFFF 0269 |] -10D6B 0021; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |] -10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0269 |] +10D6B 0021; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |] +10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0269 |] 10F48 0021; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |] 10F49 0021; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 0033 | 0002 | FFFF 0269 |] 10F4A 0021; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [| 0033 | 0002 | FFFF 0269 |] @@ -23452,8 +23452,8 @@ A67D 003F; # (꙽) COMBINING CYRILLIC PAYEROK [| 0033 | 0002 | FFFF 0270 |] 10D25 003F; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [| 0033 | 0002 | FFFF 0270 |] 10D26 003F; # (𐴦) HANIFI ROHINGYA 
SIGN TANA [| 0033 | 0002 | FFFF 0270 |] 10D27 003F; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 0033 | 0002 | FFFF 0270 |] -10D6B 003F; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |] -10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0270 |] +10D6B 003F; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |] +10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0270 |] 10F48 003F; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |] 10F49 003F; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 0033 | 0002 | FFFF 0270 |] 10F4A 003F; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [| 0033 | 0002 | FFFF 0270 |] @@ -24941,8 +24941,10 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 | FFFF FF 10D26 0334; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0334 10D27; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 | FFFF FFFF |] 10D27 0334; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 | FFFF FFFF |] -10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] -10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |] +0334 10D6B; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] +10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] +0334 10D6D; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |] +10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0334 10F48; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] 10F48 0334; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0334 10F49; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |] @@ -24957,6 +24959,7 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 | FFFF FF 10F84 0334; # (𐾄) OLD UYGHUR COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 | FFFF 
FFFF |] 0334 1E2AE; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 | FFFF FFFF |] 1E2AE 0334; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 | FFFF FFFF |] +0334 1E5EE; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 | FFFF FFFF |] 1E5EE 0334; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 | FFFF FFFF |] 0316 0334; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] 0334 0316; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] @@ -25104,6 +25107,7 @@ FE27 0334; # (︧) COMBINING LIGATURE LEFT HALF BELOW [| 004A 0034 | 0002 0002 | 10F83 0334; # (𐾃) OLD UYGHUR COMBINING DOT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] 0334 10F85; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] 10F85 0334; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |] +0334 1E5EF; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 | FFFF FFFF |] 1E5EF 0334; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 | FFFF FFFF |] 0334 3099; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 | FFFF FFFF |] 3099 0334; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 | FFFF FFFF |] @@ -25286,7 +25290,8 @@ FB1E 0334; # (ﬞ) HEBREW POINT JUDEO-SPANISH VARIKA [| 004A 0061 | 0002 0002 | 089F 0334; # (࢟) ARABIC HALF MADDA OVER MADDA [| 004A 0082 | 0002 0002 | FFFF FFFF |] 0334 10EAC; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 | FFFF FFFF |] 10EAC 0334; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 | FFFF FFFF |] -0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |] +0334 0897; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |] +0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |] 0334 0654; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 | FFFF FFFF |] 0654 0334; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 | FFFF FFFF |] 0334 10EAB; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 004A 
0083 | 0002 0002 | FFFF FFFF |] @@ -25431,7 +25436,8 @@ A6F1 0334; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 004A 00B7 | 0002 0002 | FFF 1C37 0334; # (᰷) LEPCHA SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |] 0334 A9B3; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF |] A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF |] -10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |] +0334 10D6C; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |] +10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |] 0334 110BA; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |] 110BA 0334; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |] 0334 11173; # (𑅳) MAHAJANI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |] @@ -25482,7 +25488,8 @@ A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF 10A38 0334; # (𐨸) KHAROSHTHI SIGN BAR ABOVE [| 004A 00CD | 0002 0002 | FFFF FFFF |] 0334 10A3A; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 | FFFF FFFF |] 10A3A 0334; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 | FFFF FFFF |] -10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |] +0334 10D6A; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |] +10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |] 0334 0E48; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 | FFFF FFFF |] 0E48 0334; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 | FFFF FFFF |] 0334 0E49; # (้) THAI CHARACTER MAI THO [| 004A 00D8 | 0002 0002 | FFFF FFFF |] @@ -25825,8 +25832,8 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [| 0081 | 001A | FFFF 089F 003F; # (࢟) ARABIC HALF MADDA OVER MADDA [| 0082 | 0002 | FFFF 0270 |] 0AFC 003F; # (ૼ) GUJARATI SIGN MADDAH [| 0082 | 0002 | FFFF 0270 |] 10EAC 003F; # (𐺬) YEZIDI 
COMBINING MADDA MARK [| 0082 | 0002 | FFFF 0270 |] -0897 0021; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0269 |] -0897 003F; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0270 |] +0897 0021; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0269 |] +0897 003F; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0270 |] 0654 0021; # (ٔ) ARABIC HAMZA ABOVE [| 0083 | 0002 | FFFF 0269 |] 10EAB 0021; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 0083 | 0002 | FFFF 0269 |] 0654 003F; # (ٔ) ARABIC HAMZA ABOVE [| 0083 | 0002 | FFFF 0270 |] @@ -25872,10 +25879,10 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [| 0081 | 001A | FFFF 08FA 0021; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [| 0097 | 0002 | FFFF 0269 |] 08FA 003F; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [| 0097 | 0002 | FFFF 0270 |] 0670 0021; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [| 0098 | 0002 | FFFF 0269 |] -10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0269 |] +10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0269 |] 0670 003F; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [| 0098 | 0002 | FFFF 0270 |] -10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0270 |] -10EFC 0334; # () ARABIC COMBINING ALEF OVERLAY [| 0098 004A | 0002 0002 | FFFF FFFF |] +10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0270 |] +10EFC 0334; # () ARABIC COMBINING ALEF OVERLAY [| 0098 004A | 0002 0002 | FFFF FFFF |] 0711 0021; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [| 0099 | 0002 | FFFF 0269 |] 0711 003F; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [| 0099 | 0002 | FFFF 0270 |] 0730 0021; # (ܰ) SYRIAC PTHAHA ABOVE [| 009A | 0002 | FFFF 0269 |] @@ -25981,7 +25988,7 @@ A6F1 003F; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 00B7 | 0002 | FFFF 0270 |] 1BE6 0021; # (᯦) BATAK SIGN TOMPI [| 00C2 | 0002 | FFFF 0269 |] 1C37 0021; # (᰷) LEPCHA SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |] A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0269 |] -10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0269 |] 
+10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0269 |] 110BA 0021; # (𑂺) KAITHI SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |] 11173 0021; # (𑅳) MAHAJANI SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |] 111CA 0021; # (𑇊) SHARADA SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |] @@ -26012,7 +26019,7 @@ A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0269 |] 1BE6 003F; # (᯦) BATAK SIGN TOMPI [| 00C2 | 0002 | FFFF 0270 |] 1C37 003F; # (᰷) LEPCHA SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |] A9B3 003F; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0270 |] -10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0270 |] +10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0270 |] 110BA 003F; # (𑂺) KAITHI SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |] 11173 003F; # (𑅳) MAHAJANI SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |] 111CA 003F; # (𑇊) SHARADA SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |] @@ -26373,8 +26380,8 @@ ABEC 003F; # (꯬) MEETEI MAYEK LUM IYEK [| 00CC | 0002 | FFFF 0270 |] 11A98 0021; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 | 0002 | FFFF 0269 |] 11A98 003F; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 | 0002 | FFFF 0270 |] 11A98 0334; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 004A | 0002 0002 | FFFF FFFF |] -10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0269 |] -10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0270 |] +10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0269 |] +10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0270 |] 113D2 0021; # () TULU-TIGALARI GEMINATION MARK [| 00D4 | 0002 | FFFF 0269 |] 113D2 003F; # () TULU-TIGALARI GEMINATION MARK [| 00D4 | 0002 | FFFF 0270 |] 113D2 0334; # () TULU-TIGALARI GEMINATION MARK [| 00D4 004A | 0002 0002 | FFFF FFFF |] @@ -37850,7 +37857,7 @@ FF0D 0061; # (-) FULLWIDTH HYPHEN-MINUS [2380 | 0020 | 0002 | 020D FFFF |] 1B60 0061; # (᭠) BALINESE PAMENENG [2380 | 0020 | 0002 | 0210 FFFF |] 1806 
0061; # (᠆) MONGOLIAN TODO SOFT HYPHEN [2380 | 0020 | 0002 | 0211 FFFF |] 1807 0061; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [2380 | 0020 | 0002 | 0212 FFFF |] -10D6E 0061; # () GARAY HYPHEN [2380 | 0020 | 0002 | 0213 FFFF |] +10D6E 0061; # () GARAY HYPHEN [2380 | 0020 | 0002 | 0213 FFFF |] 2010 0061; # (‐) HYPHEN [2380 | 0020 | 0002 | 0214 FFFF |] 2011 0061; # (‑) NON-BREAKING HYPHEN [2380 | 0020 | 0002 | 0214 FFFF |] 2012 0061; # (‒) FIGURE DASH [2380 | 0020 | 0002 | 0215 FFFF |] @@ -47089,7 +47096,7 @@ FF0D 0041; # (-) FULLWIDTH HYPHEN-MINUS [2380 | 0020 | 0008 | 020D FFFF |] 1B60 0041; # (᭠) BALINESE PAMENENG [2380 | 0020 | 0008 | 0210 FFFF |] 1806 0041; # (᠆) MONGOLIAN TODO SOFT HYPHEN [2380 | 0020 | 0008 | 0211 FFFF |] 1807 0041; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [2380 | 0020 | 0008 | 0212 FFFF |] -10D6E 0041; # () GARAY HYPHEN [2380 | 0020 | 0008 | 0213 FFFF |] +10D6E 0041; # () GARAY HYPHEN [2380 | 0020 | 0008 | 0213 FFFF |] 2010 0041; # (‐) HYPHEN [2380 | 0020 | 0008 | 0214 FFFF |] 2011 0041; # (‑) NON-BREAKING HYPHEN [2380 | 0020 | 0008 | 0214 FFFF |] 2012 0041; # (‒) FIGURE DASH [2380 | 0020 | 0008 | 0215 FFFF |] @@ -56794,8 +56801,8 @@ A67D 0061; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0002 | FF 10D25 0061; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10D26 0061; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10D27 0061; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] -10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] -10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] +10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] +10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F48 0061; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 
10F49 0061; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F4A 0061; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |] @@ -56881,8 +56888,8 @@ A67D 0041; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0008 | FF 10D25 0041; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10D26 0041; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10D27 0041; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] -10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] -10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] +10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] +10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10F48 0041; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10F49 0041; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] 10F4A 0041; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |] @@ -57328,8 +57335,8 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00 089F 0041; # (࢟) ARABIC HALF MADDA OVER MADDA [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |] 0AFC 0041; # (ૼ) GUJARATI SIGN MADDAH [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |] 10EAC 0041; # (𐺬) YEZIDI COMBINING MADDA MARK [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |] -0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 | FFFF FFFF |] -0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 | FFFF FFFF |] +0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 | FFFF FFFF |] +0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 | FFFF FFFF |] 0654 0061; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0002 | FFFF FFFF |] 10EAB 0061; # (𐺫) YEZIDI COMBINING HAMZA MARK 
[2380 | 0083 0020 | 0002 0002 | FFFF FFFF |] 0654 0041; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0008 | FFFF FFFF |] @@ -57375,9 +57382,9 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00 08FA 0061; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0002 | FFFF FFFF |] 08FA 0041; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0008 | FFFF FFFF |] 0670 0061; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |] -10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |] +10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |] 0670 0041; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |] -10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |] +10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |] 0711 0061; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0002 | FFFF FFFF |] 0711 0041; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0008 | FFFF FFFF |] 0730 0061; # (ܰ) SYRIAC PTHAHA ABOVE [2380 | 009A 0020 | 0002 0002 | FFFF FFFF |] @@ -57478,7 +57485,7 @@ A6F1 0041; # (꛱) BAMUM COMBINING MARK TUKWENTIS [2380 | 00B7 0020 | 0002 0008 1BE6 0061; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] 1C37 0061; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] -10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] +10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] 110BA 0061; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] 11173 0061; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |] 111CA 0061; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | 
FFFF FFFF |] @@ -57509,7 +57516,7 @@ A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 | FFFF 1BE6 0041; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] 1C37 0041; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] A9B3 0041; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] -10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] +10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] 110BA 0041; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] 11173 0041; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] 111CA 0041; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |] @@ -57802,8 +57809,8 @@ ABEC 0041; # (꯬) MEETEI MAYEK LUM IYEK [2380 | 00CC 0020 | 0002 0008 | FFFF FF 111CC 0041; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [2380 | 00D1 0020 | 0002 0008 | FFFF FFFF |] 11A98 0061; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0002 | FFFF FFFF |] 11A98 0041; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0008 | FFFF FFFF |] -10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 | FFFF FFFF |] -10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 | FFFF FFFF |] +10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 | FFFF FFFF |] +10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 | FFFF FFFF |] 113D2 0061; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0002 | FFFF FFFF |] 113D2 0041; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0008 | FFFF FFFF |] 0E4E 0061; # (๎) THAI CHARACTER YAMAKKAN [2380 | 00D5 0020 | 0002 0002 | FFFF FFFF |] @@ -58638,7 +58645,7 @@ FF0D 0062; # (-) FULLWIDTH HYPHEN-MINUS [239A | 0020 | 0002 | 020D FFFF |] 1B60 0062; # (᭠) BALINESE PAMENENG [239A | 0020 | 0002 | 0210 FFFF |] 1806 
0062; # (᠆) MONGOLIAN TODO SOFT HYPHEN [239A | 0020 | 0002 | 0211 FFFF |] 1807 0062; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [239A | 0020 | 0002 | 0212 FFFF |] -10D6E 0062; # () GARAY HYPHEN [239A | 0020 | 0002 | 0213 FFFF |] +10D6E 0062; # () GARAY HYPHEN [239A | 0020 | 0002 | 0213 FFFF |] 2010 0062; # (‐) HYPHEN [239A | 0020 | 0002 | 0214 FFFF |] 2011 0062; # (‑) NON-BREAKING HYPHEN [239A | 0020 | 0002 | 0214 FFFF |] 2012 0062; # (‒) FIGURE DASH [239A | 0020 | 0002 | 0215 FFFF |] @@ -68041,8 +68048,8 @@ A67D 0062; # (꙽) COMBINING CYRILLIC PAYEROK [239A | 0033 0020 | 0002 0002 | FF 10D25 0062; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10D26 0062; # (𐴦) HANIFI ROHINGYA SIGN TANA [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10D27 0062; # (𐴧) HANIFI ROHINGYA SIGN TASSI [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] -10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] -10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] +10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] +10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F48 0062; # (𐽈) SOGDIAN COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F49 0062; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] 10F4A 0062; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |] @@ -68269,7 +68276,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00 089F 0062; # (࢟) ARABIC HALF MADDA OVER MADDA [239A | 0082 0020 | 0002 0002 | FFFF FFFF |] 0AFC 0062; # (ૼ) GUJARATI SIGN MADDAH [239A | 0082 0020 | 0002 0002 | FFFF FFFF |] 10EAC 0062; # (𐺬) YEZIDI COMBINING MADDA MARK [239A | 0082 0020 | 0002 0002 | FFFF FFFF |] -0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 | FFFF FFFF |] +0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 
0004 0002 | FFFF FFFF |] 0654 0062; # (ٔ) ARABIC HAMZA ABOVE [239A | 0083 0020 | 0002 0002 | FFFF FFFF |] 10EAB 0062; # (𐺫) YEZIDI COMBINING HAMZA MARK [239A | 0083 0020 | 0002 0002 | FFFF FFFF |] 0655 0062; # (ٕ) ARABIC HAMZA BELOW [239A | 0084 0020 | 0002 0002 | FFFF FFFF |] @@ -68293,7 +68300,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00 08F9 0062; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [239A | 0096 0020 | 0002 0002 | FFFF FFFF |] 08FA 0062; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [239A | 0097 0020 | 0002 0002 | FFFF FFFF |] 0670 0062; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [239A | 0098 0020 | 0002 0002 | FFFF FFFF |] -10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 | FFFF FFFF |] +10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 | FFFF FFFF |] 0711 0062; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [239A | 0099 0020 | 0002 0002 | FFFF FFFF |] 0730 0062; # (ܰ) SYRIAC PTHAHA ABOVE [239A | 009A 0020 | 0002 0002 | FFFF FFFF |] 0731 0062; # (ܱ) SYRIAC PTHAHA BELOW [239A | 009B 0020 | 0002 0002 | FFFF FFFF |] @@ -68351,7 +68358,7 @@ A6F1 0062; # (꛱) BAMUM COMBINING MARK TUKWENTIS [239A | 00B7 0020 | 0002 0002 1BE6 0062; # (᯦) BATAK SIGN TOMPI [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] 1C37 0062; # (᰷) LEPCHA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] A9B3 0062; # (꦳) JAVANESE SIGN CECAK TELU [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] -10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] +10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] 110BA 0062; # (𑂺) KAITHI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] 11173 0062; # (𑅳) MAHAJANI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] 111CA 0062; # (𑇊) SHARADA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |] @@ -68506,7 +68513,7 @@ ABEC 0062; # (꯬) MEETEI MAYEK LUM IYEK [239A | 00CC 0020 | 0002 0002 | FFFF FF 111CB 0062; # (𑇋) 
SHARADA VOWEL MODIFIER MARK [239A | 00D0 0020 | 0002 0002 | FFFF FFFF |] 111CC 0062; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [239A | 00D1 0020 | 0002 0002 | FFFF FFFF |] 11A98 0062; # (𑪘) SOYOMBO GEMINATION MARK [239A | 00D2 0020 | 0002 0002 | FFFF FFFF |] -10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 | FFFF FFFF |] +10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 | FFFF FFFF |] 113D2 0062; # () TULU-TIGALARI GEMINATION MARK [239A | 00D4 0020 | 0002 0002 | FFFF FFFF |] 0E4E 0062; # (๎) THAI CHARACTER YAMAKKAN [239A | 00D5 0020 | 0002 0002 | FFFF FFFF |] 0E47 0062; # (็) THAI CHARACTER MAITAIKHU [239A | 00D6 0020 | 0002 0002 | FFFF FFFF |] @@ -110915,7 +110922,9 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 | F 113CE 003F; # () TULU-TIGALARI SIGN VIRAMA [332A | 0020 | 0002 | FFFF 0270 |] 113CF 0021; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 0020 | 0004 | FFFF 0269 |] 113CF 003F; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 0020 | 0004 | FFFF 0270 |] +0334 113CE; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 | FFFF FFFF |] 113CE 0334; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 | FFFF FFFF |] +0334 113CF; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 | FFFF FFFF |] 113CF 0334; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 | FFFF FFFF |] 113CE 0061; # () TULU-TIGALARI SIGN VIRAMA [332A 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 113CE 0041; # () TULU-TIGALARI SIGN VIRAMA [332A 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] @@ -110925,6 +110934,7 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 | F 113CF 0062; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 239A | 0020 0020 | 0004 0002 | FFFF FFFF |] 113D0 0021; # () TULU-TIGALARI CONJOINER [332B | 0020 | 0002 | FFFF 0269 |] 113D0 003F; # () TULU-TIGALARI CONJOINER [332B | 0020 | 0002 | FFFF 0270 |] +0334 113D0; # () 
TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 | FFFF FFFF |] 113D0 0334; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 | FFFF FFFF |] 113D0 0061; # () TULU-TIGALARI CONJOINER [332B 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 113D0 0041; # () TULU-TIGALARI CONJOINER [332B 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] @@ -147061,12 +147071,13 @@ A6EF 0062; # (ꛯ) BAMUM LETTER KOGHOM [4304 239A | 0020 0020 | 0002 0002 | FFFF 10D4F 0061; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 10D4F 0041; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] 10D4F 0062; # () GARAY SUKUN [4646 239A | 0020 0020 | 0002 0002 | FFFF FFFF |] -10D69 0021; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0269 |] -10D69 003F; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0270 |] -10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |] -10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] -10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] -10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 | FFFF FFFF |] +10D69 0021; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0269 |] +10D69 003F; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0270 |] +0334 10D69; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |] +10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |] +10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] +10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] +10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 | FFFF FFFF |] 10D70 0021; # () GARAY SMALL LETTER A [4648 | 0020 | 0002 | FFFF 0269 |] 10D70 003F; # () GARAY SMALL LETTER A [4648 | 0020 | 0002 | FFFF 0270 |] 10D50 0021; # () GARAY CAPITAL LETTER A [4648 | 0020 | 0008 | FFFF 0269 |] @@ -167946,6 +167957,7 @@ A4F7 0062; # 
(ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF 105D2 0307 003F; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 | 0002 | FFFF 0270 |] 105C9 0334; # () TODHRI LETTER EI [5237 | 0020 004A | 0002 0002 | FFFF FFFF |] 105D2 0307 0334; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 004A | 0002 0002 | FFFF FFFF |] +105D2 0334 0307; # (̴) TODHRI LETTER I, COMBINING TILDE OVERLAY [5237 | 0020 004A | 0002 0002 | FFFF FFFF |] 105C9 0061; # () TODHRI LETTER EI [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 105D2 0307 0061; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 105D2 0591 0307 0061; # (֑) TODHRI LETTER I, HEBREW ACCENT ETNAHTA, COMBINING DOT ABOVE [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] @@ -168117,6 +168129,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF 105DA 0307 003F; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 | 0002 | FFFF 0270 |] 105E4 003F; # () TODHRI LETTER U [5252 | 0020 | 0002 | FFFF 0270 |] 105DA 0307 0334; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 004A | 0002 0002 | FFFF FFFF |] +105DA 0334 0307; # (̴) TODHRI LETTER O, COMBINING TILDE OVERLAY [5252 | 0020 004A | 0002 0002 | FFFF FFFF |] 105E4 0334; # () TODHRI LETTER U [5252 | 0020 004A | 0002 0002 | FFFF FFFF |] 105DA 0307 0061; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 105E4 0061; # () TODHRI LETTER U [5252 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] @@ -169610,6 +169623,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF 1612E 0062; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 239A | 0020 0020 | 0002 0002 | FFFF FFFF |] 1612F 0021; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 0020 | 0002 | FFFF 0269 |] 1612F 003F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 0020 | 0002 | FFFF 0270 |] +0334 1612F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 | FFFF FFFF |] 1612F 
0334; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 | FFFF FFFF |] 1612F 0061; # () GURUNG KHEMA SIGN THOLHOMA [5338 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |] 1612F 0041; # () GURUNG KHEMA SIGN THOLHOMA [5338 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |] diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt index be9ccae62..4d1117edc 100644 --- a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt +++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt @@ -1,5 +1,5 @@ # CollationTest_SHIFTED_SHORT.txt -# Date: 2024-05-02, 01:46:29 GMT +# Date: 2024-06-05, 18:49:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -24941,7 +24941,9 @@ A67D 0334 10D26 0334 0334 10D27 10D27 0334 +0334 10D6B 10D6B 0334 +0334 10D6D 10D6D 0334 0334 10F48 10F48 0334 @@ -24957,6 +24959,7 @@ A67D 0334 10F84 0334 0334 1E2AE 1E2AE 0334 +0334 1E5EE 1E5EE 0334 0316 0334 0334 0316 @@ -25104,6 +25107,7 @@ FE27 0334 10F83 0334 0334 10F85 10F85 0334 +0334 1E5EF 1E5EF 0334 0334 3099 3099 0334 @@ -25286,6 +25290,7 @@ FB1E 0334 089F 0334 0334 10EAC 10EAC 0334 +0334 0897 0897 0334 0334 0654 0654 0334 @@ -25431,6 +25436,7 @@ A6F1 0334 1C37 0334 0334 A9B3 A9B3 0334 +0334 10D6C 10D6C 0334 0334 110BA 110BA 0334 @@ -25482,6 +25488,7 @@ A9B3 0334 10A38 0334 0334 10A3A 10A3A 0334 +0334 10D6A 10D6A 0334 0334 0E48 0E48 0334 @@ -110915,7 +110922,9 @@ A8C4 0062 113CE 003F 113CF 0021 113CF 003F +0334 113CE 113CE 0334 +0334 113CF 113CF 0334 113CE 0061 113CE 0041 @@ -110925,6 +110934,7 @@ A8C4 0062 113CF 0062 113D0 0021 113D0 003F +0334 113D0 113D0 0334 113D0 0061 113D0 0041 @@ -147063,6 +147073,7 @@ A6EF 0062 10D4F 0062 10D69 0021 10D69 003F +0334 10D69 10D69 0334 10D69 0061 10D69 0041 @@ 
-167946,6 +167957,7 @@ A4F7 0062 105D2 0307 003F 105C9 0334 105D2 0307 0334 +105D2 0334 0307 105C9 0061 105D2 0307 0061 105D2 0591 0307 0061 @@ -168117,6 +168129,7 @@ A4F7 0062 105DA 0307 003F 105E4 003F 105DA 0307 0334 +105DA 0334 0307 105E4 0334 105DA 0307 0061 105E4 0061 @@ -169610,6 +169623,7 @@ A4F7 0062 1612E 0062 1612F 0021 1612F 003F +0334 1612F 1612F 0334 1612F 0061 1612F 0041 diff --git a/unicodetools/data/ucd/dev/DoNotEmit.txt b/unicodetools/data/ucd/dev/DoNotEmit.txt index 43f4eaa09..c6d96f58a 100644 --- a/unicodetools/data/ucd/dev/DoNotEmit.txt +++ b/unicodetools/data/ucd/dev/DoNotEmit.txt @@ -82,8 +82,6 @@ # combining dot above. # Hamza_Form: # Sequences containing Arabic hamza above, which should be avoided. -# Precomposed_Hieroglyph: -# Precomposed sequences for Egyptian Hieroglyphs which should be avoided. # Precomposed_Form: # Sequences for which a precomposed form exists, but without canonical # equivalence. @@ -101,12 +99,6 @@ # "Do Not Use" tables from the Core Specification # ================================================ -# Egyptian Hieroglyphs, from Table 11-2 -# Note: This list may be incomplete. 
-13217; 13216 13430 13216 13430 13216; Precomposed_Hieroglyph # EGYPTIAN HIEROGLYPH N035A; EGYPTIAN HIEROGLYPH N035, EGYPTIAN HIEROGLYPH VERTICAL JOINER, EGYPTIAN HIEROGLYPH N035, EGYPTIAN HIEROGLYPH VERTICAL JOINER, EGYPTIAN HIEROGLYPH N035 -130C1; 130C0 13436 1309D; Precomposed_Hieroglyph # EGYPTIAN HIEROGLYPH D059; EGYPTIAN HIEROGLYPH D058, EGYPTIAN HIEROGLYPH OVERLAY MIDDLE, EGYPTIAN HIEROGLYPH D036 -13196; 13193 13433 13437 133CF 13430 131FF 13438; Precomposed_Hieroglyph # EGYPTIAN HIEROGLYPH I011A; EGYPTIAN HIEROGLYPH I010, EGYPTIAN HIEROGLYPH INSERT AT BOTTOM START, EGYPTIAN HIEROGLYPH BEGIN SEGMENT, EGYPTIAN HIEROGLYPH X001, EGYPTIAN HIEROGLYPH VERTICAL JOINER, EGYPTIAN HIEROGLYPH N017, EGYPTIAN HIEROGLYPH END SEGMENT - # Devanagari, from Table 12-1 0905 0946; 0904; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E; DEVANAGARI LETTER SHORT A 0905 093E; 0906; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER AA diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 3eb759238..3b3f3c35a 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -863,6 +863,20 @@ Let $TwoVietnameseReadingMarks = [\p{U15.1.0:ccc=6}] # an LV or V, respectively. [\p{NFC_QC=Maybe}&\p{ccc=0}] ⊆ [\p{GCB=Extend}\p{GCB=T}\p{GCB=V}] +# ICU relies on this to avoid carrying data for HST which would be mostly +# redundant with GCB. If this breaks, it should be noted on the landing page, +# and ICU-TC should be notified. +# See https://github.com/unicode-org/icu/pull/3026. +\p{HST=V} = [\p{GCB=V} & [\u0000-\uFFFF]] +# A more principled (if less practically useful) statement is that the +# dual-conjoining Hangul characters are exactly the Hangul vowels. 
+\p{HST=V} = [\p{GCB=V} & \p{Script=Hangul}] +# The other types are still straightforwardly related to their GCB counterparts. +\p{HST=L} = \p{GCB=L} +\p{HST=LV} = \p{GCB=LV} +\p{HST=LVT} = \p{GCB=LVT} +\p{HST=T} = \p{GCB=T} + ########################## # Emoji ########################## diff --git a/unicodetools/src/main/resources/org/unicode/tools/emoji/emojiOrdering.txt b/unicodetools/src/main/resources/org/unicode/tools/emoji/emojiOrdering.txt index b8a0b631b..f3bfaccdd 100644 --- a/unicodetools/src/main/resources/org/unicode/tools/emoji/emojiOrdering.txt +++ b/unicodetools/src/main/resources/org/unicode/tools/emoji/emojiOrdering.txt @@ -408,7 +408,7 @@ 🐊 🐢 🦎 🐍 🐲 🐉 🦕 🦖 @ animal-marine -🐳 🐋 🐬 🦭 🐟 🐠 🐡 🦈 🐙 🐚 🪸 🪼 +🐳 🐋 🐬 🦭 🐟 🐠 🐡 🦈 🐙 🐚 🪸 🪼 🦀 🦞 🦐 🦑 🦪 @ animal-bug 🐌 🦋 🐛 🐜 🐝 🪲 🐞 🦗 🪳 🕷️ 🕸️ 🦂 🦟 🪰 🪱 🦠 @ plant-flower @@ -428,8 +428,6 @@ 🥫 @ food-asian 🍱 🍘 🍙 🍚 🍛 🍜 🍝 🍠 🍢 🍣 🍤 🍥 🥮 🍡 🥟 🥠 🥡 -@ food-marine -🦀 🦞 🦐 🦑 🦪 @ food-sweet 🍦 🍧 🍨 🍩 🍪 🎂 🍰 🧁 🥧 🍫 🍬 🍭 🍮 🍯 @ drink