diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index c818d6948..a3a12dc65 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -275,6 +275,22 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + set +e + diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$?" 
>> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -316,6 +332,10 @@ jobs: - name: Run command - UCA - collation validity log run: | cd unicodetools/mine/src + echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then set +e + fi # invoke main() in class ...UCA.Main mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION # check for output file @@ -333,6 +353,22 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + set +e + diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$?" 
>> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -372,6 +408,16 @@ jobs: - name: Run invariant tests run: | cd unicodetools/mine/src - MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS + echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then ERROR="::notice" + else ERROR="::error" + fi + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS 2>&1 | sed "s/^::error/$ERROR/" + STATUS=${PIPESTATUS[0]} + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then exit 0 + else exit $STATUS + fi env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/build.md b/docs/build.md index 4f5b25173..cd7a62971 100644 --- a/docs/build.md +++ b/docs/build.md @@ -550,7 +550,7 @@ We no longer post files to FTP folders, nor publish individual files without con * org.unicode.text.UCD * TestUnicodeInvariants.java 1. Run>Run As... Java Application\ - Will create the following file of results: + Will create the following files of results: ``` {Generated}/UnicodeTestResults.txt ``` @@ -623,6 +623,8 @@ We no longer post files to FTP folders, nor publish individual files without con and what are likely remedies (changing properties, adding to an exceptions list, changing the test case). 
Improve these comments as needed. +1. Additional tests for UTS #39 data are found in [unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt](https://github.com/unicode-org/unicodetools/blob/main/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt). + 1. These are reported in `{Generated}/UnicodeTestResults-security.txt` when running `TestTestUnicodeInvariants`. ### Options diff --git a/docs/pipeline.md b/docs/pipeline.md index 695f659ec..69f2f4b88 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -49,7 +49,7 @@ Indic scripts only: - [ ] Commit --- -- [ ] PropsList.txt — Add Other_Alphabetic, Diacritic, and Extender to satisfy invariants, or to taste +- [ ] PropsList.txt — Add Other_Alphabetic, Other_Lowercase, Diacritic, and Extender to satisfy invariants, or to taste - [ ] Commit --- @@ -67,8 +67,13 @@ PR preparation: - [ ] PR button — Set to DRAFT pull request - unless approved for the upcoming version - [ ] PR button — Press - - The **Check UCA data** CI check might fail; many character additions need separate handling there, - but that is out of scope for the PAG work of preparing `data-for-new`. This will get resolved later. + - The **Check UCA data** and **Check security data invariants** CI checks are + suppressed; many character additions need separate handling there, + but that is out of scope for the PAG work of preparing `data-for-new`, + so reporting those failures could distract from real issues + in the UCD invariants. + UCA and security data issues are addressed later in the process, + before the start of β review. ## Scripts diff --git a/docs/security.md b/docs/security.md index 3229c35a9..01bb8955c 100644 --- a/docs/security.md +++ b/docs/security.md @@ -9,8 +9,8 @@ machine-generated, then tweaked. They have names like source/confusables-winFonts.txt. The main file is confusables-source.txt. ***There is fairly complex processing for the confusables, so carefully diff the -results. 
Sometimes you may get an unexpected union of two equivalence sets. Look -at Testing below for help.*** +results. Sometimes you may get an unexpected union of two equivalence sets. +Look at Testing below for help.*** Look at the following spreadsheets / bugs to see if there are any additional suggestions. @@ -19,17 +19,38 @@ suggestions. Suggestions](https://docs.google.com/spreadsheet/ccc?key=0ArRWBHdd5mx-dHRXelRVbXRYSVp2QTNDdTBlV1I5X1E&usp=drive_web#gid=0)** * **[Identifier Restriction Suggestions](https://docs.google.com/spreadsheet/ccc?key=0ArRWBHdd5mx-dEJJWkdzZzk4cDRYbEVLTmhraGN0Q3c&usp=drive_web#gid=0)** -* *[Unicode - Bugs](http://www.unicode.org/edcom/bugtrack/query?status=accepted&status=assigned&status=new&status=reopened&group=component&order=priority&col=id&col=summary&col=status&col=type&col=priority&col=milestone&col=component&owner=mark&report=10) - (under TR #36/39)*\ - :construction: **TODO**: That Trac instance is gone. - Markus thinks we decided that there was nothing useful in it, - and deleted it without saving data. Check with Mark. +* *[Sample PRs](https://github.com/unicode-org/unicodetools/pull/841)* If so, assess and add to unicodetools/data/security/{version}/data/source/confusables-source.txt — *if needed.* - Then in the spreadsheets, move the "new stuff" line to the end. +### File Format +There is a brief description of the file format at the top. +Each line represents a mapping from a code point or set of code points to a sequence of one or more code points. + +For example: +``` +0021 ; 01C3 # ( ! → ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK +``` + +The ordering of characters doesn't matter. +So it doesn't matter whether you have the above line, or +``` +01C3 ; 0021 # ( ǃ → !) LATIN LETTER RETROFLEX CLICK → EXCLAMATION MARK +``` +It also doesn't matter if you have identical lines; the second one will be a NOOP. + +The mappings are used to generate equivalence classes. 
+From each equivalence class, one representative member will be chosen, +and in the resulting data file, all the other characters will map to that representative. +Because of transitivity, the equivalence class will tend to be somewhat looser than expected. + +We've discussed possible future enhancements: +- Have a second, narrower mapping that is more exact. +- Allow for mappings from sequences to sequences (instead of just code points to sequences). +- Provide for context, perhaps like the Transform rules. + E.g. [x { a } y → A](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3Aarabic_type%3A%5D&g=&i=) + ## Before generating First, in CLDR, update the script metadata: @@ -51,13 +72,10 @@ Run GenerateConfusables -c -b to generate the files. They will appear in two pla * reformatted source, log * $UNICODETOOLS_DIR/data/security/11.0.0/* *including log.txt* -**Run TestSecurity to verify that the confusable mappings are idempotent!** +The TestSecurity.java test is part of the unit test suite, run by GitHub CI. +It verifies that the confusable mappings are idempotent. -With the same VM arguments as the generator. -Starting in 2021q3, TestSecurity needs to be run as a JUnit test. -It is also now part of the unit test suite and run on GitHub CI. - -Copy the following from the output directory to the top level of the revision directory: +Copy the following from the output directory to the top level of the revision directory, and check in. * confusables.txt * confusablesSummary.txt @@ -66,6 +84,12 @@ Copy the following from the output directory to the top level of the revision di * ReadMe.txt * xidmodifications.txt +### Review + +Review the mappings to make sure that there are no surprises. +The biggest issue is if two equivalence classes are mistakenly joined. +For example, if you map b to d, then that will join the equivalence class for b with that of d. 
+ ### IdentifierStatus.txt & IdentifierType.txt Markus 2020-feb-07 for Unicode 13.0: diff --git a/unicodetools/data/emoji/dev/emoji-test.txt b/unicodetools/data/emoji/dev/emoji-test.txt index e81fe0b19..95c5d5431 100644 --- a/unicodetools/data/emoji/dev/emoji-test.txt +++ b/unicodetools/data/emoji/dev/emoji-test.txt @@ -1,5 +1,5 @@ # emoji-test.txt -# Date: 2024-05-01, 21:25:24 GMT +# Date: 2024-06-04, 16:46:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1751,12 +1751,12 @@ 1F936 1F3FD ; fully-qualified # 🤶🏽 E3.0 Mrs. Claus: medium skin tone 1F936 1F3FE ; fully-qualified # 🤶🏾 E3.0 Mrs. Claus: medium-dark skin tone 1F936 1F3FF ; fully-qualified # 🤶🏿 E3.0 Mrs. Claus: dark skin tone -1F9D1 200D 1F384 ; fully-qualified # 🧑🎄 E13.0 mx claus -1F9D1 1F3FB 200D 1F384 ; fully-qualified # 🧑🏻🎄 E13.0 mx claus: light skin tone -1F9D1 1F3FC 200D 1F384 ; fully-qualified # 🧑🏼🎄 E13.0 mx claus: medium-light skin tone -1F9D1 1F3FD 200D 1F384 ; fully-qualified # 🧑🏽🎄 E13.0 mx claus: medium skin tone -1F9D1 1F3FE 200D 1F384 ; fully-qualified # 🧑🏾🎄 E13.0 mx claus: medium-dark skin tone -1F9D1 1F3FF 200D 1F384 ; fully-qualified # 🧑🏿🎄 E13.0 mx claus: dark skin tone +1F9D1 200D 1F384 ; fully-qualified # 🧑🎄 E13.0 Mx claus +1F9D1 1F3FB 200D 1F384 ; fully-qualified # 🧑🏻🎄 E13.0 Mx claus: light skin tone +1F9D1 1F3FC 200D 1F384 ; fully-qualified # 🧑🏼🎄 E13.0 Mx claus: medium-light skin tone +1F9D1 1F3FD 200D 1F384 ; fully-qualified # 🧑🏽🎄 E13.0 Mx claus: medium skin tone +1F9D1 1F3FE 200D 1F384 ; fully-qualified # 🧑🏾🎄 E13.0 Mx claus: medium-dark skin tone +1F9D1 1F3FF 200D 1F384 ; fully-qualified # 🧑🏿🎄 E13.0 Mx claus: dark skin tone 1F9B8 ; fully-qualified # 🦸 E11.0 superhero 1F9B8 1F3FB ; fully-qualified # 🦸🏻 E11.0 superhero: light skin tone 1F9B8 1F3FC ; fully-qualified # 🦸🏼 E11.0 superhero: medium-light skin tone @@ -3721,6 
+3721,11 @@ 1F41A ; fully-qualified # 🐚 E0.6 spiral shell 1FAB8 ; fully-qualified # 🪸 E14.0 coral 1FABC ; fully-qualified # 🪼 E15.0 jellyfish +1F980 ; fully-qualified # 🦀 E1.0 crab +1F99E ; fully-qualified # 🦞 E11.0 lobster +1F990 ; fully-qualified # 🦐 E3.0 shrimp +1F991 ; fully-qualified # 🦑 E3.0 squid +1F9AA ; fully-qualified # 🦪 E12.0 oyster # subgroup: animal-bug 1F40C ; fully-qualified # 🐌 E0.6 snail @@ -3777,8 +3782,8 @@ 1F344 ; fully-qualified # 🍄 E0.6 mushroom 1FABE ; fully-qualified # E16.0 leafless tree -# Animals & Nature subtotal: 161 -# Animals & Nature subtotal: 161 w/o modifiers +# Animals & Nature subtotal: 166 +# Animals & Nature subtotal: 166 w/o modifiers # group: Food & Drink @@ -3881,13 +3886,6 @@ 1F960 ; fully-qualified # 🥠 E5.0 fortune cookie 1F961 ; fully-qualified # 🥡 E5.0 takeout box -# subgroup: food-marine -1F980 ; fully-qualified # 🦀 E1.0 crab -1F99E ; fully-qualified # 🦞 E11.0 lobster -1F990 ; fully-qualified # 🦐 E3.0 shrimp -1F991 ; fully-qualified # 🦑 E3.0 squid -1F9AA ; fully-qualified # 🦪 E12.0 oyster - # subgroup: food-sweet 1F366 ; fully-qualified # 🍦 E0.6 soft ice cream 1F367 ; fully-qualified # 🍧 E0.6 shaved ice @@ -3936,8 +3934,8 @@ 1FAD9 ; fully-qualified # 🫙 E14.0 jar 1F3FA ; fully-qualified # 🏺 E1.0 amphora -# Food & Drink subtotal: 138 -# Food & Drink subtotal: 138 w/o modifiers +# Food & Drink subtotal: 133 +# Food & Drink subtotal: 133 w/o modifiers # group: Travel & Places diff --git a/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt b/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt index e471645bf..ce31f22ce 100644 --- a/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt +++ b/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt @@ -1,5 +1,5 @@ # emoji-zwj-sequences.txt -# Date: 2024-05-01, 21:25:24 GMT +# Date: 2024-06-04, 16:46:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -665,7 +665,7 @@ 1F9D1 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer # E12.1 [1] (🧑🌾) 1F9D1 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook # E12.1 [1] (🧑🍳) 1F9D1 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby # E13.0 [1] (🧑🍼) -1F9D1 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus # E13.0 [1] (🧑🎄) +1F9D1 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus # E13.0 [1] (🧑🎄) 1F9D1 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student # E12.1 [1] (🧑🎓) 1F9D1 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer # E12.1 [1] (🧑🎤) 1F9D1 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist # E12.1 [1] (🧑🎨) @@ -689,7 +689,7 @@ 1F9D1 1F3FB 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: light skin tone # E12.1 [1] (🧑🏻🌾) 1F9D1 1F3FB 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: light skin tone # E12.1 [1] (🧑🏻🍳) 1F9D1 1F3FB 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: light skin tone # E13.0 [1] (🧑🏻🍼) -1F9D1 1F3FB 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: light skin tone # E13.0 [1] (🧑🏻🎄) +1F9D1 1F3FB 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: light skin tone # E13.0 [1] (🧑🏻🎄) 1F9D1 1F3FB 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: light skin tone # E12.1 [1] (🧑🏻🎓) 1F9D1 1F3FB 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: light skin tone # E12.1 [1] (🧑🏻🎤) 1F9D1 1F3FB 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: light skin tone # E12.1 [1] (🧑🏻🎨) @@ -713,7 +713,7 @@ 1F9D1 1F3FC 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium-light skin tone # E12.1 [1] (🧑🏼🌾) 1F9D1 1F3FC 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium-light skin tone # E12.1 [1] (🧑🏼🍳) 1F9D1 1F3FC 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium-light skin tone # E13.0 [1] (🧑🏼🍼) -1F9D1 1F3FC 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium-light skin tone # E13.0 [1] (🧑🏼🎄) +1F9D1 1F3FC 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium-light skin tone # E13.0 [1] (🧑🏼🎄) 1F9D1 1F3FC 200D 1F393 ; 
RGI_Emoji_ZWJ_Sequence ; student: medium-light skin tone # E12.1 [1] (🧑🏼🎓) 1F9D1 1F3FC 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium-light skin tone # E12.1 [1] (🧑🏼🎤) 1F9D1 1F3FC 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium-light skin tone # E12.1 [1] (🧑🏼🎨) @@ -737,7 +737,7 @@ 1F9D1 1F3FD 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium skin tone # E12.1 [1] (🧑🏽🌾) 1F9D1 1F3FD 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium skin tone # E12.1 [1] (🧑🏽🍳) 1F9D1 1F3FD 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium skin tone # E13.0 [1] (🧑🏽🍼) -1F9D1 1F3FD 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium skin tone # E13.0 [1] (🧑🏽🎄) +1F9D1 1F3FD 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium skin tone # E13.0 [1] (🧑🏽🎄) 1F9D1 1F3FD 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium skin tone # E12.1 [1] (🧑🏽🎓) 1F9D1 1F3FD 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium skin tone # E12.1 [1] (🧑🏽🎤) 1F9D1 1F3FD 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium skin tone # E12.1 [1] (🧑🏽🎨) @@ -761,7 +761,7 @@ 1F9D1 1F3FE 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium-dark skin tone # E12.1 [1] (🧑🏾🌾) 1F9D1 1F3FE 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium-dark skin tone # E12.1 [1] (🧑🏾🍳) 1F9D1 1F3FE 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium-dark skin tone # E13.0 [1] (🧑🏾🍼) -1F9D1 1F3FE 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium-dark skin tone # E13.0 [1] (🧑🏾🎄) +1F9D1 1F3FE 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium-dark skin tone # E13.0 [1] (🧑🏾🎄) 1F9D1 1F3FE 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium-dark skin tone # E12.1 [1] (🧑🏾🎓) 1F9D1 1F3FE 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium-dark skin tone # E12.1 [1] (🧑🏾🎤) 1F9D1 1F3FE 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium-dark skin tone # E12.1 [1] (🧑🏾🎨) @@ -785,7 +785,7 @@ 1F9D1 1F3FF 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: dark skin tone # E12.1 [1] (🧑🏿🌾) 1F9D1 1F3FF 
200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: dark skin tone # E12.1 [1] (🧑🏿🍳) 1F9D1 1F3FF 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: dark skin tone # E13.0 [1] (🧑🏿🍼) -1F9D1 1F3FF 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: dark skin tone # E13.0 [1] (🧑🏿🎄) +1F9D1 1F3FF 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: dark skin tone # E13.0 [1] (🧑🏿🎄) 1F9D1 1F3FF 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: dark skin tone # E12.1 [1] (🧑🏿🎓) 1F9D1 1F3FF 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: dark skin tone # E12.1 [1] (🧑🏿🎤) 1F9D1 1F3FF 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: dark skin tone # E12.1 [1] (🧑🏿🎨) diff --git a/unicodetools/data/emoji/dev/internal/emoji-proposals.txt b/unicodetools/data/emoji/dev/internal/emoji-proposals.txt index edc931dbb..8769e7477 100644 --- a/unicodetools/data/emoji/dev/internal/emoji-proposals.txt +++ b/unicodetools/data/emoji/dev/internal/emoji-proposals.txt @@ -1925,7 +1925,7 @@ 1F9CF 1F3FF; L2/18-229, L2/14-173 # 2019 (🧏🏿) deaf person: dark skin tone # L2/19-231 -1F9D1 200D 1F384; L2/19-231 # 2020 (🧑🎄) mx claus +1F9D1 200D 1F384; L2/19-231 # 2020 (🧑🎄) Mx claus # L2/19-275, L2/18-223, L2/18-228, L2/19-021, L2/18-340 1F9D1 200D 1F91D 200D 1F9D1; L2/19-275, L2/18-223, L2/18-228, L2/19-021, L2/18-340 # 2019 (🧑🤝🧑) people holding hands diff --git a/unicodetools/data/security/dev/confusables.txt b/unicodetools/data/security/dev/confusables.txt index 531fd2a7f..ecbd58c23 100644 --- a/unicodetools/data/security/dev/confusables.txt +++ b/unicodetools/data/security/dev/confusables.txt @@ -1,5 +1,5 @@ # confusables.txt -# Date: 2024-05-03, 03:26:41 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -577,10 +577,10 @@ FF07 ; 0027 ; MA #* ( ' → ' ) FULLWIDTH APOSTROPHE → APOSTROPHE # →’ 2018 ; 0027 ; MA #* ( ‘ → ' ) LEFT SINGLE QUOTATION MARK → APOSTROPHE # 2019 ; 0027 ; MA #* ( ’ → ' ) RIGHT SINGLE QUOTATION MARK → APOSTROPHE # 201B ; 0027 ; MA #* ( ‛ → ' ) SINGLE HIGH-REVERSED-9 QUOTATION MARK → APOSTROPHE # →′→ +05F3 ; 0027 ; MA #* ( ׳ → ' ) HEBREW PUNCTUATION GERESH → APOSTROPHE # 2032 ; 0027 ; MA #* ( ′ → ' ) PRIME → APOSTROPHE # 2035 ; 0027 ; MA #* ( ‵ → ' ) REVERSED PRIME → APOSTROPHE # →ʽ→→‘→ 055A ; 0027 ; MA #* ( ՚ → ' ) ARMENIAN APOSTROPHE → APOSTROPHE # →’→ -05F3 ; 0027 ; MA #* ( ׳ → ' ) HEBREW PUNCTUATION GERESH → APOSTROPHE # 0060 ; 0027 ; MA #* ( ` → ' ) GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→ 1FEF ; 0027 ; MA #* ( ` → ' ) GREEK VARIA → APOSTROPHE # →ˋ→→`→→‘→ FF40 ; 0027 ; MA #* ( ` → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘→ @@ -593,7 +593,7 @@ FF40 ; 0027 ; MA #* ( ` → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘ 02B9 ; 0027 ; MA # ( ʹ → ' ) MODIFIER LETTER PRIME → APOSTROPHE # 0374 ; 0027 ; MA # ( ʹ → ' ) GREEK NUMERAL SIGN → APOSTROPHE # →′→ 02C8 ; 0027 ; MA # ( ˈ → ' ) MODIFIER LETTER VERTICAL LINE → APOSTROPHE # -02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →ʹ→→′→ +02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →΄→→ʹ→ 02CB ; 0027 ; MA # ( ˋ → ' ) MODIFIER LETTER GRAVE ACCENT → APOSTROPHE # →`→→‘→ 02F4 ; 0027 ; MA #* ( ˴ → ' ) MODIFIER LETTER MIDDLE GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→ 02BB ; 0027 ; MA # ( ʻ → ' ) MODIFIER LETTER TURNED COMMA → APOSTROPHE # →‘→ @@ -615,10 +615,10 @@ FF02 ; 0027 0027 ; MA #* ( " → '' ) FULLWIDTH QUOTATION MARK → APOSTROPHE, 201C ; 0027 0027 ; MA #* ( “ → '' ) LEFT DOUBLE QUOTATION MARK → APOSTROPHE, APOSTROPHE # →"→ 201D ; 0027 0027 ; MA #* ( ” → '' ) RIGHT DOUBLE QUOTATION MARK → APOSTROPHE, APOSTROPHE # →"→ 201F ; 0027 0027 ; MA #* ( ‟ → '' ) DOUBLE HIGH-REVERSED-9 
QUOTATION MARK → APOSTROPHE, APOSTROPHE # →“→→"→ +05F4 ; 0027 0027 ; MA #* ( ״ → '' ) HEBREW PUNCTUATION GERSHAYIM → APOSTROPHE, APOSTROPHE # →"→ 2033 ; 0027 0027 ; MA #* ( ″ → '' ) DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →"→ 2036 ; 0027 0027 ; MA #* ( ‶ → '' ) REVERSED DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →‵‵→ 3003 ; 0027 0027 ; MA #* ( 〃 → '' ) DITTO MARK → APOSTROPHE, APOSTROPHE # →″→→"→ -05F4 ; 0027 0027 ; MA #* ( ״ → '' ) HEBREW PUNCTUATION GERSHAYIM → APOSTROPHE, APOSTROPHE # →"→ 02DD ; 0027 0027 ; MA #* ( ˝ → '' ) DOUBLE ACUTE ACCENT → APOSTROPHE, APOSTROPHE # →"→ 02BA ; 0027 0027 ; MA # ( ʺ → '' ) MODIFIER LETTER DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →"→ 02F6 ; 0027 0027 ; MA #* ( ˶ → '' ) MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT → APOSTROPHE, APOSTROPHE # →˝→→"→ @@ -1417,6 +1417,7 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO 23E8 ; 2081 2080 ; MA #* ( ⏨ → ₁₀ ) DECIMAL EXPONENT SYMBOL → SUBSCRIPT ONE, SUBSCRIPT ZERO # +1CCF2 ; 0032 ; MA # ( → 2 ) OUTLINED DIGIT TWO → DIGIT TWO # 1D7D0 ; 0032 ; MA # ( 𝟐 → 2 ) MATHEMATICAL BOLD DIGIT TWO → DIGIT TWO # 1D7DA ; 0032 ; MA # ( 𝟚 → 2 ) MATHEMATICAL DOUBLE-STRUCK DIGIT TWO → DIGIT TWO # 1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO # @@ -1429,7 +1430,6 @@ A75A ; 0032 ; MA # ( Ꝛ → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO # A644 ; 0032 ; MA # ( Ꙅ → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→ 14BF ; 0032 ; MA # ( ᒿ → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO # A6EF ; 0032 ; MA # ( ꛯ → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→ -1CCF2 ; 0032 ; MA # ( → 2 ) OUTLINED DIGIT TWO → DIGIT TWO # A9CF ; 0662 ; MA # ( ꧏ → ٢ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO # 06F2 ; 0662 ; MA # ( ۲ → ٢ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO # @@ -1491,6 +1491,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ٢ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI 335A ; 0032 70B9 ; MA #* ( ㍚ → 2点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWO → DIGIT TWO, CJK 
UNIFIED IDEOGRAPH-70B9 # 1D206 ; 0033 ; MA #* ( 𝈆 → 3 ) GREEK VOCAL NOTATION SYMBOL-7 → DIGIT THREE # +1CCF3 ; 0033 ; MA # ( → 3 ) OUTLINED DIGIT THREE → DIGIT THREE # 1D7D1 ; 0033 ; MA # ( 𝟑 → 3 ) MATHEMATICAL BOLD DIGIT THREE → DIGIT THREE # 1D7DB ; 0033 ; MA # ( 𝟛 → 3 ) MATHEMATICAL DOUBLE-STRUCK DIGIT THREE → DIGIT THREE # 1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE # @@ -1506,7 +1507,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→ 16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→ 118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE # -1CCF3 ; 0033 ; MA # ( → 3 ) OUTLINED DIGIT THREE → DIGIT THREE # 06F3 ; 0663 ; MA # ( ۳ → ٣ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE # 1E8C9 ; 0663 ; MA #* ( 𞣉 → ٣ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE # @@ -1531,6 +1531,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335B ; 0033 70B9 ; MA #* ( ㍛ → 3点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THREE → DIGIT THREE, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF4 ; 0034 ; MA # ( → 4 ) OUTLINED DIGIT FOUR → DIGIT FOUR # 1D7D2 ; 0034 ; MA # ( 𝟒 → 4 ) MATHEMATICAL BOLD DIGIT FOUR → DIGIT FOUR # 1D7DC ; 0034 ; MA # ( 𝟜 → 4 ) MATHEMATICAL DOUBLE-STRUCK DIGIT FOUR → DIGIT FOUR # 1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR # @@ -1539,7 +1540,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR # 13CE ; 0034 ; MA # ( Ꮞ → 4 ) CHEROKEE LETTER SE → DIGIT FOUR # 118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR # -1CCF4 ; 0034 ; MA # ( → 4 ) OUTLINED DIGIT FOUR → DIGIT FOUR # 06F4 ; 0664 ; MA # ( ۴ → ٤ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR # @@ -1559,6 +1559,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN 
CAPITAL LETTER ET → DIGIT THREE # 335C ; 0034 70B9 ; MA #* ( ㍜ → 4点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOUR → DIGIT FOUR, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF5 ; 0035 ; MA # ( → 5 ) OUTLINED DIGIT FIVE → DIGIT FIVE # 1D7D3 ; 0035 ; MA # ( 𝟓 → 5 ) MATHEMATICAL BOLD DIGIT FIVE → DIGIT FIVE # 1D7DD ; 0035 ; MA # ( 𝟝 → 5 ) MATHEMATICAL DOUBLE-STRUCK DIGIT FIVE → DIGIT FIVE # 1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE # @@ -1567,7 +1568,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE # 01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE # 118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE # -1CCF5 ; 0035 ; MA # ( → 5 ) OUTLINED DIGIT FIVE → DIGIT FIVE # 2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE # @@ -1581,6 +1581,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335D ; 0035 70B9 ; MA #* ( ㍝ → 5点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIVE → DIGIT FIVE, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF6 ; 0036 ; MA # ( → 6 ) OUTLINED DIGIT SIX → DIGIT SIX # 1D7D4 ; 0036 ; MA # ( 𝟔 → 6 ) MATHEMATICAL BOLD DIGIT SIX → DIGIT SIX # 1D7DE ; 0036 ; MA # ( 𝟞 → 6 ) MATHEMATICAL DOUBLE-STRUCK DIGIT SIX → DIGIT SIX # 1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX # @@ -1591,7 +1592,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX # 13EE ; 0036 ; MA # ( Ꮾ → 6 ) CHEROKEE LETTER WV → DIGIT SIX # 118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX # -1CCF6 ; 0036 ; MA # ( → 6 ) OUTLINED DIGIT SIX → DIGIT SIX # 06F6 ; 0666 ; MA # ( ۶ → ٦ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX # @@ -1610,6 +1610,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335E ; 0036 70B9 ; MA #* ( ㍞ → 6点 ) 
IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIX → DIGIT SIX, CJK UNIFIED IDEOGRAPH-70B9 # 1D212 ; 0037 ; MA #* ( 𝈒 → 7 ) GREEK VOCAL NOTATION SYMBOL-19 → DIGIT SEVEN # +1CCF7 ; 0037 ; MA # ( → 7 ) OUTLINED DIGIT SEVEN → DIGIT SEVEN # 1D7D5 ; 0037 ; MA # ( 𝟕 → 7 ) MATHEMATICAL BOLD DIGIT SEVEN → DIGIT SEVEN # 1D7DF ; 0037 ; MA # ( 𝟟 → 7 ) MATHEMATICAL DOUBLE-STRUCK DIGIT SEVEN → DIGIT SEVEN # 1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN # @@ -1618,7 +1619,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN # 104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN # 118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN # -1CCF7 ; 0037 ; MA # ( → 7 ) OUTLINED DIGIT SEVEN → DIGIT SEVEN # 2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN # @@ -1636,6 +1636,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 09EA ; 0038 ; MA # ( ৪ → 8 ) BENGALI DIGIT FOUR → DIGIT EIGHT # 0A6A ; 0038 ; MA # ( ੪ → 8 ) GURMUKHI DIGIT FOUR → DIGIT EIGHT # 1E8CB ; 0038 ; MA #* ( 𞣋 → 8 ) MENDE KIKAKUI DIGIT FIVE → DIGIT EIGHT # +1CCF8 ; 0038 ; MA # ( → 8 ) OUTLINED DIGIT EIGHT → DIGIT EIGHT # 1D7D6 ; 0038 ; MA # ( 𝟖 → 8 ) MATHEMATICAL BOLD DIGIT EIGHT → DIGIT EIGHT # 1D7E0 ; 0038 ; MA # ( 𝟠 → 8 ) MATHEMATICAL DOUBLE-STRUCK DIGIT EIGHT → DIGIT EIGHT # 1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT # @@ -1645,7 +1646,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT # 0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT # 1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT # -1CCF8 ; 0038 ; MA # ( → 8 ) OUTLINED DIGIT EIGHT → DIGIT EIGHT # 0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT # @@ -1665,6 +1665,7 @@ A76A ; 
0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0B68 ; 0039 ; MA # ( ୨ → 9 ) ORIYA DIGIT TWO → DIGIT NINE # 09ED ; 0039 ; MA # ( ৭ → 9 ) BENGALI DIGIT SEVEN → DIGIT NINE # 0D6D ; 0039 ; MA # ( ൭ → 9 ) MALAYALAM DIGIT SEVEN → DIGIT NINE # +1CCF9 ; 0039 ; MA # ( → 9 ) OUTLINED DIGIT NINE → DIGIT NINE # 1D7D7 ; 0039 ; MA # ( 𝟗 → 9 ) MATHEMATICAL BOLD DIGIT NINE → DIGIT NINE # 1D7E1 ; 0039 ; MA # ( 𝟡 → 9 ) MATHEMATICAL DOUBLE-STRUCK DIGIT NINE → DIGIT NINE # 1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE # @@ -1676,7 +1677,6 @@ A76E ; 0039 ; MA # ( Ꝯ → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE # 118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE # 118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE # 118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE # -1CCF9 ; 0039 ; MA # ( → 9 ) OUTLINED DIGIT NINE → DIGIT NINE # 0967 ; 0669 ; MA # ( १ → ٩ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE # 118E4 ; 0669 ; MA # ( 𑣤 → ٩ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE # @@ -1723,6 +1723,7 @@ FF41 ; 0061 ; MA # ( a → a ) FULLWIDTH LATIN SMALL LETTER A → LATIN SMALL 2DF6 ; 0363 ; MA # ( ⷶ → ͣ ) COMBINING CYRILLIC LETTER A → COMBINING LATIN SMALL LETTER A # FF21 ; 0041 ; MA # ( A → A ) FULLWIDTH LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # →А→ +1CCD6 ; 0041 ; MA #* ( → A ) OUTLINED LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # 1D400 ; 0041 ; MA # ( 𝐀 → A ) MATHEMATICAL BOLD CAPITAL A → LATIN CAPITAL LETTER A # 1D434 ; 0041 ; MA # ( 𝐴 → A ) MATHEMATICAL ITALIC CAPITAL A → LATIN CAPITAL LETTER A # 1D468 ; 0041 ; MA # ( 𝑨 → A ) MATHEMATICAL BOLD ITALIC CAPITAL A → LATIN CAPITAL LETTER A # @@ -1748,7 +1749,6 @@ FF21 ; 0041 ; MA # ( A → A ) FULLWIDTH LATIN CAPITAL LETTER A → LATIN CAPI A4EE ; 0041 ; MA # ( ꓮ → A ) LISU LETTER A → LATIN CAPITAL LETTER A # 16F40 ; 0041 ; MA # ( 𖽀 → A ) MIAO LETTER ZZYA → LATIN CAPITAL LETTER A # 102A0 ; 0041 ; MA # ( 𐊠 → A ) CARIAN 
LETTER A → LATIN CAPITAL LETTER A # -1CCD6 ; 0041 ; MA #* ( → A ) OUTLINED LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # 2376 ; 0061 0332 ; MA #* ( ⍶ → a̲ ) APL FUNCTIONAL SYMBOL ALPHA UNDERBAR → LATIN SMALL LETTER A, COMBINING LOW LINE # →α̲→→ɑ̲→ @@ -1826,6 +1826,7 @@ A4EF ; 2C6F ; MA # ( ꓯ → Ɐ ) LISU LETTER AE → LATIN CAPITAL LETTER TURNE FF22 ; 0042 ; MA # ( B → B ) FULLWIDTH LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # →Β→ 212C ; 0042 ; MA # ( ℬ → B ) SCRIPT CAPITAL B → LATIN CAPITAL LETTER B # +1CCD7 ; 0042 ; MA #* ( → B ) OUTLINED LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # 1D401 ; 0042 ; MA # ( 𝐁 → B ) MATHEMATICAL BOLD CAPITAL B → LATIN CAPITAL LETTER B # 1D435 ; 0042 ; MA # ( 𝐵 → B ) MATHEMATICAL ITALIC CAPITAL B → LATIN CAPITAL LETTER B # 1D469 ; 0042 ; MA # ( 𝑩 → B ) MATHEMATICAL BOLD ITALIC CAPITAL B → LATIN CAPITAL LETTER B # @@ -1852,7 +1853,6 @@ A4D0 ; 0042 ; MA # ( ꓐ → B ) LISU LETTER BA → LATIN CAPITAL LETTER B # 10282 ; 0042 ; MA # ( 𐊂 → B ) LYCIAN LETTER B → LATIN CAPITAL LETTER B # 102A1 ; 0042 ; MA # ( 𐊡 → B ) CARIAN LETTER P2 → LATIN CAPITAL LETTER B # 10301 ; 0042 ; MA # ( 𐌁 → B ) OLD ITALIC LETTER BE → LATIN CAPITAL LETTER B # -1CCD7 ; 0042 ; MA #* ( → B ) OUTLINED LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # 0253 ; 0062 0314 ; MA # ( ɓ → b̔ ) LATIN SMALL LETTER B WITH HOOK → LATIN SMALL LETTER B, COMBINING REVERSED COMMA ABOVE # @@ -1910,6 +1910,7 @@ FF23 ; 0043 ; MA # ( C → C ) FULLWIDTH LATIN CAPITAL LETTER C → LATIN CAPI 216D ; 0043 ; MA # ( Ⅽ → C ) ROMAN NUMERAL ONE HUNDRED → LATIN CAPITAL LETTER C # 2102 ; 0043 ; MA # ( ℂ → C ) DOUBLE-STRUCK CAPITAL C → LATIN CAPITAL LETTER C # 212D ; 0043 ; MA # ( ℭ → C ) BLACK-LETTER CAPITAL C → LATIN CAPITAL LETTER C # +1CCD8 ; 0043 ; MA #* ( → C ) OUTLINED LATIN CAPITAL LETTER C → LATIN CAPITAL LETTER C # 1D402 ; 0043 ; MA # ( 𝐂 → C ) MATHEMATICAL BOLD CAPITAL C → LATIN CAPITAL LETTER C # 1D436 ; 0043 ; MA # ( 𝐶 → C ) MATHEMATICAL ITALIC CAPITAL C → LATIN CAPITAL LETTER C # 
1D46A ; 0043 ; MA # ( 𝑪 → C ) MATHEMATICAL BOLD ITALIC CAPITAL C → LATIN CAPITAL LETTER C # @@ -1930,7 +1931,6 @@ A4DA ; 0043 ; MA # ( ꓚ → C ) LISU LETTER CA → LATIN CAPITAL LETTER C # 10302 ; 0043 ; MA # ( 𐌂 → C ) OLD ITALIC LETTER KE → LATIN CAPITAL LETTER C # 10415 ; 0043 ; MA # ( 𐐕 → C ) DESERET CAPITAL LETTER CHEE → LATIN CAPITAL LETTER C # 1051C ; 0043 ; MA # ( 𐔜 → C ) ELBASAN LETTER SHE → LATIN CAPITAL LETTER C # -1CCD8 ; 0043 ; MA #* ( → C ) OUTLINED LATIN CAPITAL LETTER C → LATIN CAPITAL LETTER C # 00A2 ; 0063 0338 ; MA #* ( ¢ → c̸ ) CENT SIGN → LATIN SMALL LETTER C, COMBINING LONG SOLIDUS OVERLAY # 023C ; 0063 0338 ; MA # ( ȼ → c̸ ) LATIN SMALL LETTER C WITH STROKE → LATIN SMALL LETTER C, COMBINING LONG SOLIDUS OVERLAY # →¢→ @@ -2006,6 +2006,7 @@ A4D2 ; 0064 ; MA # ( ꓒ → d ) LISU LETTER PHA → LATIN SMALL LETTER D # 216E ; 0044 ; MA # ( Ⅾ → D ) ROMAN NUMERAL FIVE HUNDRED → LATIN CAPITAL LETTER D # 2145 ; 0044 ; MA # ( ⅅ → D ) DOUBLE-STRUCK ITALIC CAPITAL D → LATIN CAPITAL LETTER D # +1CCD9 ; 0044 ; MA #* ( → D ) OUTLINED LATIN CAPITAL LETTER D → LATIN CAPITAL LETTER D # 1D403 ; 0044 ; MA # ( 𝐃 → D ) MATHEMATICAL BOLD CAPITAL D → LATIN CAPITAL LETTER D # 1D437 ; 0044 ; MA # ( 𝐷 → D ) MATHEMATICAL ITALIC CAPITAL D → LATIN CAPITAL LETTER D # 1D46B ; 0044 ; MA # ( 𝑫 → D ) MATHEMATICAL BOLD ITALIC CAPITAL D → LATIN CAPITAL LETTER D # @@ -2023,7 +2024,6 @@ A4D2 ; 0064 ; MA # ( ꓒ → d ) LISU LETTER PHA → LATIN SMALL LETTER D # 15DE ; 0044 ; MA # ( ᗞ → D ) CANADIAN SYLLABICS CARRIER THE → LATIN CAPITAL LETTER D # 15EA ; 0044 ; MA # ( ᗪ → D ) CANADIAN SYLLABICS CARRIER PE → LATIN CAPITAL LETTER D # →ᗞ→ A4D3 ; 0044 ; MA # ( ꓓ → D ) LISU LETTER DA → LATIN CAPITAL LETTER D # -1CCD9 ; 0044 ; MA #* ( → D ) OUTLINED LATIN CAPITAL LETTER D → LATIN CAPITAL LETTER D # 0257 ; 0064 0314 ; MA # ( ɗ → d̔ ) LATIN SMALL LETTER D WITH HOOK → LATIN SMALL LETTER D, COMBINING REVERSED COMMA ABOVE # @@ -2099,6 +2099,7 @@ AB32 ; 0065 ; MA # ( ꬲ → e ) LATIN SMALL LETTER BLACKLETTER E → 
LATIN SMAL 22FF ; 0045 ; MA #* ( ⋿ → E ) Z NOTATION BAG MEMBERSHIP → LATIN CAPITAL LETTER E # FF25 ; 0045 ; MA # ( E → E ) FULLWIDTH LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # →Ε→ 2130 ; 0045 ; MA # ( ℰ → E ) SCRIPT CAPITAL E → LATIN CAPITAL LETTER E # +1CCDA ; 0045 ; MA #* ( → E ) OUTLINED LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # 1D404 ; 0045 ; MA # ( 𝐄 → E ) MATHEMATICAL BOLD CAPITAL E → LATIN CAPITAL LETTER E # 1D438 ; 0045 ; MA # ( 𝐸 → E ) MATHEMATICAL ITALIC CAPITAL E → LATIN CAPITAL LETTER E # 1D46C ; 0045 ; MA # ( 𝑬 → E ) MATHEMATICAL BOLD ITALIC CAPITAL E → LATIN CAPITAL LETTER E # @@ -2124,7 +2125,6 @@ A4F0 ; 0045 ; MA # ( ꓰ → E ) LISU LETTER E → LATIN CAPITAL LETTER E # 118A6 ; 0045 ; MA # ( 𑢦 → E ) WARANG CITI CAPITAL LETTER II → LATIN CAPITAL LETTER E # 118AE ; 0045 ; MA # ( 𑢮 → E ) WARANG CITI CAPITAL LETTER YUJ → LATIN CAPITAL LETTER E # 10286 ; 0045 ; MA # ( 𐊆 → E ) LYCIAN LETTER I → LATIN CAPITAL LETTER E # -1CCDA ; 0045 ; MA #* ( → E ) OUTLINED LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # 011B ; 0115 ; MA # ( ě → ĕ ) LATIN SMALL LETTER E WITH CARON → LATIN SMALL LETTER E WITH BREVE # @@ -2195,6 +2195,7 @@ A799 ; 0066 ; MA # ( ꞙ → f ) LATIN SMALL LETTER F WITH STROKE → LATIN SMAL 1D213 ; 0046 ; MA #* ( 𝈓 → F ) GREEK VOCAL NOTATION SYMBOL-20 → LATIN CAPITAL LETTER F # →Ϝ→ 2131 ; 0046 ; MA # ( ℱ → F ) SCRIPT CAPITAL F → LATIN CAPITAL LETTER F # +1CCDB ; 0046 ; MA #* ( → F ) OUTLINED LATIN CAPITAL LETTER F → LATIN CAPITAL LETTER F # 1D405 ; 0046 ; MA # ( 𝐅 → F ) MATHEMATICAL BOLD CAPITAL F → LATIN CAPITAL LETTER F # 1D439 ; 0046 ; MA # ( 𝐹 → F ) MATHEMATICAL ITALIC CAPITAL F → LATIN CAPITAL LETTER F # 1D46D ; 0046 ; MA # ( 𝑭 → F ) MATHEMATICAL BOLD ITALIC CAPITAL F → LATIN CAPITAL LETTER F # @@ -2217,7 +2218,6 @@ A4DD ; 0046 ; MA # ( ꓝ → F ) LISU LETTER TSA → LATIN CAPITAL LETTER F # 10287 ; 0046 ; MA # ( 𐊇 → F ) LYCIAN LETTER W → LATIN CAPITAL LETTER F # 102A5 ; 0046 ; MA # ( 𐊥 → F ) CARIAN LETTER R → LATIN CAPITAL LETTER F # 
10525 ; 0046 ; MA # ( 𐔥 → F ) ELBASAN LETTER GHE → LATIN CAPITAL LETTER F # -1CCDB ; 0046 ; MA #* ( → F ) OUTLINED LATIN CAPITAL LETTER F → LATIN CAPITAL LETTER F # 0192 ; 0066 0326 ; MA # ( ƒ → f̦ ) LATIN SMALL LETTER F WITH HOOK → LATIN SMALL LETTER F, COMBINING COMMA BELOW # →f̡→ @@ -2264,6 +2264,7 @@ FF47 ; 0067 ; MA # ( g → g ) FULLWIDTH LATIN SMALL LETTER G → LATIN SMALL 018D ; 0067 ; MA # ( ƍ → g ) LATIN SMALL LETTER TURNED DELTA → LATIN SMALL LETTER G # 0581 ; 0067 ; MA # ( ց → g ) ARMENIAN SMALL LETTER CO → LATIN SMALL LETTER G # +1CCDC ; 0047 ; MA #* ( → G ) OUTLINED LATIN CAPITAL LETTER G → LATIN CAPITAL LETTER G # 1D406 ; 0047 ; MA # ( 𝐆 → G ) MATHEMATICAL BOLD CAPITAL G → LATIN CAPITAL LETTER G # 1D43A ; 0047 ; MA # ( 𝐺 → G ) MATHEMATICAL ITALIC CAPITAL G → LATIN CAPITAL LETTER G # 1D46E ; 0047 ; MA # ( 𝑮 → G ) MATHEMATICAL BOLD ITALIC CAPITAL G → LATIN CAPITAL LETTER G # @@ -2281,7 +2282,6 @@ FF47 ; 0067 ; MA # ( g → g ) FULLWIDTH LATIN SMALL LETTER G → LATIN SMALL 13C0 ; 0047 ; MA # ( Ꮐ → G ) CHEROKEE LETTER NAH → LATIN CAPITAL LETTER G # 13F3 ; 0047 ; MA # ( Ᏻ → G ) CHEROKEE LETTER YU → LATIN CAPITAL LETTER G # A4D6 ; 0047 ; MA # ( ꓖ → G ) LISU LETTER GA → LATIN CAPITAL LETTER G # -1CCDC ; 0047 ; MA #* ( → G ) OUTLINED LATIN CAPITAL LETTER G → LATIN CAPITAL LETTER G # 1DA2 ; 1D4D ; MA # ( ᶢ → ᵍ ) MODIFIER LETTER SMALL SCRIPT G → MODIFIER LETTER SMALL G # @@ -2325,6 +2325,7 @@ FF28 ; 0048 ; MA # ( H → H ) FULLWIDTH LATIN CAPITAL LETTER H → LATIN CAPI 210B ; 0048 ; MA # ( ℋ → H ) SCRIPT CAPITAL H → LATIN CAPITAL LETTER H # 210C ; 0048 ; MA # ( ℌ → H ) BLACK-LETTER CAPITAL H → LATIN CAPITAL LETTER H # 210D ; 0048 ; MA # ( ℍ → H ) DOUBLE-STRUCK CAPITAL H → LATIN CAPITAL LETTER H # +1CCDD ; 0048 ; MA #* ( → H ) OUTLINED LATIN CAPITAL LETTER H → LATIN CAPITAL LETTER H # 1D407 ; 0048 ; MA # ( 𝐇 → H ) MATHEMATICAL BOLD CAPITAL H → LATIN CAPITAL LETTER H # 1D43B ; 0048 ; MA # ( 𝐻 → H ) MATHEMATICAL ITALIC CAPITAL H → LATIN CAPITAL LETTER H # 1D46F ; 0048 ; 
MA # ( 𝑯 → H ) MATHEMATICAL BOLD ITALIC CAPITAL H → LATIN CAPITAL LETTER H # @@ -2347,7 +2348,6 @@ FF28 ; 0048 ; MA # ( H → H ) FULLWIDTH LATIN CAPITAL LETTER H → LATIN CAPI 157C ; 0048 ; MA # ( ᕼ → H ) CANADIAN SYLLABICS NUNAVUT H → LATIN CAPITAL LETTER H # A4E7 ; 0048 ; MA # ( ꓧ → H ) LISU LETTER XA → LATIN CAPITAL LETTER H # 102CF ; 0048 ; MA # ( 𐋏 → H ) CARIAN LETTER E2 → LATIN CAPITAL LETTER H # -1CCDD ; 0048 ; MA #* ( → H ) OUTLINED LATIN CAPITAL LETTER H → LATIN CAPITAL LETTER H # 1D78 ; 1D34 ; MA # ( ᵸ → ᴴ ) MODIFIER LETTER CYRILLIC EN → MODIFIER LETTER CAPITAL H # @@ -2465,6 +2465,7 @@ FF4A ; 006A ; MA # ( j → j ) FULLWIDTH LATIN SMALL LETTER J → LATIN SMALL 0458 ; 006A ; MA # ( ј → j ) CYRILLIC SMALL LETTER JE → LATIN SMALL LETTER J # FF2A ; 004A ; MA # ( J → J ) FULLWIDTH LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # →Ј→ +1CCDF ; 004A ; MA #* ( → J ) OUTLINED LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # 1D409 ; 004A ; MA # ( 𝐉 → J ) MATHEMATICAL BOLD CAPITAL J → LATIN CAPITAL LETTER J # 1D43D ; 004A ; MA # ( 𝐽 → J ) MATHEMATICAL ITALIC CAPITAL J → LATIN CAPITAL LETTER J # 1D471 ; 004A ; MA # ( 𝑱 → J ) MATHEMATICAL BOLD ITALIC CAPITAL J → LATIN CAPITAL LETTER J # @@ -2484,7 +2485,6 @@ A7B2 ; 004A ; MA # ( Ʝ → J ) LATIN CAPITAL LETTER J WITH CROSSED-TAIL → LA 13AB ; 004A ; MA # ( Ꭻ → J ) CHEROKEE LETTER GU → LATIN CAPITAL LETTER J # 148D ; 004A ; MA # ( ᒍ → J ) CANADIAN SYLLABICS CO → LATIN CAPITAL LETTER J # A4D9 ; 004A ; MA # ( ꓙ → J ) LISU LETTER JA → LATIN CAPITAL LETTER J # -1CCDF ; 004A ; MA #* ( → J ) OUTLINED LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # 0249 ; 006A 0335 ; MA # ( ɉ → j̵ ) LATIN SMALL LETTER J WITH STROKE → LATIN SMALL LETTER J, COMBINING SHORT STROKE OVERLAY # @@ -2513,6 +2513,7 @@ AB7B ; 1D0A ; MA # ( ꭻ → ᴊ ) CHEROKEE SMALL LETTER GU → LATIN LETTER SMA 212A ; 004B ; MA # ( K → K ) KELVIN SIGN → LATIN CAPITAL LETTER K # FF2B ; 004B ; MA # ( K → K ) FULLWIDTH LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # →Κ→ +1CCE0 
; 004B ; MA #* ( → K ) OUTLINED LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # 1D40A ; 004B ; MA # ( 𝐊 → K ) MATHEMATICAL BOLD CAPITAL K → LATIN CAPITAL LETTER K # 1D43E ; 004B ; MA # ( 𝐾 → K ) MATHEMATICAL ITALIC CAPITAL K → LATIN CAPITAL LETTER K # 1D472 ; 004B ; MA # ( 𝑲 → K ) MATHEMATICAL BOLD ITALIC CAPITAL K → LATIN CAPITAL LETTER K # @@ -2538,7 +2539,6 @@ FF2B ; 004B ; MA # ( K → K ) FULLWIDTH LATIN CAPITAL LETTER K → LATIN CAPI 16D5 ; 004B ; MA # ( ᛕ → K ) RUNIC LETTER OPEN-P → LATIN CAPITAL LETTER K # A4D7 ; 004B ; MA # ( ꓗ → K ) LISU LETTER KA → LATIN CAPITAL LETTER K # 10518 ; 004B ; MA # ( 𐔘 → K ) ELBASAN LETTER QE → LATIN CAPITAL LETTER K # -1CCE0 ; 004B ; MA #* ( → K ) OUTLINED LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # 0199 ; 006B 0314 ; MA # ( ƙ → k̔ ) LATIN SMALL LETTER K WITH HOOK → LATIN SMALL LETTER K, COMBINING REVERSED COMMA ABOVE # @@ -2561,6 +2561,7 @@ FFE8 ; 006C ; MA #* ( │ → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL 06F1 ; 006C ; MA # ( ۱ → l ) EXTENDED ARABIC-INDIC DIGIT ONE → LATIN SMALL LETTER L # →1→ 10320 ; 006C ; MA #* ( 𐌠 → l ) OLD ITALIC NUMERAL ONE → LATIN SMALL LETTER L # →𐌉→→I→ 1E8C7 ; 006C ; MA #* ( 𞣇 → l ) MENDE KIKAKUI DIGIT ONE → LATIN SMALL LETTER L # +1CCF1 ; 006C ; MA # ( → l ) OUTLINED DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7CF ; 006C ; MA # ( 𝟏 → l ) MATHEMATICAL BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7D9 ; 006C ; MA # ( 𝟙 → l ) MATHEMATICAL DOUBLE-STRUCK DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→ @@ -2572,6 +2573,7 @@ FF29 ; 006C ; MA # ( I → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMAL 2160 ; 006C ; MA # ( Ⅰ → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→ 2110 ; 006C ; MA # ( ℐ → l ) SCRIPT CAPITAL I → LATIN SMALL LETTER L # →I→ 2111 ; 006C ; MA # ( ℑ → l ) BLACK-LETTER CAPITAL I → LATIN SMALL LETTER L # →I→ +1CCDE ; 006C ; MA #* ( → l ) OUTLINED LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # 
→I→ 1D408 ; 006C ; MA # ( 𝐈 → l ) MATHEMATICAL BOLD CAPITAL I → LATIN SMALL LETTER L # →I→ 1D43C ; 006C ; MA # ( 𝐼 → l ) MATHEMATICAL ITALIC CAPITAL I → LATIN SMALL LETTER L # →I→ 1D470 ; 006C ; MA # ( 𝑰 → l ) MATHEMATICAL BOLD ITALIC CAPITAL I → LATIN SMALL LETTER L # →I→ @@ -2624,12 +2626,11 @@ A4F2 ; 006C ; MA # ( ꓲ → l ) LISU LETTER I → LATIN SMALL LETTER L # →I 16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→ 1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→ 10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→ -1CCDE ; 006C ; MA #* ( → l ) OUTLINED LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →I→ -1CCF1 ; 006C ; MA # ( → l ) OUTLINED DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L # 216C ; 004C ; MA # ( Ⅼ → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L # 2112 ; 004C ; MA # ( ℒ → L ) SCRIPT CAPITAL L → LATIN CAPITAL LETTER L # +1CCE1 ; 004C ; MA #* ( → L ) OUTLINED LATIN CAPITAL LETTER L → LATIN CAPITAL LETTER L # 1D40B ; 004C ; MA # ( 𝐋 → L ) MATHEMATICAL BOLD CAPITAL L → LATIN CAPITAL LETTER L # 1D43F ; 004C ; MA # ( 𝐿 → L ) MATHEMATICAL ITALIC CAPITAL L → LATIN CAPITAL LETTER L # 1D473 ; 004C ; MA # ( 𝑳 → L ) MATHEMATICAL BOLD ITALIC CAPITAL L → LATIN CAPITAL LETTER L # @@ -2651,7 +2652,6 @@ A4E1 ; 004C ; MA # ( ꓡ → L ) LISU LETTER LA → LATIN CAPITAL LETTER L # 118B2 ; 004C ; MA # ( 𑢲 → L ) WARANG CITI CAPITAL LETTER TTE → LATIN CAPITAL LETTER L # 1041B ; 004C ; MA # ( 𐐛 → L ) DESERET CAPITAL LETTER ETH → LATIN CAPITAL LETTER L # 10526 ; 004C ; MA # ( 𐔦 → L ) ELBASAN LETTER GHAMMA → LATIN CAPITAL LETTER L # -1CCE1 ; 004C ; MA #* ( → L ) OUTLINED LATIN CAPITAL LETTER L → LATIN CAPITAL LETTER L # FD3C ; 006C 030B ; MA # ( ﴼ → l̋ ) ARABIC LIGATURE ALEF WITH FATHATAN FINAL FORM → LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →اً→ FD3D ; 006C 030B ; MA # ( ﴽ → l̋ ) ARABIC LIGATURE ALEF WITH 
FATHATAN ISOLATED FORM → LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →اً→ @@ -2805,6 +2805,7 @@ ABAE ; 029F ; MA # ( ꮮ → ʟ ) CHEROKEE SMALL LETTER TLE → LATIN LETTER SMA FF2D ; 004D ; MA # ( M → M ) FULLWIDTH LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # →Μ→ 216F ; 004D ; MA # ( Ⅿ → M ) ROMAN NUMERAL ONE THOUSAND → LATIN CAPITAL LETTER M # 2133 ; 004D ; MA # ( ℳ → M ) SCRIPT CAPITAL M → LATIN CAPITAL LETTER M # +1CCE2 ; 004D ; MA #* ( → M ) OUTLINED LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # 1D40C ; 004D ; MA # ( 𝐌 → M ) MATHEMATICAL BOLD CAPITAL M → LATIN CAPITAL LETTER M # 1D440 ; 004D ; MA # ( 𝑀 → M ) MATHEMATICAL ITALIC CAPITAL M → LATIN CAPITAL LETTER M # 1D474 ; 004D ; MA # ( 𝑴 → M ) MATHEMATICAL BOLD ITALIC CAPITAL M → LATIN CAPITAL LETTER M # @@ -2832,7 +2833,6 @@ FF2D ; 004D ; MA # ( M → M ) FULLWIDTH LATIN CAPITAL LETTER M → LATIN CAPI A4DF ; 004D ; MA # ( ꓟ → M ) LISU LETTER MA → LATIN CAPITAL LETTER M # 102B0 ; 004D ; MA # ( 𐊰 → M ) CARIAN LETTER S → LATIN CAPITAL LETTER M # 10311 ; 004D ; MA # ( 𐌑 → M ) OLD ITALIC LETTER SHE → LATIN CAPITAL LETTER M # -1CCE2 ; 004D ; MA #* ( → M ) OUTLINED LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # 04CD ; 004D 0326 ; MA # ( Ӎ → M̦ ) CYRILLIC CAPITAL LETTER EM WITH TAIL → LATIN CAPITAL LETTER M, COMBINING COMMA BELOW # →М̡→ @@ -2858,6 +2858,7 @@ A4DF ; 004D ; MA # ( ꓟ → M ) LISU LETTER MA → LATIN CAPITAL LETTER M # FF2E ; 004E ; MA # ( N → N ) FULLWIDTH LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # →Ν→ 2115 ; 004E ; MA # ( ℕ → N ) DOUBLE-STRUCK CAPITAL N → LATIN CAPITAL LETTER N # +1CCE3 ; 004E ; MA #* ( → N ) OUTLINED LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # 1D40D ; 004E ; MA # ( 𝐍 → N ) MATHEMATICAL BOLD CAPITAL N → LATIN CAPITAL LETTER N # 1D441 ; 004E ; MA # ( 𝑁 → N ) MATHEMATICAL ITALIC CAPITAL N → LATIN CAPITAL LETTER N # 1D475 ; 004E ; MA # ( 𝑵 → N ) MATHEMATICAL BOLD ITALIC CAPITAL N → LATIN CAPITAL LETTER N # @@ -2879,7 +2880,6 @@ FF2E ; 004E ; MA # ( N → N ) FULLWIDTH 
LATIN CAPITAL LETTER N → LATIN CAPI 2C9A ; 004E ; MA # ( Ⲛ → N ) COPTIC CAPITAL LETTER NI → LATIN CAPITAL LETTER N # A4E0 ; 004E ; MA # ( ꓠ → N ) LISU LETTER NA → LATIN CAPITAL LETTER N # 10513 ; 004E ; MA # ( 𐔓 → N ) ELBASAN LETTER NE → LATIN CAPITAL LETTER N # -1CCE3 ; 004E ; MA #* ( → N ) OUTLINED LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # 1018E ; 004E 030A ; MA #* ( 𐆎 → N̊ ) NOMISMA SIGN → LATIN CAPITAL LETTER N, COMBINING RING ABOVE # →Νͦ→ @@ -2994,6 +2994,7 @@ FBA6 ; 006F ; MA # ( ﮦ → o ) ARABIC LETTER HEH GOAL ISOLATED FORM → 3007 ; 004F ; MA # ( 〇 → O ) IDEOGRAPHIC NUMBER ZERO → LATIN CAPITAL LETTER O # 114D0 ; 004F ; MA # ( 𑓐 → O ) TIRHUTA DIGIT ZERO → LATIN CAPITAL LETTER O # →০→→0→ 118E0 ; 004F ; MA # ( 𑣠 → O ) WARANG CITI DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ +1CCF0 ; 004F ; MA # ( → O ) OUTLINED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7CE ; 004F ; MA # ( 𝟎 → O ) MATHEMATICAL BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7D8 ; 004F ; MA # ( 𝟘 → O ) MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ @@ -3001,6 +3002,7 @@ FBA6 ; 006F ; MA # ( ﮦ → o ) ARABIC LETTER HEH GOAL ISOLATED FORM → 1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ FF2F ; 004F ; MA # ( O → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→ +1CCE4 ; 004F ; MA #* ( → O ) OUTLINED LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # 1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O # 1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O # 1D476 ; 004F ; MA # ( 𝑶 → O ) MATHEMATICAL BOLD ITALIC CAPITAL O → LATIN CAPITAL LETTER O # @@ -3033,8 +3035,6 @@ A4F3 ; 004F ; MA # ( ꓳ → O ) LISU LETTER O → LATIN CAPITAL LETTER O # 102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN 
LETTER O → LATIN CAPITAL LETTER O # 10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O # 10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O # -1CCE4 ; 004F ; MA #* ( → O ) OUTLINED LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # -1CCF0 ; 004F ; MA # ( → O ) OUTLINED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR # 1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→ @@ -3202,6 +3202,7 @@ FF50 ; 0070 ; MA # ( p → p ) FULLWIDTH LATIN SMALL LETTER P → LATIN SMALL FF30 ; 0050 ; MA # ( P → P ) FULLWIDTH LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # →Р→ 2119 ; 0050 ; MA # ( ℙ → P ) DOUBLE-STRUCK CAPITAL P → LATIN CAPITAL LETTER P # +1CCE5 ; 0050 ; MA #* ( → P ) OUTLINED LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # 1D40F ; 0050 ; MA # ( 𝐏 → P ) MATHEMATICAL BOLD CAPITAL P → LATIN CAPITAL LETTER P # 1D443 ; 0050 ; MA # ( 𝑃 → P ) MATHEMATICAL ITALIC CAPITAL P → LATIN CAPITAL LETTER P # 1D477 ; 0050 ; MA # ( 𝑷 → P ) MATHEMATICAL BOLD ITALIC CAPITAL P → LATIN CAPITAL LETTER P # @@ -3226,7 +3227,6 @@ FF30 ; 0050 ; MA # ( P → P ) FULLWIDTH LATIN CAPITAL LETTER P → LATIN CAPI 146D ; 0050 ; MA # ( ᑭ → P ) CANADIAN SYLLABICS KI → LATIN CAPITAL LETTER P # A4D1 ; 0050 ; MA # ( ꓑ → P ) LISU LETTER PA → LATIN CAPITAL LETTER P # 10295 ; 0050 ; MA # ( 𐊕 → P ) LYCIAN LETTER R → LATIN CAPITAL LETTER P # -1CCE5 ; 0050 ; MA #* ( → P ) OUTLINED LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # 01A5 ; 0070 0314 ; MA # ( ƥ → p̔ ) LATIN SMALL LETTER P WITH HOOK → LATIN SMALL LETTER P, COMBINING REVERSED COMMA ABOVE # @@ -3272,6 +3272,7 @@ ABB2 ; 1D18 ; MA # ( ꮲ → ᴘ ) CHEROKEE SMALL LETTER TLV → LATIN LETTER SM 0566 ; 0071 ; MA # ( զ → q ) ARMENIAN SMALL LETTER ZA → LATIN SMALL LETTER Q # 211A ; 0051 ; MA # ( ℚ → Q ) DOUBLE-STRUCK CAPITAL Q → LATIN CAPITAL LETTER Q # +1CCE6 ; 0051 ; MA #* ( → Q ) OUTLINED LATIN CAPITAL 
LETTER Q → LATIN CAPITAL LETTER Q # 1D410 ; 0051 ; MA # ( 𝐐 → Q ) MATHEMATICAL BOLD CAPITAL Q → LATIN CAPITAL LETTER Q # 1D444 ; 0051 ; MA # ( 𝑄 → Q ) MATHEMATICAL ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # 1D478 ; 0051 ; MA # ( 𝑸 → Q ) MATHEMATICAL BOLD ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # @@ -3285,7 +3286,6 @@ ABB2 ; 1D18 ; MA # ( ꮲ → ᴘ ) CHEROKEE SMALL LETTER TLV → LATIN LETTER SM 1D64C ; 0051 ; MA # ( 𝙌 → Q ) MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # 1D680 ; 0051 ; MA # ( 𝚀 → Q ) MATHEMATICAL MONOSPACE CAPITAL Q → LATIN CAPITAL LETTER Q # 2D55 ; 0051 ; MA # ( ⵕ → Q ) TIFINAGH LETTER YARR → LATIN CAPITAL LETTER Q # -1CCE6 ; 0051 ; MA #* ( → Q ) OUTLINED LATIN CAPITAL LETTER Q → LATIN CAPITAL LETTER Q # 02A0 ; 0071 0314 ; MA # ( ʠ → q̔ ) LATIN SMALL LETTER Q WITH HOOK → LATIN SMALL LETTER Q, COMBINING REVERSED COMMA ABOVE # @@ -3338,6 +3338,7 @@ AB81 ; 0072 ; MA # ( ꮁ → r ) CHEROKEE SMALL LETTER HU → LATIN SMALL LETTER 211B ; 0052 ; MA # ( ℛ → R ) SCRIPT CAPITAL R → LATIN CAPITAL LETTER R # 211C ; 0052 ; MA # ( ℜ → R ) BLACK-LETTER CAPITAL R → LATIN CAPITAL LETTER R # 211D ; 0052 ; MA # ( ℝ → R ) DOUBLE-STRUCK CAPITAL R → LATIN CAPITAL LETTER R # +1CCE7 ; 0052 ; MA #* ( → R ) OUTLINED LATIN CAPITAL LETTER R → LATIN CAPITAL LETTER R # 1D411 ; 0052 ; MA # ( 𝐑 → R ) MATHEMATICAL BOLD CAPITAL R → LATIN CAPITAL LETTER R # 1D445 ; 0052 ; MA # ( 𝑅 → R ) MATHEMATICAL ITALIC CAPITAL R → LATIN CAPITAL LETTER R # 1D479 ; 0052 ; MA # ( 𝑹 → R ) MATHEMATICAL BOLD ITALIC CAPITAL R → LATIN CAPITAL LETTER R # @@ -3355,7 +3356,6 @@ AB81 ; 0072 ; MA # ( ꮁ → r ) CHEROKEE SMALL LETTER HU → LATIN SMALL LETTER 1587 ; 0052 ; MA # ( ᖇ → R ) CANADIAN SYLLABICS TLHI → LATIN CAPITAL LETTER R # A4E3 ; 0052 ; MA # ( ꓣ → R ) LISU LETTER ZHA → LATIN CAPITAL LETTER R # 16F35 ; 0052 ; MA # ( 𖼵 → R ) MIAO LETTER ZHA → LATIN CAPITAL LETTER R # -1CCE7 ; 0052 ; MA #* ( → R ) OUTLINED LATIN CAPITAL LETTER R → LATIN CAPITAL LETTER R # 027D ; 0072 0328 ; MA # ( ɽ → 
r̨ ) LATIN SMALL LETTER R WITH TAIL → LATIN SMALL LETTER R, COMBINING OGONEK # @@ -3425,6 +3425,7 @@ ABAA ; 0073 ; MA # ( ꮪ → s ) CHEROKEE SMALL LETTER DU → LATIN SMALL LETTER 10448 ; 0073 ; MA # ( 𐑈 → s ) DESERET SMALL LETTER ZHEE → LATIN SMALL LETTER S # FF33 ; 0053 ; MA # ( S → S ) FULLWIDTH LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # →Ѕ→ +1CCE8 ; 0053 ; MA #* ( → S ) OUTLINED LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # 1D412 ; 0053 ; MA # ( 𝐒 → S ) MATHEMATICAL BOLD CAPITAL S → LATIN CAPITAL LETTER S # 1D446 ; 0053 ; MA # ( 𝑆 → S ) MATHEMATICAL ITALIC CAPITAL S → LATIN CAPITAL LETTER S # 1D47A ; 0053 ; MA # ( 𝑺 → S ) MATHEMATICAL BOLD ITALIC CAPITAL S → LATIN CAPITAL LETTER S # @@ -3446,13 +3447,14 @@ A4E2 ; 0053 ; MA # ( ꓢ → S ) LISU LETTER SA → LATIN CAPITAL LETTER S # 16F3A ; 0053 ; MA # ( 𖼺 → S ) MIAO LETTER SA → LATIN CAPITAL LETTER S # 10296 ; 0053 ; MA # ( 𐊖 → S ) LYCIAN LETTER S → LATIN CAPITAL LETTER S # 10420 ; 0053 ; MA # ( 𐐠 → S ) DESERET CAPITAL LETTER ZHEE → LATIN CAPITAL LETTER S # -1CCE8 ; 0053 ; MA #* ( → S ) OUTLINED LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # 0282 ; 0073 0328 ; MA # ( ʂ → s̨ ) LATIN SMALL LETTER S WITH HOOK → LATIN SMALL LETTER S, COMBINING OGONEK # 1D74 ; 0073 0334 ; MA # ( ᵴ → s̴ ) LATIN SMALL LETTER S WITH MIDDLE TILDE → LATIN SMALL LETTER S, COMBINING TILDE OVERLAY # A7B5 ; 00DF ; MA # ( ꞵ → ß ) LATIN SMALL LETTER BETA → LATIN SMALL LETTER SHARP S # →β→ +1E9E ; 00DF ; MA # ( ẞ → ß ) LATIN CAPITAL LETTER SHARP S → LATIN SMALL LETTER SHARP S # +A7D6 ; 00DF ; MA # ( Ꟗ → ß ) LATIN CAPITAL LETTER MIDDLE SCOTS S → LATIN SMALL LETTER SHARP S # →β→ 03B2 ; 00DF ; MA # ( β → ß ) GREEK SMALL LETTER BETA → LATIN SMALL LETTER SHARP S # 03D0 ; 00DF ; MA # ( ϐ → ß ) GREEK BETA SYMBOL → LATIN SMALL LETTER SHARP S # →β→ 1D6C3 ; 00DF ; MA # ( 𝛃 → ß ) MATHEMATICAL BOLD SMALL BETA → LATIN SMALL LETTER SHARP S # →β→ @@ -3503,6 +3505,7 @@ AB4D ; 0283 ; MA # ( ꭍ → ʃ ) LATIN SMALL LETTER BASELINE ESH → LATIN SMAL 27D9 ; 0054 ; 
MA #* ( ⟙ → T ) LARGE DOWN TACK → LATIN CAPITAL LETTER T # 1F768 ; 0054 ; MA #* ( 🝨 → T ) ALCHEMICAL SYMBOL FOR CRUCIBLE-4 → LATIN CAPITAL LETTER T # FF34 ; 0054 ; MA # ( T → T ) FULLWIDTH LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # →Т→ +1CCE9 ; 0054 ; MA #* ( → T ) OUTLINED LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # 1D413 ; 0054 ; MA # ( 𝐓 → T ) MATHEMATICAL BOLD CAPITAL T → LATIN CAPITAL LETTER T # 1D447 ; 0054 ; MA # ( 𝑇 → T ) MATHEMATICAL ITALIC CAPITAL T → LATIN CAPITAL LETTER T # 1D47B ; 0054 ; MA # ( 𝑻 → T ) MATHEMATICAL BOLD ITALIC CAPITAL T → LATIN CAPITAL LETTER T # @@ -3531,7 +3534,6 @@ A4D4 ; 0054 ; MA # ( ꓔ → T ) LISU LETTER TA → LATIN CAPITAL LETTER T # 10297 ; 0054 ; MA # ( 𐊗 → T ) LYCIAN LETTER T → LATIN CAPITAL LETTER T # 102B1 ; 0054 ; MA # ( 𐊱 → T ) CARIAN LETTER C-18 → LATIN CAPITAL LETTER T # 10315 ; 0054 ; MA # ( 𐌕 → T ) OLD ITALIC LETTER TE → LATIN CAPITAL LETTER T # -1CCE9 ; 0054 ; MA #* ( → T ) OUTLINED LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # 01AD ; 0074 0314 ; MA # ( ƭ → t̔ ) LATIN SMALL LETTER T WITH HOOK → LATIN SMALL LETTER T, COMBINING REVERSED COMMA ABOVE # @@ -3614,6 +3616,7 @@ AB52 ; 0075 ; MA # ( ꭒ → u ) LATIN SMALL LETTER U WITH LEFT HOOK → LATIN S 222A ; 0055 ; MA #* ( ∪ → U ) UNION → LATIN CAPITAL LETTER U # →ᑌ→ 22C3 ; 0055 ; MA #* ( ⋃ → U ) N-ARY UNION → LATIN CAPITAL LETTER U # →∪→→ᑌ→ +1CCEA ; 0055 ; MA #* ( → U ) OUTLINED LATIN CAPITAL LETTER U → LATIN CAPITAL LETTER U # 1D414 ; 0055 ; MA # ( 𝐔 → U ) MATHEMATICAL BOLD CAPITAL U → LATIN CAPITAL LETTER U # 1D448 ; 0055 ; MA # ( 𝑈 → U ) MATHEMATICAL ITALIC CAPITAL U → LATIN CAPITAL LETTER U # 1D47C ; 0055 ; MA # ( 𝑼 → U ) MATHEMATICAL BOLD ITALIC CAPITAL U → LATIN CAPITAL LETTER U # @@ -3634,7 +3637,6 @@ AB52 ; 0075 ; MA # ( ꭒ → u ) LATIN SMALL LETTER U WITH LEFT HOOK → LATIN S A4F4 ; 0055 ; MA # ( ꓴ → U ) LISU LETTER U → LATIN CAPITAL LETTER U # 16F42 ; 0055 ; MA # ( 𖽂 → U ) MIAO LETTER WA → LATIN CAPITAL LETTER U # 118B8 ; 0055 ; MA # ( 𑢸 → U ) WARANG 
CITI CAPITAL LETTER PU → LATIN CAPITAL LETTER U # -1CCEA ; 0055 ; MA #* ( → U ) OUTLINED LATIN CAPITAL LETTER U → LATIN CAPITAL LETTER U # 01D4 ; 016D ; MA # ( ǔ → ŭ ) LATIN SMALL LETTER U WITH CARON → LATIN SMALL LETTER U WITH BREVE # @@ -3699,6 +3701,7 @@ ABA9 ; 0076 ; MA # ( ꮩ → v ) CHEROKEE SMALL LETTER DO → LATIN SMALL LETTER 0667 ; 0056 ; MA # ( ٧ → V ) ARABIC-INDIC DIGIT SEVEN → LATIN CAPITAL LETTER V # 06F7 ; 0056 ; MA # ( ۷ → V ) EXTENDED ARABIC-INDIC DIGIT SEVEN → LATIN CAPITAL LETTER V # →٧→ 2164 ; 0056 ; MA # ( Ⅴ → V ) ROMAN NUMERAL FIVE → LATIN CAPITAL LETTER V # +1CCEB ; 0056 ; MA #* ( → V ) OUTLINED LATIN CAPITAL LETTER V → LATIN CAPITAL LETTER V # 1D415 ; 0056 ; MA # ( 𝐕 → V ) MATHEMATICAL BOLD CAPITAL V → LATIN CAPITAL LETTER V # 1D449 ; 0056 ; MA # ( 𝑉 → V ) MATHEMATICAL ITALIC CAPITAL V → LATIN CAPITAL LETTER V # 1D47D ; 0056 ; MA # ( 𝑽 → V ) MATHEMATICAL BOLD ITALIC CAPITAL V → LATIN CAPITAL LETTER V # @@ -3721,7 +3724,6 @@ A4E6 ; 0056 ; MA # ( ꓦ → V ) LISU LETTER HA → LATIN CAPITAL LETTER V # 16F08 ; 0056 ; MA # ( 𖼈 → V ) MIAO LETTER VA → LATIN CAPITAL LETTER V # 118A0 ; 0056 ; MA # ( 𑢠 → V ) WARANG CITI CAPITAL LETTER NGAA → LATIN CAPITAL LETTER V # 1051D ; 0056 ; MA # ( 𐔝 → V ) ELBASAN LETTER TE → LATIN CAPITAL LETTER V # -1CCEB ; 0056 ; MA #* ( → V ) OUTLINED LATIN CAPITAL LETTER V → LATIN CAPITAL LETTER V # 10197 ; 0056 0335 ; MA #* ( 𐆗 → V̵ ) ROMAN QUINARIUS SIGN → LATIN CAPITAL LETTER V, COMBINING SHORT STROKE OVERLAY # →V̶→ @@ -3748,6 +3750,7 @@ A4E6 ; 0056 ; MA # ( ꓦ → V ) LISU LETTER HA → LATIN CAPITAL LETTER V # 0668 ; 0245 ; MA # ( ٨ → Ʌ ) ARABIC-INDIC DIGIT EIGHT → LATIN CAPITAL LETTER TURNED V # →Λ→ 06F8 ; 0245 ; MA # ( ۸ → Ʌ ) EXTENDED ARABIC-INDIC DIGIT EIGHT → LATIN CAPITAL LETTER TURNED V # →٨→→Λ→ +A7DA ; 0245 ; MA # ( → Ʌ ) LATIN CAPITAL LETTER LAMBDA → LATIN CAPITAL LETTER TURNED V # →Λ→ 039B ; 0245 ; MA # ( Λ → Ʌ ) GREEK CAPITAL LETTER LAMDA → LATIN CAPITAL LETTER TURNED V # 1D6B2 ; 0245 ; MA # ( 𝚲 → Ʌ ) MATHEMATICAL BOLD 
CAPITAL LAMDA → LATIN CAPITAL LETTER TURNED V # →Λ→ 1D6EC ; 0245 ; MA # ( 𝛬 → Ʌ ) MATHEMATICAL ITALIC CAPITAL LAMDA → LATIN CAPITAL LETTER TURNED V # →Λ→ @@ -3763,6 +3766,8 @@ A4E5 ; 0245 ; MA # ( ꓥ → Ʌ ) LISU LETTER NGA → LATIN CAPITAL LETTER TURNE 16F3D ; 0245 ; MA # ( 𖼽 → Ʌ ) MIAO LETTER ZZA → LATIN CAPITAL LETTER TURNED V # 1028D ; 0245 ; MA # ( 𐊍 → Ʌ ) LYCIAN LETTER L → LATIN CAPITAL LETTER TURNED V # →Λ→ +A7DC ; 0245 0338 ; MA # ( → Ʌ̸ ) LATIN CAPITAL LETTER LAMBDA WITH STROKE → LATIN CAPITAL LETTER TURNED V, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ + 04C5 ; 0245 0326 ; MA # ( Ӆ → Ʌ̦ ) CYRILLIC CAPITAL LETTER EL WITH TAIL → LATIN CAPITAL LETTER TURNED V, COMBINING COMMA BELOW # →Л̡→ 143D ; 0245 00B7 ; MA # ( ᐽ → Ʌ· ) CANADIAN SYLLABICS WEST-CREE PWI → LATIN CAPITAL LETTER TURNED V, MIDDLE DOT # →ᐱᐧ→→ᐱ·→ @@ -3792,6 +3797,7 @@ AB83 ; 0077 ; MA # ( ꮃ → w ) CHEROKEE SMALL LETTER LA → LATIN SMALL LETTER 118EF ; 0057 ; MA #* ( 𑣯 → W ) WARANG CITI NUMBER SIXTY → LATIN CAPITAL LETTER W # 118E6 ; 0057 ; MA # ( 𑣦 → W ) WARANG CITI DIGIT SIX → LATIN CAPITAL LETTER W # +1CCEC ; 0057 ; MA #* ( → W ) OUTLINED LATIN CAPITAL LETTER W → LATIN CAPITAL LETTER W # 1D416 ; 0057 ; MA # ( 𝐖 → W ) MATHEMATICAL BOLD CAPITAL W → LATIN CAPITAL LETTER W # 1D44A ; 0057 ; MA # ( 𝑊 → W ) MATHEMATICAL ITALIC CAPITAL W → LATIN CAPITAL LETTER W # 1D47E ; 0057 ; MA # ( 𝑾 → W ) MATHEMATICAL BOLD ITALIC CAPITAL W → LATIN CAPITAL LETTER W # @@ -3809,7 +3815,6 @@ AB83 ; 0077 ; MA # ( ꮃ → w ) CHEROKEE SMALL LETTER LA → LATIN SMALL LETTER 13B3 ; 0057 ; MA # ( Ꮃ → W ) CHEROKEE LETTER LA → LATIN CAPITAL LETTER W # 13D4 ; 0057 ; MA # ( Ꮤ → W ) CHEROKEE LETTER TA → LATIN CAPITAL LETTER W # A4EA ; 0057 ; MA # ( ꓪ → W ) LISU LETTER WA → LATIN CAPITAL LETTER W # -1CCEC ; 0057 ; MA #* ( → W ) OUTLINED LATIN CAPITAL LETTER W → LATIN CAPITAL LETTER W # 047D ; 0077 0486 0487 ; MA # ( ѽ → w҆҇ ) CYRILLIC SMALL LETTER OMEGA WITH TITLO → LATIN SMALL LETTER W, COMBINING CYRILLIC PSILI PNEUMATA, COMBINING CYRILLIC 
POKRYTIE # →ѡ҆҇→ @@ -3857,6 +3862,7 @@ FF58 ; 0078 ; MA # ( x → x ) FULLWIDTH LATIN SMALL LETTER X → LATIN SMALL 118EC ; 0058 ; MA #* ( 𑣬 → X ) WARANG CITI NUMBER THIRTY → LATIN CAPITAL LETTER X # FF38 ; 0058 ; MA # ( X → X ) FULLWIDTH LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # →Х→ 2169 ; 0058 ; MA # ( Ⅹ → X ) ROMAN NUMERAL TEN → LATIN CAPITAL LETTER X # +1CCED ; 0058 ; MA #* ( → X ) OUTLINED LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # 1D417 ; 0058 ; MA # ( 𝐗 → X ) MATHEMATICAL BOLD CAPITAL X → LATIN CAPITAL LETTER X # 1D44B ; 0058 ; MA # ( 𝑋 → X ) MATHEMATICAL ITALIC CAPITAL X → LATIN CAPITAL LETTER X # 1D47F ; 0058 ; MA # ( 𝑿 → X ) MATHEMATICAL BOLD ITALIC CAPITAL X → LATIN CAPITAL LETTER X # @@ -3886,7 +3892,6 @@ A4EB ; 0058 ; MA # ( ꓫ → X ) LISU LETTER SHA → LATIN CAPITAL LETTER X # 102B4 ; 0058 ; MA # ( 𐊴 → X ) CARIAN LETTER X → LATIN CAPITAL LETTER X # 10317 ; 0058 ; MA # ( 𐌗 → X ) OLD ITALIC LETTER EKS → LATIN CAPITAL LETTER X # 10527 ; 0058 ; MA # ( 𐔧 → X ) ELBASAN LETTER KHE → LATIN CAPITAL LETTER X # -1CCED ; 0058 ; MA #* ( → X ) OUTLINED LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # 2A30 ; 0078 0307 ; MA #* ( ⨰ → ẋ ) MULTIPLICATION SIGN WITH DOT ABOVE → LATIN SMALL LETTER X, COMBINING DOT ABOVE # →×̇→ @@ -3934,6 +3939,7 @@ AB5A ; 0079 ; MA # ( ꭚ → y ) LATIN SMALL LETTER Y WITH SHORT RIGHT LEG → L 118DC ; 0079 ; MA # ( 𑣜 → y ) WARANG CITI SMALL LETTER HAR → LATIN SMALL LETTER Y # →ɣ→→γ→ FF39 ; 0059 ; MA # ( Y → Y ) FULLWIDTH LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # →Υ→ +1CCEE ; 0059 ; MA #* ( → Y ) OUTLINED LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # 1D418 ; 0059 ; MA # ( 𝐘 → Y ) MATHEMATICAL BOLD CAPITAL Y → LATIN CAPITAL LETTER Y # 1D44C ; 0059 ; MA # ( 𝑌 → Y ) MATHEMATICAL ITALIC CAPITAL Y → LATIN CAPITAL LETTER Y # 1D480 ; 0059 ; MA # ( 𝒀 → Y ) MATHEMATICAL BOLD ITALIC CAPITAL Y → LATIN CAPITAL LETTER Y # @@ -3963,7 +3969,6 @@ A4EC ; 0059 ; MA # ( ꓬ → Y ) LISU LETTER YA → LATIN CAPITAL LETTER Y # 16F43 ; 0059 ; MA # ( 𖽃 
→ Y ) MIAO LETTER AH → LATIN CAPITAL LETTER Y # 118A4 ; 0059 ; MA # ( 𑢤 → Y ) WARANG CITI CAPITAL LETTER YA → LATIN CAPITAL LETTER Y # 102B2 ; 0059 ; MA # ( 𐊲 → Y ) CARIAN LETTER U → LATIN CAPITAL LETTER Y # -1CCEE ; 0059 ; MA #* ( → Y ) OUTLINED LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # 01B4 ; 0079 0314 ; MA # ( ƴ → y̔ ) LATIN SMALL LETTER Y WITH HOOK → LATIN SMALL LETTER Y, COMBINING REVERSED COMMA ABOVE # @@ -4002,6 +4007,7 @@ AB93 ; 007A ; MA # ( ꮓ → z ) CHEROKEE SMALL LETTER NO → LATIN SMALL LETTER FF3A ; 005A ; MA # ( Z → Z ) FULLWIDTH LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # →Ζ→ 2124 ; 005A ; MA # ( ℤ → Z ) DOUBLE-STRUCK CAPITAL Z → LATIN CAPITAL LETTER Z # 2128 ; 005A ; MA # ( ℨ → Z ) BLACK-LETTER CAPITAL Z → LATIN CAPITAL LETTER Z # +1CCEF ; 005A ; MA #* ( → Z ) OUTLINED LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # 1D419 ; 005A ; MA # ( 𝐙 → Z ) MATHEMATICAL BOLD CAPITAL Z → LATIN CAPITAL LETTER Z # 1D44D ; 005A ; MA # ( 𝑍 → Z ) MATHEMATICAL ITALIC CAPITAL Z → LATIN CAPITAL LETTER Z # 1D481 ; 005A ; MA # ( 𝒁 → Z ) MATHEMATICAL BOLD ITALIC CAPITAL Z → LATIN CAPITAL LETTER Z # @@ -4022,7 +4028,6 @@ FF3A ; 005A ; MA # ( Z → Z ) FULLWIDTH LATIN CAPITAL LETTER Z → LATIN CAPI 13C3 ; 005A ; MA # ( Ꮓ → Z ) CHEROKEE LETTER NO → LATIN CAPITAL LETTER Z # A4DC ; 005A ; MA # ( ꓜ → Z ) LISU LETTER DZA → LATIN CAPITAL LETTER Z # 118A9 ; 005A ; MA # ( 𑢩 → Z ) WARANG CITI CAPITAL LETTER O → LATIN CAPITAL LETTER Z # -1CCEF ; 005A ; MA #* ( → Z ) OUTLINED LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # 0290 ; 007A 0328 ; MA # ( ʐ → z̨ ) LATIN SMALL LETTER Z WITH RETROFLEX HOOK → LATIN SMALL LETTER Z, COMBINING OGONEK # →z̢→ @@ -4115,6 +4120,7 @@ A668 ; 0298 ; MA # ( Ꙩ → ʘ ) CYRILLIC CAPITAL LETTER MONOCULAR O → LATIN 2CE4 ; 03D7 ; MA # ( ⳤ → ϗ ) COPTIC SYMBOL KAI → GREEK KAI SYMBOL # +A7DB ; 03BB ; MA # ( → λ ) LATIN SMALL LETTER LAMBDA → GREEK SMALL LETTER LAMDA # 1D6CC ; 03BB ; MA # ( 𝛌 → λ ) MATHEMATICAL BOLD SMALL LAMDA → GREEK SMALL LETTER LAMDA # 
1D706 ; 03BB ; MA # ( 𝜆 → λ ) MATHEMATICAL ITALIC SMALL LAMDA → GREEK SMALL LETTER LAMDA # 1D740 ; 03BB ; MA # ( 𝝀 → λ ) MATHEMATICAL BOLD ITALIC SMALL LAMDA → GREEK SMALL LETTER LAMDA # @@ -4123,6 +4129,8 @@ A668 ; 0298 ; MA # ( Ꙩ → ʘ ) CYRILLIC CAPITAL LETTER MONOCULAR O → LATIN 2C96 ; 03BB ; MA # ( Ⲗ → λ ) COPTIC CAPITAL LETTER LAULA → GREEK SMALL LETTER LAMDA # 104DB ; 03BB ; MA # ( 𐓛 → λ ) OSAGE SMALL LETTER AH → GREEK SMALL LETTER LAMDA # +019B ; 03BB 0338 ; MA # ( ƛ → λ̸ ) LATIN SMALL LETTER LAMBDA WITH STROKE → GREEK SMALL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →λ̷→ + 00B5 ; 03BC ; MA # ( µ → μ ) MICRO SIGN → GREEK SMALL LETTER MU # 1D6CD ; 03BC ; MA # ( 𝛍 → μ ) MATHEMATICAL BOLD SMALL MU → GREEK SMALL LETTER MU # 1D707 ; 03BC ; MA # ( 𝜇 → μ ) MATHEMATICAL ITALIC SMALL MU → GREEK SMALL LETTER MU # @@ -5982,6 +5990,8 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL 11CB2 ; 11CAA ; MA # ( 𑲲 → 𑲪 ) MARCHEN VOWEL SIGN U → MARCHEN SUBJOINED LETTER RA # +1734 ; 1715 ; MA # ( ᜴ → ᜕ ) HANUNOO SIGN PAMUDPOD → TAGALOG SIGN PAMUDPOD # + 1081 ; 1002 103E ; MA # ( ႁ → ဂှ ) MYANMAR LETTER SHAN HA → MYANMAR LETTER GA, MYANMAR CONSONANT SIGN MEDIAL HA # 1000 ; 1002 102C ; MA # ( က → ဂာ ) MYANMAR LETTER KA → MYANMAR LETTER GA, MYANMAR VOWEL SIGN AA # @@ -7221,6 +7231,7 @@ FA31 ; 50E7 ; MA # ( 僧 → 僧 ) CJK COMPATIBILITY IDEOGRAPH-FA31 → CJK UNIF 2F80C ; 349E ; MA # ( 㒞 → 㒞 ) CJK COMPATIBILITY IDEOGRAPH-2F80C → CJK UNIFIED IDEOGRAPH-349E # +3126 ; 513F ; MA # ( ㄦ → 儿 ) BOPOMOFO LETTER ER → CJK UNIFIED IDEOGRAPH-513F # 2F09 ; 513F ; MA #* ( ⼉ → 儿 ) KANGXI RADICAL LEGS → CJK UNIFIED IDEOGRAPH-513F # FA0C ; 5140 ; MA # ( 兀 → 兀 ) CJK COMPATIBILITY IDEOGRAPH-FA0C → CJK UNIFIED IDEOGRAPH-5140 # @@ -7936,16 +7947,16 @@ FA8D ; 63C4 ; MA # ( 揄 → 揄 ) CJK COMPATIBILITY IDEOGRAPH-FA8D → CJK UNIF 2F8BD ; 63E4 ; MA # ( 揤 → 揤 ) CJK COMPATIBILITY IDEOGRAPH-2F8BD → CJK UNIFIED IDEOGRAPH-63E4 # -FA8F ; 6452 ; MA # ( 摒 → 摒 ) CJK COMPATIBILITY IDEOGRAPH-FA8F → 
CJK UNIFIED IDEOGRAPH-6452 # +FA8E ; 641C ; MA # ( 搜 → 搜 ) CJK COMPATIBILITY IDEOGRAPH-FA8E → CJK UNIFIED IDEOGRAPH-641C # 2F8BE ; 22BF1 ; MA # ( 𢯱 → 𢯱 ) CJK COMPATIBILITY IDEOGRAPH-2F8BE → CJK UNIFIED IDEOGRAPH-22BF1 # -FA8E ; 641C ; MA # ( 搜 → 搜 ) CJK COMPATIBILITY IDEOGRAPH-FA8E → CJK UNIFIED IDEOGRAPH-641C # - 2F8BF ; 6422 ; MA # ( 搢 → 搢 ) CJK COMPATIBILITY IDEOGRAPH-2F8BF → CJK UNIFIED IDEOGRAPH-6422 # 2F8C0 ; 63C5 ; MA # ( 揅 → 揅 ) CJK COMPATIBILITY IDEOGRAPH-2F8C0 → CJK UNIFIED IDEOGRAPH-63C5 # +FA8F ; 6452 ; MA # ( 摒 → 摒 ) CJK COMPATIBILITY IDEOGRAPH-FA8F → CJK UNIFIED IDEOGRAPH-6452 # + 2F8C3 ; 6469 ; MA # ( 摩 → 摩 ) CJK COMPATIBILITY IDEOGRAPH-2F8C3 → CJK UNIFIED IDEOGRAPH-6469 # 2F8C6 ; 6477 ; MA # ( 摷 → 摷 ) CJK COMPATIBILITY IDEOGRAPH-2F8C6 → CJK UNIFIED IDEOGRAPH-6477 # @@ -9670,5 +9681,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF 2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 # -# total: 6347 +# total: 6355 diff --git a/unicodetools/data/security/dev/confusablesSummary.txt b/unicodetools/data/security/dev/confusablesSummary.txt index a35691149..093100c4b 100644 --- a/unicodetools/data/security/dev/confusablesSummary.txt +++ b/unicodetools/data/security/dev/confusablesSummary.txt @@ -1,5 +1,5 @@ # confusablesSummary.txt -# Date: 2024-05-03, 03:26:41 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -91,7 +91,7 @@ ← ( ʽ ) 02BD MODIFIER LETTER REVERSED COMMA # →‘→ ← ( ʾ ) 02BE MODIFIER LETTER RIGHT HALF RING # →ʼ→→′→ ← ( ˈ ) 02C8 MODIFIER LETTER VERTICAL LINE -← ( ˊ ) 02CA MODIFIER LETTER ACUTE ACCENT # →ʹ→→′→ +← ( ˊ ) 02CA MODIFIER LETTER ACUTE ACCENT # →΄→→ʹ→ ← ( ˋ ) 02CB MODIFIER LETTER GRAVE ACCENT # →`→→‘→ ← ( ߴ ) 07F4 NKO HIGH TONE APOSTROPHE # →’→ ← ( ߵ ) 07F5 NKO LOW TONE APOSTROPHE # →‘→ @@ -4925,8 +4925,10 @@ ← ( Ϸ ) 03F7 GREEK CAPITAL LETTER SHO ← ( 𐓄 ) 104C4 OSAGE CAPITAL LETTER PA -# ß β Ᏸ ꞵ ϐ 𝛃 𝛽 𝜷 𝝱 𝞫 +# ß Ꟗ ẞ β Ᏸ ꞵ ϐ 𝛃 𝛽 𝜷 𝝱 𝞫 ( ß ) 00DF LATIN SMALL LETTER SHARP S +← ( Ꟗ ) A7D6 LATIN CAPITAL LETTER MIDDLE SCOTS S # →β→ +← ( ẞ ) 1E9E LATIN CAPITAL LETTER SHARP S ← ( β ) 03B2 GREEK SMALL LETTER BETA ← ( Ᏸ ) 13F0 CHEROKEE LETTER YE # →β→ ← ( ꞵ ) A7B5 LATIN SMALL LETTER BETA # →β→ @@ -5095,6 +5097,11 @@ ← ( 𝈡 ) 1D221 GREEK INSTRUMENTAL NOTATION SYMBOL-7 ← ( ℇ ) 2107 EULER CONSTANT +# λ̸ λ̷ ƛ + ( ƛ ) 019B LATIN SMALL LETTER LAMBDA WITH STROKE +← ( λ̸ ) 03BB 0338 GREEK SMALL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →λ̷→ +← ( λ̷ ) 03BB 0337 GREEK SMALL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY + # ƨ ᴤ ϩ ꙅ ( ƨ ) 01A8 LATIN SMALL LETTER TONE TWO ← ( ᴤ ) 1D24 LATIN LETTER VOICED LARYNGEAL SPIRANT @@ -5165,8 +5172,9 @@ ( ɂ ) 0242 LATIN SMALL LETTER GLOTTAL STOP ← ( ꭾ ) AB7E CHEROKEE SMALL LETTER HE -# Ʌ ٨ ۸ Λ Л ᐱ ⴷ ꓥ ꛎ 𐊍 𖼽 𐒰 𝚲 𝛬 𝜦 𝝠 𝞚 +# Ʌ ٨ ۸ Λ Л ᐱ ⴷ ꓥ ꛎ 𐊍 𖼽 𐒰 𝚲 𝛬 𝜦 𝝠 𝞚 ( Ʌ ) 0245 LATIN CAPITAL LETTER TURNED V +← ( ) A7DA LATIN CAPITAL LETTER LAMBDA # →Λ→ ← ( ٨ ) 0668 ARABIC-INDIC DIGIT EIGHT # →Λ→ ← ( ۸ ) 06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT # →٨→→Λ→ ← ( Λ ) 039B GREEK CAPITAL LETTER LAMDA @@ -5197,6 +5205,13 @@ ← ( Л̡ ) 041B 0321 CYRILLIC CAPITAL LETTER EL, COMBINING PALATALIZED HOOK BELOW ← ( Ӆ ) 04C5 CYRILLIC CAPITAL LETTER EL WITH TAIL # →Л̡→ +# ̸ Ʌ̸ Λ̸ Λ̷ + ( Ʌ̸ ) 0245 0338 LATIN CAPITAL LETTER TURNED V, COMBINING LONG SOLIDUS OVERLAY +← ( ̸ ) A7DA 0338 
LATIN CAPITAL LETTER LAMBDA, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ +← ( Λ̸ ) 039B 0338 GREEK CAPITAL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ +← ( Λ̷ ) 039B 0337 GREEK CAPITAL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY +← ( ) A7DC LATIN CAPITAL LETTER LAMBDA WITH STROKE # →Λ̷→ + # ɋ ᶐ ( ɋ ) 024B LATIN SMALL LETTER Q WITH HOOK TAIL ← ( ᶐ ) 1D90 LATIN SMALL LETTER ALPHA WITH RETROFLEX HOOK @@ -5868,8 +5883,9 @@ ← ( 𝝵 ) 1D775 MATHEMATICAL SANS-SERIF BOLD SMALL ZETA ← ( 𝞯 ) 1D7AF MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ZETA -# λ Ⲗ 𐓛 𝛌 𝜆 𝝀 𝝺 𝞴 +# λ Ⲗ 𐓛 𝛌 𝜆 𝝀 𝝺 𝞴 ( λ ) 03BB GREEK SMALL LETTER LAMDA +← ( ) A7DB LATIN SMALL LETTER LAMBDA ← ( Ⲗ ) 2C96 COPTIC CAPITAL LETTER LAULA ← ( 𐓛 ) 104DB OSAGE SMALL LETTER AH ← ( 𝛌 ) 1D6CC MATHEMATICAL BOLD SMALL LAMDA @@ -11373,6 +11389,10 @@ ( ᛯ ) 16EF RUNIC TVIMADUR SYMBOL ← ( ⵣ ) 2D63 TIFINAGH LETTER YAZ +# ᜕ ᜴ + ( ᜕ ) 1715 TAGALOG SIGN PAMUDPOD +← ( ᜴ ) 1734 HANUNOO SIGN PAMUDPOD + # អ ឣ ( អ ) 17A2 KHMER LETTER QA ← ( ឣ ) 17A3 KHMER INDEPENDENT VOWEL QAQ @@ -12322,9 +12342,10 @@ ( ⼈ ) 2F08 KANGXI RADICAL MAN ← ( 人 ) 4EBA CJK UNIFIED IDEOGRAPH-4EBA -# 儿 ⼉ +# 儿 ㄦ ⼉ ( ⼉ ) 2F09 KANGXI RADICAL LEGS ← ( 儿 ) 513F CJK UNIFIED IDEOGRAPH-513F +← ( ㄦ ) 3126 BOPOMOFO LETTER ER # →儿→ # 入 ⼊ ( ⼊ ) 2F0A KANGXI RADICAL ENTER @@ -17245,5 +17266,5 @@ ( 𪘀 ) 2A600 CJK UNIFIED IDEOGRAPH-2A600 ← ( 𪘀 ) 2FA1D CJK COMPATIBILITY IDEOGRAPH-2FA1D -# total : 7290 +# total : 7302 diff --git a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt index 5435b8bff..6bd3611df 100644 --- a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt +++ b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt @@ -1,5 +1,5 @@ # confusablesSummaryIdentifier.txt -# Date: 2024-05-04, 21:31:06 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. 
and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -542,8 +542,10 @@ ( Ö ) 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS ← ( Ő ) 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -# ß β +# ß Ꟗ ẞ β ( ß ) 00DF LATIN SMALL LETTER SHARP S +← ( Ꟗ ) A7D6 LATIN CAPITAL LETTER MIDDLE SCOTS S # →β→ +← ( ẞ ) 1E9E LATIN CAPITAL LETTER SHARP S ← ( β ) 03B2 GREEK SMALL LETTER BETA # å ȧ @@ -1618,6 +1620,10 @@ ( 二 ) 4E8C CJK UNIFIED IDEOGRAPH-4E8C ← ( ニ ) 30CB KATAKANA LETTER NI +# 儿 ㄦ + ( 儿 ) 513F CJK UNIFIED IDEOGRAPH-513F +← ( ㄦ ) 3126 BOPOMOFO LETTER ER + # 八 ハ ( 八 ) 516B CJK UNIFIED IDEOGRAPH-516B ← ( ハ ) 30CF KATAKANA LETTER HA @@ -1839,5 +1845,5 @@ ( 鹂 ) 9E42 CJK UNIFIED IDEOGRAPH-9E42 ← ( 鹃 ) 9E43 CJK UNIFIED IDEOGRAPH-9E43 -# total : 635 +# total : 638 diff --git a/unicodetools/data/security/dev/data/source/confusables-source.txt b/unicodetools/data/security/dev/data/source/confusables-source.txt index 65804747e..e4c47a779 100644 --- a/unicodetools/data/security/dev/data/source/confusables-source.txt +++ b/unicodetools/data/security/dev/data/source/confusables-source.txt @@ -1,4 +1,17 @@ -0021 ; 01C3 # ( ! → ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK +# See https://github.com/unicode-org/unicodetools/blob/main/docs/security.md for how to use this file. +# The format is +# Source ; Target ; comments # comments +# Source is: +# - a hex code point +# - a literal character +# - a range of the above with .. (need to check this) +# - a UnicodeSet +# Target is: +# - a hex code point +# - a literal character +# - a sequence of hex code points and or literal characters (they can be mixed) +####### +0021 ; 01C3 # ( ! 
→ ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK 0022 ; 02BA # ( " → ʺ) QUOTATION MARK → MODIFIER LETTER DOUBLE PRIME 0022 ; 0027 0027 0022 ; 05F4 # ( " → ״) QUOTATION MARK → HEBREW PUNCTUATION GERSHAYIM @@ -5437,4 +5450,26 @@ ABBB; 0473; V8_0; ꮻ => ѳ; CHEROKEE SMALL LETTER WI => CYRILLIC SMALL LETTER F 1F16E ; C 20E0 ; V11_0 ; CIRCLED C WITH OVERLAID BACKSLASH # 1F16F ; 🚹 ; V11_0 ; CIRCLED HUMAN FIGURE +# 178-A76 — Section 21 of document L2/24-012 +513F ; 儿 # V16.0 ; U+513F ︎➡︎ U+16FF2 +16FF3 ; 兒 # V16.0 ; U+5152 ➡ U+16FF3 +ㄦ ; 儿 # V16.0 ; U+3126 ㄦ BOPOMOFO LETTER ER ➡ 儿 + +# 176-A116 — Section 2a of L2/23-164 + +A7DA ; Λ # V16.0 ; U+A7DA LATIN CAPITAL LETTER LAMBDA ➡ greek equiv +A7DB ; λ # V16.0 ; U+A7DB LATIN SMALL LETTER LAMBDA ➡ greek equiv +A7DC ; Λ 0337 # V16.0 ; U+A7DC LATIN CAPITAL LETTER LAMBDA WITH STROKE ➡ greek equiv +ƛ ; λ 0337 # V16.0 ; existing Latin variant + +# 165-A37 — L2/20-272 + +1715 ; 1734 # V16.0 ; U+1715 TAGALOG SIGN PAMUDPOD ➡ 1734, Hanunoo Sign Pamudpod + +# 166-A55 — Section 3n of L2/21-016R + +ß ; β # sharp S with beta +ẞ ; ß # sharp S upper/lower +A7D6 ; β # Middle Scots S, uppercase +A7D6 ; β # Middle Scots S, lowercase diff --git a/unicodetools/data/security/dev/data/source/formatted-source.txt b/unicodetools/data/security/dev/data/source/formatted-source.txt index b7475ec78..216c1689e 100644 --- a/unicodetools/data/security/dev/data/source/formatted-source.txt +++ b/unicodetools/data/security/dev/data/source/formatted-source.txt @@ -1,5 +1,5 @@ # formatted-source.txt -# Date: 2024-05-03, 03:26:38 GMT +# Date: 2024-05-31, 21:12:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -899,6 +899,9 @@ 00DE ; 104C4 # ( Þ ~ 𐓄 ) LATIN CAPITAL LETTER THORN ~ OSAGE CAPITAL LETTER PA +00DF ; 1E9E # ( ß ~ ẞ ) LATIN SMALL LETTER SHARP S ~ LATIN CAPITAL LETTER SHARP S +00DF ; 03B2 # ( ß ~ β ) LATIN SMALL LETTER SHARP S ~ GREEK SMALL LETTER BETA + 00E5 ; 0227 # ( å ~ ȧ ) LATIN SMALL LETTER A WITH RING ABOVE ~ LATIN SMALL LETTER A WITH DOT ABOVE 00F0 ; 1E8CD # ( ð ~ 𞣍 ) LATIN SMALL LETTER ETH ~ MENDE KIKAKUI DIGIT SEVEN @@ -1295,6 +1298,8 @@ 039B ; A6CE # ( Λ ~ ꛎ ) GREEK CAPITAL LETTER LAMDA ~ BAMUM LETTER MI 039B ; 1028D # ( Λ ~ 𐊍 ) GREEK CAPITAL LETTER LAMDA ~ LYCIAN LETTER L +039B 0337 ; A7DC # ( Λ̷ ~ ) GREEK CAPITAL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY ~ LATIN CAPITAL LETTER LAMBDA WITH STROKE + 039C ; 041C # ( Μ ~ М ) GREEK CAPITAL LETTER MU ~ CYRILLIC CAPITAL LETTER EM 039C ; 216F # ( Μ ~ Ⅿ ) GREEK CAPITAL LETTER MU ~ ROMAN NUMERAL ONE THOUSAND @@ -1354,8 +1359,11 @@ 03BA ; 043A # ( κ ~ к ) GREEK SMALL LETTER KAPPA ~ CYRILLIC SMALL LETTER KA +03BB ; A7DB # ( λ ~ ) GREEK SMALL LETTER LAMDA ~ LATIN SMALL LETTER LAMBDA 03BB ; 104DB # ( λ ~ 𐓛 ) GREEK SMALL LETTER LAMDA ~ OSAGE SMALL LETTER AH +03BB 0337 ; 019B # ( λ̷ ~ ƛ ) GREEK SMALL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY ~ LATIN SMALL LETTER LAMBDA WITH STROKE + 03BD ; 2174 # ( ν ~ ⅴ ) GREEK SMALL LETTER NU ~ SMALL ROMAN NUMERAL FIVE 03BF ; 043E # ( ο ~ о ) GREEK SMALL LETTER OMICRON ~ CYRILLIC SMALL LETTER O @@ -3599,6 +3607,8 @@ 16EF ; 2D63 # ( ᛯ ~ ⵣ ) RUNIC TVIMADUR SYMBOL ~ TIFINAGH LETTER YAZ +1715 ; 1734 # ( ᜕ ~ ᜴ ) TAGALOG SIGN PAMUDPOD ~ HANUNOO SIGN PAMUDPOD + 17A2 ; 17A3 # ( អ ~ ឣ ) KHMER LETTER QA ~ KHMER INDEPENDENT VOWEL QAQ 185C ; 1896 # ( ᡜ ~ ᢖ ) MONGOLIAN LETTER TODO DZA ~ MONGOLIAN LETTER ALI GALI ZA @@ -4030,6 +4040,8 @@ 5024 ; 503C # ( 値 ~ 值 ) CJK UNIFIED IDEOGRAPH-5024 ~ CJK UNIFIED IDEOGRAPH-503C +513F ; 3126 # ( 儿 ~ ㄦ ) CJK UNIFIED IDEOGRAPH-513F ~ BOPOMOFO LETTER ER + 5553 ; 555F # ( 
啓 ~ 啟 ) CJK UNIFIED IDEOGRAPH-5553 ~ CJK UNIFIED IDEOGRAPH-555F 5861 ; 586B # ( 塡 ~ 填 ) CJK UNIFIED IDEOGRAPH-5861 ~ CJK UNIFIED IDEOGRAPH-586B @@ -4158,6 +4170,10 @@ A792 ; 0404 # ( Ꞓ ~ Є ) LATIN CAPITAL LETTER C WITH BAR ~ CYRILLIC CAPITAL LE A793 ; 0454 # ( ꞓ ~ є ) LATIN SMALL LETTER C WITH BAR ~ CYRILLIC SMALL LETTER UKRAINIAN IE +A7D6 ; 03B2 # ( Ꟗ ~ β ) LATIN CAPITAL LETTER MIDDLE SCOTS S ~ GREEK SMALL LETTER BETA + +A7DA ; 039B # ( ~ Λ ) LATIN CAPITAL LETTER LAMBDA ~ GREEK CAPITAL LETTER LAMDA + A7FB ; 15B7 # ( ꟻ ~ ᖷ ) LATIN EPIGRAPHIC LETTER REVERSED F ~ CANADIAN SYLLABICS BLACKFOOT WA A7FB ; 1D230 # ( ꟻ ~ 𝈰 ) LATIN EPIGRAPHIC LETTER REVERSED F ~ GREEK INSTRUMENTAL NOTATION SYMBOL-30 diff --git a/unicodetools/data/uca/dev/CollationTest.html b/unicodetools/data/uca/dev/CollationTest.html index f204d01c4..82e72214a 100644 --- a/unicodetools/data/uca/dev/CollationTest.html +++ b/unicodetools/data/uca/dev/CollationTest.html @@ -91,7 +91,7 @@
If there are any errors, then the UCA implementation is not compliant.
These files contain test cases that include ill-formed strings, with surrogate code points. Implementations that do not weight surrogate code points the same way as reserved code points - may filter out such lines lines in the test cases, before testing for conformance.
+ may filter out such lines in the test cases, before testing for conformance.© COPY_YEAR Unicode, Inc. All Rights Reserved.
diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt
index 96274a852..4de1b642a 100644
--- a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt
+++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE.txt
@@ -1,5 +1,5 @@
# CollationTest_NON_IGNORABLE.txt
-# Date: 2024-05-02, 01:46:26 GMT
+# Date: 2024-06-05, 18:49:37 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -677,8 +677,10 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 |]
10D26 0334; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 004A 0033 | 0002 0002 |]
0334 10D27; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 |]
10D27 0334; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 |]
-10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |]
-10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |]
+0334 10D6B; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |]
+10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |]
+0334 10D6D; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |]
+10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 |]
0334 10F48; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |]
10F48 0334; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 |]
0334 10F49; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 |]
@@ -693,6 +695,7 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 |]
10F84 0334; # (𐾄) OLD UYGHUR COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 |]
0334 1E2AE; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 |]
1E2AE 0334; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 |]
+0334 1E5EE; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 |]
1E5EE 0334; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 |]
0316 0334; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 |]
0334 0316; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 |]
@@ -840,6 +843,7 @@ FE27 0334; # (︧) COMBINING LIGATURE LEFT HALF BELOW [| 004A 0034 | 0002 0002 |
10F83 0334; # (𐾃) OLD UYGHUR COMBINING DOT BELOW [| 004A 0034 | 0002 0002 |]
0334 10F85; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 |]
10F85 0334; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 |]
+0334 1E5EF; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 |]
1E5EF 0334; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 |]
0334 3099; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 |]
3099 0334; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 |]
@@ -1022,7 +1026,8 @@ FB1E 0334; # (ﬞ) HEBREW POINT JUDEO-SPANISH VARIKA [| 004A 0061 | 0002 0002 |]
089F 0334; # (࢟) ARABIC HALF MADDA OVER MADDA [| 004A 0082 | 0002 0002 |]
0334 10EAC; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 |]
10EAC 0334; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 |]
-0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |]
+0334 0897; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |]
+0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 |]
0334 0654; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 |]
0654 0334; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 |]
0334 10EAB; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 004A 0083 | 0002 0002 |]
@@ -1167,7 +1172,8 @@ A6F1 0334; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 004A 00B7 | 0002 0002 |]
1C37 0334; # (᰷) LEPCHA SIGN NUKTA [| 004A 00C2 | 0002 0002 |]
0334 A9B3; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 |]
A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 |]
-10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |]
+0334 10D6C; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |]
+10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 |]
0334 110BA; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 |]
110BA 0334; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 |]
0334 11173; # (𑅳) MAHAJANI SIGN NUKTA [| 004A 00C2 | 0002 0002 |]
@@ -1218,7 +1224,8 @@ A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 |]
10A38 0334; # (𐨸) KHAROSHTHI SIGN BAR ABOVE [| 004A 00CD | 0002 0002 |]
0334 10A3A; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 |]
10A3A 0334; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 |]
-10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |]
+0334 10D6A; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |]
+10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 |]
0334 0E48; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 |]
0E48 0334; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 |]
0334 0E49; # (้) THAI CHARACTER MAI THO [| 004A 00D8 | 0002 0002 |]
@@ -1594,11 +1601,11 @@ FE63 0062; # (﹣) SMALL HYPHEN-MINUS [020D 239A | 0020 0020 | 000F 0002 |]
1807 0061; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 2380 | 0020 0020 | 0002 0002 |]
1807 0041; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 2380 | 0020 0020 | 0002 0008 |]
1807 0062; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [0212 239A | 0020 0020 | 0002 0002 |]
-10D6E 0021; # () GARAY HYPHEN [0213 0269 | 0020 0020 | 0002 0002 |]
-10D6E 003F; # () GARAY HYPHEN [0213 0270 | 0020 0020 | 0002 0002 |]
-10D6E 0061; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0002 |]
-10D6E 0041; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0008 |]
-10D6E 0062; # () GARAY HYPHEN [0213 239A | 0020 0020 | 0002 0002 |]
+10D6E 0021; # () GARAY HYPHEN [0213 0269 | 0020 0020 | 0002 0002 |]
+10D6E 003F; # () GARAY HYPHEN [0213 0270 | 0020 0020 | 0002 0002 |]
+10D6E 0061; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0002 |]
+10D6E 0041; # () GARAY HYPHEN [0213 2380 | 0020 0020 | 0002 0008 |]
+10D6E 0062; # () GARAY HYPHEN [0213 239A | 0020 0020 | 0002 0002 |]
2010 0021; # (‐) HYPHEN [0214 0269 | 0020 0020 | 0002 0002 |]
2011 0021; # (‑) NON-BREAKING HYPHEN [0214 0269 | 0020 0020 | 001B 0002 |]
2010 003F; # (‐) HYPHEN [0214 0270 | 0020 0020 | 0002 0002 |]
@@ -3184,8 +3191,8 @@ A67D 0021; # (꙽) COMBINING CYRILLIC PAYEROK [0269 | 0033 0020 | 0002 0002 |]
10D25 0021; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [0269 | 0033 0020 | 0002 0002 |]
10D26 0021; # (𐴦) HANIFI ROHINGYA SIGN TANA [0269 | 0033 0020 | 0002 0002 |]
10D27 0021; # (𐴧) HANIFI ROHINGYA SIGN TASSI [0269 | 0033 0020 | 0002 0002 |]
-10D6B 0021; # () GARAY COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |]
-10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [0269 | 0033 0020 | 0002 0002 |]
+10D6B 0021; # () GARAY COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |]
+10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [0269 | 0033 0020 | 0002 0002 |]
10F48 0021; # (𐽈) SOGDIAN COMBINING DOT ABOVE [0269 | 0033 0020 | 0002 0002 |]
10F49 0021; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [0269 | 0033 0020 | 0002 0002 |]
10F4A 0021; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [0269 | 0033 0020 | 0002 0002 |]
@@ -3410,7 +3417,7 @@ FE7E 0021; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0269 | 0081 0020 | 00
089F 0021; # (࢟) ARABIC HALF MADDA OVER MADDA [0269 | 0082 0020 | 0002 0002 |]
0AFC 0021; # (ૼ) GUJARATI SIGN MADDAH [0269 | 0082 0020 | 0002 0002 |]
10EAC 0021; # (𐺬) YEZIDI COMBINING MADDA MARK [0269 | 0082 0020 | 0002 0002 |]
-0897 0021; # () ARABIC PEPET [0269 | 0082 0020 | 0004 0002 |]
+0897 0021; # () ARABIC PEPET [0269 | 0082 0020 | 0004 0002 |]
0654 0021; # (ٔ) ARABIC HAMZA ABOVE [0269 | 0083 0020 | 0002 0002 |]
10EAB 0021; # (𐺫) YEZIDI COMBINING HAMZA MARK [0269 | 0083 0020 | 0002 0002 |]
0655 0021; # (ٕ) ARABIC HAMZA BELOW [0269 | 0084 0020 | 0002 0002 |]
@@ -3434,7 +3441,7 @@ FE7E 0021; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0269 | 0081 0020 | 00
08F9 0021; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [0269 | 0096 0020 | 0002 0002 |]
08FA 0021; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [0269 | 0097 0020 | 0002 0002 |]
0670 0021; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [0269 | 0098 0020 | 0002 0002 |]
-10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [0269 | 0098 0020 | 0002 0002 |]
+10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [0269 | 0098 0020 | 0002 0002 |]
0711 0021; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [0269 | 0099 0020 | 0002 0002 |]
0730 0021; # (ܰ) SYRIAC PTHAHA ABOVE [0269 | 009A 0020 | 0002 0002 |]
0731 0021; # (ܱ) SYRIAC PTHAHA BELOW [0269 | 009B 0020 | 0002 0002 |]
@@ -3492,7 +3499,7 @@ A6F1 0021; # (꛱) BAMUM COMBINING MARK TUKWENTIS [0269 | 00B7 0020 | 0002 0002
1BE6 0021; # (᯦) BATAK SIGN TOMPI [0269 | 00C2 0020 | 0002 0002 |]
1C37 0021; # (᰷) LEPCHA SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |]
A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [0269 | 00C2 0020 | 0002 0002 |]
-10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [0269 | 00C2 0020 | 0002 0002 |]
+10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [0269 | 00C2 0020 | 0002 0002 |]
110BA 0021; # (𑂺) KAITHI SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |]
11173 0021; # (𑅳) MAHAJANI SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |]
111CA 0021; # (𑇊) SHARADA SIGN NUKTA [0269 | 00C2 0020 | 0002 0002 |]
@@ -3647,7 +3654,7 @@ ABEC 0021; # (꯬) MEETEI MAYEK LUM IYEK [0269 | 00CC 0020 | 0002 0002 |]
111CB 0021; # (𑇋) SHARADA VOWEL MODIFIER MARK [0269 | 00D0 0020 | 0002 0002 |]
111CC 0021; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [0269 | 00D1 0020 | 0002 0002 |]
11A98 0021; # (𑪘) SOYOMBO GEMINATION MARK [0269 | 00D2 0020 | 0002 0002 |]
-10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [0269 | 00D3 0020 | 0002 0002 |]
+10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [0269 | 00D3 0020 | 0002 0002 |]
113D2 0021; # () TULU-TIGALARI GEMINATION MARK [0269 | 00D4 0020 | 0002 0002 |]
0E4E 0021; # (๎) THAI CHARACTER YAMAKKAN [0269 | 00D5 0020 | 0002 0002 |]
0E47 0021; # (็) THAI CHARACTER MAITAIKHU [0269 | 00D6 0020 | 0002 0002 |]
@@ -4855,8 +4862,8 @@ A67D 003F; # (꙽) COMBINING CYRILLIC PAYEROK [0270 | 0033 0020 | 0002 0002 |]
10D25 003F; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [0270 | 0033 0020 | 0002 0002 |]
10D26 003F; # (𐴦) HANIFI ROHINGYA SIGN TANA [0270 | 0033 0020 | 0002 0002 |]
10D27 003F; # (𐴧) HANIFI ROHINGYA SIGN TASSI [0270 | 0033 0020 | 0002 0002 |]
-10D6B 003F; # () GARAY COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |]
-10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [0270 | 0033 0020 | 0002 0002 |]
+10D6B 003F; # () GARAY COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |]
+10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [0270 | 0033 0020 | 0002 0002 |]
10F48 003F; # (𐽈) SOGDIAN COMBINING DOT ABOVE [0270 | 0033 0020 | 0002 0002 |]
10F49 003F; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [0270 | 0033 0020 | 0002 0002 |]
10F4A 003F; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [0270 | 0033 0020 | 0002 0002 |]
@@ -5081,7 +5088,7 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0270 | 0081 0020 | 00
089F 003F; # (࢟) ARABIC HALF MADDA OVER MADDA [0270 | 0082 0020 | 0002 0002 |]
0AFC 003F; # (ૼ) GUJARATI SIGN MADDAH [0270 | 0082 0020 | 0002 0002 |]
10EAC 003F; # (𐺬) YEZIDI COMBINING MADDA MARK [0270 | 0082 0020 | 0002 0002 |]
-0897 003F; # () ARABIC PEPET [0270 | 0082 0020 | 0004 0002 |]
+0897 003F; # () ARABIC PEPET [0270 | 0082 0020 | 0004 0002 |]
0654 003F; # (ٔ) ARABIC HAMZA ABOVE [0270 | 0083 0020 | 0002 0002 |]
10EAB 003F; # (𐺫) YEZIDI COMBINING HAMZA MARK [0270 | 0083 0020 | 0002 0002 |]
0655 003F; # (ٕ) ARABIC HAMZA BELOW [0270 | 0084 0020 | 0002 0002 |]
@@ -5105,7 +5112,7 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [0270 | 0081 0020 | 00
08F9 003F; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [0270 | 0096 0020 | 0002 0002 |]
08FA 003F; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [0270 | 0097 0020 | 0002 0002 |]
0670 003F; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [0270 | 0098 0020 | 0002 0002 |]
-10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [0270 | 0098 0020 | 0002 0002 |]
+10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [0270 | 0098 0020 | 0002 0002 |]
0711 003F; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [0270 | 0099 0020 | 0002 0002 |]
0730 003F; # (ܰ) SYRIAC PTHAHA ABOVE [0270 | 009A 0020 | 0002 0002 |]
0731 003F; # (ܱ) SYRIAC PTHAHA BELOW [0270 | 009B 0020 | 0002 0002 |]
@@ -5163,7 +5170,7 @@ A6F1 003F; # (꛱) BAMUM COMBINING MARK TUKWENTIS [0270 | 00B7 0020 | 0002 0002
1BE6 003F; # (᯦) BATAK SIGN TOMPI [0270 | 00C2 0020 | 0002 0002 |]
1C37 003F; # (᰷) LEPCHA SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |]
A9B3 003F; # (꦳) JAVANESE SIGN CECAK TELU [0270 | 00C2 0020 | 0002 0002 |]
-10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [0270 | 00C2 0020 | 0002 0002 |]
+10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [0270 | 00C2 0020 | 0002 0002 |]
110BA 003F; # (𑂺) KAITHI SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |]
11173 003F; # (𑅳) MAHAJANI SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |]
111CA 003F; # (𑇊) SHARADA SIGN NUKTA [0270 | 00C2 0020 | 0002 0002 |]
@@ -5318,7 +5325,7 @@ ABEC 003F; # (꯬) MEETEI MAYEK LUM IYEK [0270 | 00CC 0020 | 0002 0002 |]
111CB 003F; # (𑇋) SHARADA VOWEL MODIFIER MARK [0270 | 00D0 0020 | 0002 0002 |]
111CC 003F; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [0270 | 00D1 0020 | 0002 0002 |]
11A98 003F; # (𑪘) SOYOMBO GEMINATION MARK [0270 | 00D2 0020 | 0002 0002 |]
-10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [0270 | 00D3 0020 | 0002 0002 |]
+10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [0270 | 00D3 0020 | 0002 0002 |]
113D2 003F; # () TULU-TIGALARI GEMINATION MARK [0270 | 00D4 0020 | 0002 0002 |]
0E4E 003F; # (๎) THAI CHARACTER YAMAKKAN [0270 | 00D5 0020 | 0002 0002 |]
0E47 003F; # (็) THAI CHARACTER MAITAIKHU [0270 | 00D6 0020 | 0002 0002 |]
@@ -58942,8 +58949,8 @@ A67D 0061; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0002 |]
10D25 0061; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0002 |]
10D26 0061; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0002 |]
10D27 0061; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0002 |]
-10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |]
-10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 |]
+10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |]
+10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 |]
10F48 0061; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 |]
10F49 0061; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0002 |]
10F4A 0061; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0002 |]
@@ -59029,8 +59036,8 @@ A67D 0041; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0008 |]
10D25 0041; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0008 |]
10D26 0041; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0008 |]
10D27 0041; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0008 |]
-10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |]
-10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 |]
+10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |]
+10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 |]
10F48 0041; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 |]
10F49 0041; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0008 |]
10F4A 0041; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0008 |]
@@ -59476,8 +59483,8 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00
089F 0041; # (࢟) ARABIC HALF MADDA OVER MADDA [2380 | 0082 0020 | 0002 0008 |]
0AFC 0041; # (ૼ) GUJARATI SIGN MADDAH [2380 | 0082 0020 | 0002 0008 |]
10EAC 0041; # (𐺬) YEZIDI COMBINING MADDA MARK [2380 | 0082 0020 | 0002 0008 |]
-0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 |]
-0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 |]
+0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 |]
+0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 |]
0654 0061; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0002 |]
10EAB 0061; # (𐺫) YEZIDI COMBINING HAMZA MARK [2380 | 0083 0020 | 0002 0002 |]
0654 0041; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0008 |]
@@ -59523,9 +59530,9 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00
08FA 0061; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0002 |]
08FA 0041; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0008 |]
0670 0061; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0002 |]
-10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 |]
+10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 |]
0670 0041; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0008 |]
-10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 |]
+10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 |]
0711 0061; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0002 |]
0711 0041; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0008 |]
0730 0061; # (ܰ) SYRIAC PTHAHA ABOVE [2380 | 009A 0020 | 0002 0002 |]
@@ -59626,7 +59633,7 @@ A6F1 0041; # (꛱) BAMUM COMBINING MARK TUKWENTIS [2380 | 00B7 0020 | 0002 0008
1BE6 0061; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0002 |]
1C37 0061; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |]
A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 |]
-10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 |]
+10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 |]
110BA 0061; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |]
11173 0061; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |]
111CA 0061; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 |]
@@ -59657,7 +59664,7 @@ A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 |]
1BE6 0041; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0008 |]
1C37 0041; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |]
A9B3 0041; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0008 |]
-10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 |]
+10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 |]
110BA 0041; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |]
11173 0041; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |]
111CA 0041; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 |]
@@ -59950,8 +59957,8 @@ ABEC 0041; # (꯬) MEETEI MAYEK LUM IYEK [2380 | 00CC 0020 | 0002 0008 |]
111CC 0041; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [2380 | 00D1 0020 | 0002 0008 |]
11A98 0061; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0002 |]
11A98 0041; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0008 |]
-10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 |]
-10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 |]
+10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 |]
+10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 |]
113D2 0061; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0002 |]
113D2 0041; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0008 |]
0E4E 0061; # (๎) THAI CHARACTER YAMAKKAN [2380 | 00D5 0020 | 0002 0002 |]
@@ -62026,8 +62033,8 @@ A67D 0062; # (꙽) COMBINING CYRILLIC PAYEROK [239A | 0033 0020 | 0002 0002 |]
10D25 0062; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [239A | 0033 0020 | 0002 0002 |]
10D26 0062; # (𐴦) HANIFI ROHINGYA SIGN TANA [239A | 0033 0020 | 0002 0002 |]
10D27 0062; # (𐴧) HANIFI ROHINGYA SIGN TASSI [239A | 0033 0020 | 0002 0002 |]
-10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |]
-10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 |]
+10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |]
+10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 |]
10F48 0062; # (𐽈) SOGDIAN COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 |]
10F49 0062; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [239A | 0033 0020 | 0002 0002 |]
10F4A 0062; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [239A | 0033 0020 | 0002 0002 |]
@@ -62254,7 +62261,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00
089F 0062; # (࢟) ARABIC HALF MADDA OVER MADDA [239A | 0082 0020 | 0002 0002 |]
0AFC 0062; # (ૼ) GUJARATI SIGN MADDAH [239A | 0082 0020 | 0002 0002 |]
10EAC 0062; # (𐺬) YEZIDI COMBINING MADDA MARK [239A | 0082 0020 | 0002 0002 |]
-0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 |]
+0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 |]
0654 0062; # (ٔ) ARABIC HAMZA ABOVE [239A | 0083 0020 | 0002 0002 |]
10EAB 0062; # (𐺫) YEZIDI COMBINING HAMZA MARK [239A | 0083 0020 | 0002 0002 |]
0655 0062; # (ٕ) ARABIC HAMZA BELOW [239A | 0084 0020 | 0002 0002 |]
@@ -62278,7 +62285,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00
08F9 0062; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [239A | 0096 0020 | 0002 0002 |]
08FA 0062; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [239A | 0097 0020 | 0002 0002 |]
0670 0062; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [239A | 0098 0020 | 0002 0002 |]
-10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 |]
+10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 |]
0711 0062; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [239A | 0099 0020 | 0002 0002 |]
0730 0062; # (ܰ) SYRIAC PTHAHA ABOVE [239A | 009A 0020 | 0002 0002 |]
0731 0062; # (ܱ) SYRIAC PTHAHA BELOW [239A | 009B 0020 | 0002 0002 |]
@@ -62336,7 +62343,7 @@ A6F1 0062; # (꛱) BAMUM COMBINING MARK TUKWENTIS [239A | 00B7 0020 | 0002 0002
1BE6 0062; # (᯦) BATAK SIGN TOMPI [239A | 00C2 0020 | 0002 0002 |]
1C37 0062; # (᰷) LEPCHA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |]
A9B3 0062; # (꦳) JAVANESE SIGN CECAK TELU [239A | 00C2 0020 | 0002 0002 |]
-10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 |]
+10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 |]
110BA 0062; # (𑂺) KAITHI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |]
11173 0062; # (𑅳) MAHAJANI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |]
111CA 0062; # (𑇊) SHARADA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 |]
@@ -62491,7 +62498,7 @@ ABEC 0062; # (꯬) MEETEI MAYEK LUM IYEK [239A | 00CC 0020 | 0002 0002 |]
111CB 0062; # (𑇋) SHARADA VOWEL MODIFIER MARK [239A | 00D0 0020 | 0002 0002 |]
111CC 0062; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [239A | 00D1 0020 | 0002 0002 |]
11A98 0062; # (𑪘) SOYOMBO GEMINATION MARK [239A | 00D2 0020 | 0002 0002 |]
-10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 |]
+10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 |]
113D2 0062; # () TULU-TIGALARI GEMINATION MARK [239A | 00D4 0020 | 0002 0002 |]
0E4E 0062; # (๎) THAI CHARACTER YAMAKKAN [239A | 00D5 0020 | 0002 0002 |]
0E47 0062; # (็) THAI CHARACTER MAITAIKHU [239A | 00D6 0020 | 0002 0002 |]
@@ -102854,7 +102861,9 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 |]
113C8 0041; # () TULU-TIGALARI VOWEL SIGN AU [3329 2380 | 0020 0020 | 0002 0008 |]
113C2 113C9 0062; # () TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI AU LENGTH MARK [3329 239A | 0020 0020 | 0002 0002 |]
113C8 0062; # () TULU-TIGALARI VOWEL SIGN AU [3329 239A | 0020 0020 | 0002 0002 |]
+0334 113CE; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 |]
113CE 0334; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 |]
+0334 113CF; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 |]
113CF 0334; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 |]
113CE 0021; # () TULU-TIGALARI SIGN VIRAMA [332A 0269 | 0020 0020 | 0002 0002 |]
113CF 0021; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 0269 | 0020 0020 | 0004 0002 |]
@@ -102866,6 +102875,7 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 |]
113CF 0041; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 2380 | 0020 0020 | 0004 0008 |]
113CE 0062; # () TULU-TIGALARI SIGN VIRAMA [332A 239A | 0020 0020 | 0002 0002 |]
113CF 0062; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 239A | 0020 0020 | 0004 0002 |]
+0334 113D0; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 |]
113D0 0334; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 |]
113D0 0021; # () TULU-TIGALARI CONJOINER [332B 0269 | 0020 0020 | 0002 0002 |]
113D0 003F; # () TULU-TIGALARI CONJOINER [332B 0270 | 0020 0020 | 0002 0002 |]
@@ -136655,12 +136665,13 @@ A6EF 0062; # (ꛯ) BAMUM LETTER KOGHOM [4304 239A | 0020 0020 | 0002 0002 |]
10D4F 0061; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0002 |]
10D4F 0041; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0008 |]
10D4F 0062; # () GARAY SUKUN [4646 239A | 0020 0020 | 0002 0002 |]
-10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |]
-10D69 0021; # () GARAY VOWEL SIGN E [4647 0269 | 0020 0020 | 0002 0002 |]
-10D69 003F; # () GARAY VOWEL SIGN E [4647 0270 | 0020 0020 | 0002 0002 |]
-10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 |]
-10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 |]
-10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 |]
+0334 10D69; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |]
+10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 |]
+10D69 0021; # () GARAY VOWEL SIGN E [4647 0269 | 0020 0020 | 0002 0002 |]
+10D69 003F; # () GARAY VOWEL SIGN E [4647 0270 | 0020 0020 | 0002 0002 |]
+10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 |]
+10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 |]
+10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 |]
10D70 0021; # () GARAY SMALL LETTER A [4648 0269 | 0020 0020 | 0002 0002 |]
10D50 0021; # () GARAY CAPITAL LETTER A [4648 0269 | 0020 0020 | 0008 0002 |]
10D70 003F; # () GARAY SMALL LETTER A [4648 0270 | 0020 0020 | 0002 0002 |]
@@ -155814,6 +155825,8 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 |]
105C8 0041; # () TODHRI LETTER DHA [5236 2380 | 0020 0020 | 0002 0008 |]
105C8 0062; # () TODHRI LETTER DHA [5236 239A | 0020 0020 | 0002 0002 |]
105C9 0334; # () TODHRI LETTER EI [5237 | 0020 004A | 0002 0002 |]
+105D2 0307 0334; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 004A | 0002 0002 |]
+105D2 0334 0307; # (̴) TODHRI LETTER I, COMBINING TILDE OVERLAY [5237 | 0020 004A | 0002 0002 |]
105C9 0021; # () TODHRI LETTER EI [5237 0269 | 0020 0020 | 0002 0002 |]
105D2 0307 0021; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 0269 | 0020 0020 | 0002 0002 |]
105C9 003F; # () TODHRI LETTER EI [5237 0270 | 0020 0020 | 0002 0002 |]
@@ -155958,6 +155971,8 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 |]
105E3 0061; # () TODHRI LETTER THA [5251 2380 | 0020 0020 | 0002 0002 |]
105E3 0041; # () TODHRI LETTER THA [5251 2380 | 0020 0020 | 0002 0008 |]
105E3 0062; # () TODHRI LETTER THA [5251 239A | 0020 0020 | 0002 0002 |]
+105DA 0307 0334; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 004A | 0002 0002 |]
+105DA 0334 0307; # (̴) TODHRI LETTER O, COMBINING TILDE OVERLAY [5252 | 0020 004A | 0002 0002 |]
105E4 0334; # () TODHRI LETTER U [5252 | 0020 004A | 0002 0002 |]
105DA 0307 0021; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 0269 | 0020 0020 | 0002 0002 |]
105E4 0021; # () TODHRI LETTER U [5252 0269 | 0020 0020 | 0002 0002 |]
@@ -157213,6 +157228,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 |]
1612E 0061; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 2380 | 0020 0020 | 0002 0002 |]
1612E 0041; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 2380 | 0020 0020 | 0002 0008 |]
1612E 0062; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 239A | 0020 0020 | 0002 0002 |]
+0334 1612F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 |]
1612F 0334; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 |]
1612F 0021; # () GURUNG KHEMA SIGN THOLHOMA [5338 0269 | 0020 0020 | 0002 0002 |]
1612F 003F; # () GURUNG KHEMA SIGN THOLHOMA [5338 0270 | 0020 0020 | 0002 0002 |]
diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt
index f236ee7be..d03e4ee81 100644
--- a/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt
+++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt
@@ -1,5 +1,5 @@
# CollationTest_NON_IGNORABLE_SHORT.txt
-# Date: 2024-05-02, 01:46:28 GMT
+# Date: 2024-06-05, 18:49:39 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -677,7 +677,9 @@ A67D 0334
10D26 0334
0334 10D27
10D27 0334
+0334 10D6B
10D6B 0334
+0334 10D6D
10D6D 0334
0334 10F48
10F48 0334
@@ -693,6 +695,7 @@ A67D 0334
10F84 0334
0334 1E2AE
1E2AE 0334
+0334 1E5EE
1E5EE 0334
0316 0334
0334 0316
@@ -840,6 +843,7 @@ FE27 0334
10F83 0334
0334 10F85
10F85 0334
+0334 1E5EF
1E5EF 0334
0334 3099
3099 0334
@@ -1022,6 +1026,7 @@ FB1E 0334
089F 0334
0334 10EAC
10EAC 0334
+0334 0897
0897 0334
0334 0654
0654 0334
@@ -1167,6 +1172,7 @@ A6F1 0334
1C37 0334
0334 A9B3
A9B3 0334
+0334 10D6C
10D6C 0334
0334 110BA
110BA 0334
@@ -1218,6 +1224,7 @@ A9B3 0334
10A38 0334
0334 10A3A
10A3A 0334
+0334 10D6A
10D6A 0334
0334 0E48
0E48 0334
@@ -102854,7 +102861,9 @@ A8C4 0062
113C8 0041
113C2 113C9 0062
113C8 0062
+0334 113CE
113CE 0334
+0334 113CF
113CF 0334
113CE 0021
113CF 0021
@@ -102866,6 +102875,7 @@ A8C4 0062
113CF 0041
113CE 0062
113CF 0062
+0334 113D0
113D0 0334
113D0 0021
113D0 003F
@@ -136655,6 +136665,7 @@ A6EF 0062
10D4F 0061
10D4F 0041
10D4F 0062
+0334 10D69
10D69 0334
10D69 0021
10D69 003F
@@ -155814,6 +155825,8 @@ A4F7 0062
105C8 0041
105C8 0062
105C9 0334
+105D2 0307 0334
+105D2 0334 0307
105C9 0021
105D2 0307 0021
105C9 003F
@@ -155958,6 +155971,8 @@ A4F7 0062
105E3 0061
105E3 0041
105E3 0062
+105DA 0307 0334
+105DA 0334 0307
105E4 0334
105DA 0307 0021
105E4 0021
@@ -157213,6 +157228,7 @@ A4F7 0062
1612E 0061
1612E 0041
1612E 0062
+0334 1612F
1612F 0334
1612F 0021
1612F 003F
diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt
index 8aeee7c00..5ba2ce5ae 100644
--- a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt
+++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED.txt
@@ -1,5 +1,5 @@
# CollationTest_SHIFTED.txt
-# Date: 2024-05-02, 01:46:28 GMT
+# Date: 2024-06-05, 18:49:40 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -99,9 +99,9 @@ FF0D 003F; # (-) FULLWIDTH HYPHEN-MINUS [| | | 020D 0270 |]
1806 003F; # (᠆) MONGOLIAN TODO SOFT HYPHEN [| | | 0211 0270 |]
1807 0021; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [| | | 0212 0269 |]
1807 003F; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [| | | 0212 0270 |]
-10D6E 0334; # () GARAY HYPHEN [| | | 0213 |]
-10D6E 0021; # () GARAY HYPHEN [| | | 0213 0269 |]
-10D6E 003F; # () GARAY HYPHEN [| | | 0213 0270 |]
+10D6E 0334; # () GARAY HYPHEN [| | | 0213 |]
+10D6E 0021; # () GARAY HYPHEN [| | | 0213 0269 |]
+10D6E 003F; # () GARAY HYPHEN [| | | 0213 0270 |]
2010 0021; # (‐) HYPHEN [| | | 0214 0269 |]
2011 0021; # (‑) NON-BREAKING HYPHEN [| | | 0214 0269 |]
2010 003F; # (‐) HYPHEN [| | | 0214 0270 |]
@@ -23365,8 +23365,8 @@ A67D 0021; # (꙽) COMBINING CYRILLIC PAYEROK [| 0033 | 0002 | FFFF 0269 |]
10D25 0021; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [| 0033 | 0002 | FFFF 0269 |]
10D26 0021; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 0033 | 0002 | FFFF 0269 |]
10D27 0021; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 0033 | 0002 | FFFF 0269 |]
-10D6B 0021; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |]
-10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0269 |]
+10D6B 0021; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |]
+10D6D 0021; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0269 |]
10F48 0021; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0269 |]
10F49 0021; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 0033 | 0002 | FFFF 0269 |]
10F4A 0021; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [| 0033 | 0002 | FFFF 0269 |]
@@ -23452,8 +23452,8 @@ A67D 003F; # (꙽) COMBINING CYRILLIC PAYEROK [| 0033 | 0002 | FFFF 0270 |]
10D25 003F; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [| 0033 | 0002 | FFFF 0270 |]
10D26 003F; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 0033 | 0002 | FFFF 0270 |]
10D27 003F; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 0033 | 0002 | FFFF 0270 |]
-10D6B 003F; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |]
-10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0270 |]
+10D6B 003F; # () GARAY COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |]
+10D6D 003F; # () GARAY CONSONANT NASALIZATION MARK [| 0033 | 0002 | FFFF 0270 |]
10F48 003F; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 0033 | 0002 | FFFF 0270 |]
10F49 003F; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 0033 | 0002 | FFFF 0270 |]
10F4A 003F; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [| 0033 | 0002 | FFFF 0270 |]
@@ -24941,8 +24941,10 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 | FFFF FF
10D26 0334; # (𐴦) HANIFI ROHINGYA SIGN TANA [| 004A 0033 | 0002 0002 | FFFF FFFF |]
0334 10D27; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 | FFFF FFFF |]
10D27 0334; # (𐴧) HANIFI ROHINGYA SIGN TASSI [| 004A 0033 | 0002 0002 | FFFF FFFF |]
-10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
-10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |]
+0334 10D6B; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
+10D6B 0334; # () GARAY COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
+0334 10D6D; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |]
+10D6D 0334; # () GARAY CONSONANT NASALIZATION MARK [| 004A 0033 | 0002 0002 | FFFF FFFF |]
0334 10F48; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
10F48 0334; # (𐽈) SOGDIAN COMBINING DOT ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
0334 10F49; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
@@ -24957,6 +24959,7 @@ A67D 0334; # (꙽) COMBINING CYRILLIC PAYEROK [| 004A 0033 | 0002 0002 | FFFF FF
10F84 0334; # (𐾄) OLD UYGHUR COMBINING TWO DOTS ABOVE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
0334 1E2AE; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
1E2AE 0334; # (𞊮) TOTO SIGN RISING TONE [| 004A 0033 | 0002 0002 | FFFF FFFF |]
+0334 1E5EE; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 | FFFF FFFF |]
1E5EE 0334; # () OL ONAL SIGN MU [| 004A 0033 | 0002 0002 | FFFF FFFF |]
0316 0334; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |]
0334 0316; # (̖) COMBINING GRAVE ACCENT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |]
@@ -25104,6 +25107,7 @@ FE27 0334; # (︧) COMBINING LIGATURE LEFT HALF BELOW [| 004A 0034 | 0002 0002 |
10F83 0334; # (𐾃) OLD UYGHUR COMBINING DOT BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |]
0334 10F85; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |]
10F85 0334; # (𐾅) OLD UYGHUR COMBINING TWO DOTS BELOW [| 004A 0034 | 0002 0002 | FFFF FFFF |]
+0334 1E5EF; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 | FFFF FFFF |]
1E5EF 0334; # () OL ONAL SIGN IKIR [| 004A 0034 | 0002 0002 | FFFF FFFF |]
0334 3099; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 | FFFF FFFF |]
3099 0334; # (゙) COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK [| 004A 0037 | 0002 0002 | FFFF FFFF |]
@@ -25286,7 +25290,8 @@ FB1E 0334; # (ﬞ) HEBREW POINT JUDEO-SPANISH VARIKA [| 004A 0061 | 0002 0002 |
089F 0334; # (࢟) ARABIC HALF MADDA OVER MADDA [| 004A 0082 | 0002 0002 | FFFF FFFF |]
0334 10EAC; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 | FFFF FFFF |]
10EAC 0334; # (𐺬) YEZIDI COMBINING MADDA MARK [| 004A 0082 | 0002 0002 | FFFF FFFF |]
-0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |]
+0334 0897; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |]
+0897 0334; # () ARABIC PEPET [| 004A 0082 | 0002 0004 | FFFF FFFF |]
0334 0654; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 | FFFF FFFF |]
0654 0334; # (ٔ) ARABIC HAMZA ABOVE [| 004A 0083 | 0002 0002 | FFFF FFFF |]
0334 10EAB; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 004A 0083 | 0002 0002 | FFFF FFFF |]
@@ -25431,7 +25436,8 @@ A6F1 0334; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 004A 00B7 | 0002 0002 | FFF
1C37 0334; # (᰷) LEPCHA SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
0334 A9B3; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
-10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
+0334 10D6C; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
+10D6C 0334; # () GARAY COMBINING DOUBLE DOT ABOVE [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
0334 110BA; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
110BA 0334; # (𑂺) KAITHI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
0334 11173; # (𑅳) MAHAJANI SIGN NUKTA [| 004A 00C2 | 0002 0002 | FFFF FFFF |]
@@ -25482,7 +25488,8 @@ A9B3 0334; # (꦳) JAVANESE SIGN CECAK TELU [| 004A 00C2 | 0002 0002 | FFFF FFFF
10A38 0334; # (𐨸) KHAROSHTHI SIGN BAR ABOVE [| 004A 00CD | 0002 0002 | FFFF FFFF |]
0334 10A3A; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 | FFFF FFFF |]
10A3A 0334; # (𐨺) KHAROSHTHI SIGN DOT BELOW [| 004A 00CF | 0002 0002 | FFFF FFFF |]
-10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |]
+0334 10D6A; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |]
+10D6A 0334; # () GARAY CONSONANT GEMINATION MARK [| 004A 00D3 | 0002 0002 | FFFF FFFF |]
0334 0E48; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 | FFFF FFFF |]
0E48 0334; # (่) THAI CHARACTER MAI EK [| 004A 00D7 | 0002 0002 | FFFF FFFF |]
0334 0E49; # (้) THAI CHARACTER MAI THO [| 004A 00D8 | 0002 0002 | FFFF FFFF |]
@@ -25825,8 +25832,8 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [| 0081 | 001A | FFFF
089F 003F; # (࢟) ARABIC HALF MADDA OVER MADDA [| 0082 | 0002 | FFFF 0270 |]
0AFC 003F; # (ૼ) GUJARATI SIGN MADDAH [| 0082 | 0002 | FFFF 0270 |]
10EAC 003F; # (𐺬) YEZIDI COMBINING MADDA MARK [| 0082 | 0002 | FFFF 0270 |]
-0897 0021; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0269 |]
-0897 003F; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0270 |]
+0897 0021; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0269 |]
+0897 003F; # () ARABIC PEPET [| 0082 | 0004 | FFFF 0270 |]
0654 0021; # (ٔ) ARABIC HAMZA ABOVE [| 0083 | 0002 | FFFF 0269 |]
10EAB 0021; # (𐺫) YEZIDI COMBINING HAMZA MARK [| 0083 | 0002 | FFFF 0269 |]
0654 003F; # (ٔ) ARABIC HAMZA ABOVE [| 0083 | 0002 | FFFF 0270 |]
@@ -25872,10 +25879,10 @@ FE7E 003F; # (ﹾ) ARABIC SUKUN ISOLATED FORM [| 0081 | 001A | FFFF
08FA 0021; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [| 0097 | 0002 | FFFF 0269 |]
08FA 003F; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [| 0097 | 0002 | FFFF 0270 |]
0670 0021; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [| 0098 | 0002 | FFFF 0269 |]
-10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0269 |]
+10EFC 0021; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0269 |]
0670 003F; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [| 0098 | 0002 | FFFF 0270 |]
-10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0270 |]
-10EFC 0334; # () ARABIC COMBINING ALEF OVERLAY [| 0098 004A | 0002 0002 | FFFF FFFF |]
+10EFC 003F; # () ARABIC COMBINING ALEF OVERLAY [| 0098 | 0002 | FFFF 0270 |]
+10EFC 0334; # () ARABIC COMBINING ALEF OVERLAY [| 0098 004A | 0002 0002 | FFFF FFFF |]
0711 0021; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [| 0099 | 0002 | FFFF 0269 |]
0711 003F; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [| 0099 | 0002 | FFFF 0270 |]
0730 0021; # (ܰ) SYRIAC PTHAHA ABOVE [| 009A | 0002 | FFFF 0269 |]
@@ -25981,7 +25988,7 @@ A6F1 003F; # (꛱) BAMUM COMBINING MARK TUKWENTIS [| 00B7 | 0002 | FFFF 0270 |]
1BE6 0021; # (᯦) BATAK SIGN TOMPI [| 00C2 | 0002 | FFFF 0269 |]
1C37 0021; # (᰷) LEPCHA SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |]
A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0269 |]
-10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0269 |]
+10D6C 0021; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0269 |]
110BA 0021; # (𑂺) KAITHI SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |]
11173 0021; # (𑅳) MAHAJANI SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |]
111CA 0021; # (𑇊) SHARADA SIGN NUKTA [| 00C2 | 0002 | FFFF 0269 |]
@@ -26012,7 +26019,7 @@ A9B3 0021; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0269 |]
1BE6 003F; # (᯦) BATAK SIGN TOMPI [| 00C2 | 0002 | FFFF 0270 |]
1C37 003F; # (᰷) LEPCHA SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |]
A9B3 003F; # (꦳) JAVANESE SIGN CECAK TELU [| 00C2 | 0002 | FFFF 0270 |]
-10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0270 |]
+10D6C 003F; # () GARAY COMBINING DOUBLE DOT ABOVE [| 00C2 | 0002 | FFFF 0270 |]
110BA 003F; # (𑂺) KAITHI SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |]
11173 003F; # (𑅳) MAHAJANI SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |]
111CA 003F; # (𑇊) SHARADA SIGN NUKTA [| 00C2 | 0002 | FFFF 0270 |]
@@ -26373,8 +26380,8 @@ ABEC 003F; # (꯬) MEETEI MAYEK LUM IYEK [| 00CC | 0002 | FFFF 0270 |]
11A98 0021; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 | 0002 | FFFF 0269 |]
11A98 003F; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 | 0002 | FFFF 0270 |]
11A98 0334; # (𑪘) SOYOMBO GEMINATION MARK [| 00D2 004A | 0002 0002 | FFFF FFFF |]
-10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0269 |]
-10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0270 |]
+10D6A 0021; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0269 |]
+10D6A 003F; # () GARAY CONSONANT GEMINATION MARK [| 00D3 | 0002 | FFFF 0270 |]
113D2 0021; # () TULU-TIGALARI GEMINATION MARK [| 00D4 | 0002 | FFFF 0269 |]
113D2 003F; # () TULU-TIGALARI GEMINATION MARK [| 00D4 | 0002 | FFFF 0270 |]
113D2 0334; # () TULU-TIGALARI GEMINATION MARK [| 00D4 004A | 0002 0002 | FFFF FFFF |]
@@ -37850,7 +37857,7 @@ FF0D 0061; # (-) FULLWIDTH HYPHEN-MINUS [2380 | 0020 | 0002 | 020D FFFF |]
1B60 0061; # (᭠) BALINESE PAMENENG [2380 | 0020 | 0002 | 0210 FFFF |]
1806 0061; # (᠆) MONGOLIAN TODO SOFT HYPHEN [2380 | 0020 | 0002 | 0211 FFFF |]
1807 0061; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [2380 | 0020 | 0002 | 0212 FFFF |]
-10D6E 0061; # () GARAY HYPHEN [2380 | 0020 | 0002 | 0213 FFFF |]
+10D6E 0061; # () GARAY HYPHEN [2380 | 0020 | 0002 | 0213 FFFF |]
2010 0061; # (‐) HYPHEN [2380 | 0020 | 0002 | 0214 FFFF |]
2011 0061; # (‑) NON-BREAKING HYPHEN [2380 | 0020 | 0002 | 0214 FFFF |]
2012 0061; # (‒) FIGURE DASH [2380 | 0020 | 0002 | 0215 FFFF |]
@@ -47089,7 +47096,7 @@ FF0D 0041; # (-) FULLWIDTH HYPHEN-MINUS [2380 | 0020 | 0008 | 020D FFFF |]
1B60 0041; # (᭠) BALINESE PAMENENG [2380 | 0020 | 0008 | 0210 FFFF |]
1806 0041; # (᠆) MONGOLIAN TODO SOFT HYPHEN [2380 | 0020 | 0008 | 0211 FFFF |]
1807 0041; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [2380 | 0020 | 0008 | 0212 FFFF |]
-10D6E 0041; # () GARAY HYPHEN [2380 | 0020 | 0008 | 0213 FFFF |]
+10D6E 0041; # () GARAY HYPHEN [2380 | 0020 | 0008 | 0213 FFFF |]
2010 0041; # (‐) HYPHEN [2380 | 0020 | 0008 | 0214 FFFF |]
2011 0041; # (‑) NON-BREAKING HYPHEN [2380 | 0020 | 0008 | 0214 FFFF |]
2012 0041; # (‒) FIGURE DASH [2380 | 0020 | 0008 | 0215 FFFF |]
@@ -56794,8 +56801,8 @@ A67D 0061; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0002 | FF
10D25 0061; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
10D26 0061; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
10D27 0061; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
-10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
-10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
+10D6B 0061; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
+10D6D 0061; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
10F48 0061; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
10F49 0061; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
10F4A 0061; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0002 | FFFF FFFF |]
@@ -56881,8 +56888,8 @@ A67D 0041; # (꙽) COMBINING CYRILLIC PAYEROK [2380 | 0033 0020 | 0002 0008 | FF
10D25 0041; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
10D26 0041; # (𐴦) HANIFI ROHINGYA SIGN TANA [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
10D27 0041; # (𐴧) HANIFI ROHINGYA SIGN TASSI [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
-10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
-10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
+10D6B 0041; # () GARAY COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
+10D6D 0041; # () GARAY CONSONANT NASALIZATION MARK [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
10F48 0041; # (𐽈) SOGDIAN COMBINING DOT ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
10F49 0041; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
10F4A 0041; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [2380 | 0033 0020 | 0002 0008 | FFFF FFFF |]
@@ -57328,8 +57335,8 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00
089F 0041; # (࢟) ARABIC HALF MADDA OVER MADDA [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |]
0AFC 0041; # (ૼ) GUJARATI SIGN MADDAH [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |]
10EAC 0041; # (𐺬) YEZIDI COMBINING MADDA MARK [2380 | 0082 0020 | 0002 0008 | FFFF FFFF |]
-0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 | FFFF FFFF |]
-0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 | FFFF FFFF |]
+0897 0061; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0002 | FFFF FFFF |]
+0897 0041; # () ARABIC PEPET [2380 | 0082 0020 | 0004 0008 | FFFF FFFF |]
0654 0061; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0002 | FFFF FFFF |]
10EAB 0061; # (𐺫) YEZIDI COMBINING HAMZA MARK [2380 | 0083 0020 | 0002 0002 | FFFF FFFF |]
0654 0041; # (ٔ) ARABIC HAMZA ABOVE [2380 | 0083 0020 | 0002 0008 | FFFF FFFF |]
@@ -57375,9 +57382,9 @@ FE7E 0041; # (ﹾ) ARABIC SUKUN ISOLATED FORM [2380 | 0081 0020 | 00
08FA 0061; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0002 | FFFF FFFF |]
08FA 0041; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [2380 | 0097 0020 | 0002 0008 | FFFF FFFF |]
0670 0061; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |]
-10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |]
+10EFC 0061; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0002 | FFFF FFFF |]
0670 0041; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |]
-10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |]
+10EFC 0041; # () ARABIC COMBINING ALEF OVERLAY [2380 | 0098 0020 | 0002 0008 | FFFF FFFF |]
0711 0061; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0002 | FFFF FFFF |]
0711 0041; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [2380 | 0099 0020 | 0002 0008 | FFFF FFFF |]
0730 0061; # (ܰ) SYRIAC PTHAHA ABOVE [2380 | 009A 0020 | 0002 0002 | FFFF FFFF |]
@@ -57478,7 +57485,7 @@ A6F1 0041; # (꛱) BAMUM COMBINING MARK TUKWENTIS [2380 | 00B7 0020 | 0002 0008
1BE6 0061; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |]
1C37 0061; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |]
A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |]
-10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |]
+10D6C 0061; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |]
110BA 0061; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |]
11173 0061; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |]
111CA 0061; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0002 | FFFF FFFF |]
@@ -57509,7 +57516,7 @@ A9B3 0061; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0002 | FFFF
1BE6 0041; # (᯦) BATAK SIGN TOMPI [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |]
1C37 0041; # (᰷) LEPCHA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |]
A9B3 0041; # (꦳) JAVANESE SIGN CECAK TELU [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |]
-10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |]
+10D6C 0041; # () GARAY COMBINING DOUBLE DOT ABOVE [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |]
110BA 0041; # (𑂺) KAITHI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |]
11173 0041; # (𑅳) MAHAJANI SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |]
111CA 0041; # (𑇊) SHARADA SIGN NUKTA [2380 | 00C2 0020 | 0002 0008 | FFFF FFFF |]
@@ -57802,8 +57809,8 @@ ABEC 0041; # (꯬) MEETEI MAYEK LUM IYEK [2380 | 00CC 0020 | 0002 0008 | FFFF FF
111CC 0041; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [2380 | 00D1 0020 | 0002 0008 | FFFF FFFF |]
11A98 0061; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0002 | FFFF FFFF |]
11A98 0041; # (𑪘) SOYOMBO GEMINATION MARK [2380 | 00D2 0020 | 0002 0008 | FFFF FFFF |]
-10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 | FFFF FFFF |]
-10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 | FFFF FFFF |]
+10D6A 0061; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0002 | FFFF FFFF |]
+10D6A 0041; # () GARAY CONSONANT GEMINATION MARK [2380 | 00D3 0020 | 0002 0008 | FFFF FFFF |]
113D2 0061; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0002 | FFFF FFFF |]
113D2 0041; # () TULU-TIGALARI GEMINATION MARK [2380 | 00D4 0020 | 0002 0008 | FFFF FFFF |]
0E4E 0061; # (๎) THAI CHARACTER YAMAKKAN [2380 | 00D5 0020 | 0002 0002 | FFFF FFFF |]
@@ -58638,7 +58645,7 @@ FF0D 0062; # (-) FULLWIDTH HYPHEN-MINUS [239A | 0020 | 0002 | 020D FFFF |]
1B60 0062; # (᭠) BALINESE PAMENENG [239A | 0020 | 0002 | 0210 FFFF |]
1806 0062; # (᠆) MONGOLIAN TODO SOFT HYPHEN [239A | 0020 | 0002 | 0211 FFFF |]
1807 0062; # (᠇) MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER [239A | 0020 | 0002 | 0212 FFFF |]
-10D6E 0062; # () GARAY HYPHEN [239A | 0020 | 0002 | 0213 FFFF |]
+10D6E 0062; # () GARAY HYPHEN [239A | 0020 | 0002 | 0213 FFFF |]
2010 0062; # (‐) HYPHEN [239A | 0020 | 0002 | 0214 FFFF |]
2011 0062; # (‑) NON-BREAKING HYPHEN [239A | 0020 | 0002 | 0214 FFFF |]
2012 0062; # (‒) FIGURE DASH [239A | 0020 | 0002 | 0215 FFFF |]
@@ -68041,8 +68048,8 @@ A67D 0062; # (꙽) COMBINING CYRILLIC PAYEROK [239A | 0033 0020 | 0002 0002 | FF
10D25 0062; # (𐴥) HANIFI ROHINGYA SIGN TAHALA [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
10D26 0062; # (𐴦) HANIFI ROHINGYA SIGN TANA [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
10D27 0062; # (𐴧) HANIFI ROHINGYA SIGN TASSI [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
-10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
-10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
+10D6B 0062; # () GARAY COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
+10D6D 0062; # () GARAY CONSONANT NASALIZATION MARK [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
10F48 0062; # (𐽈) SOGDIAN COMBINING DOT ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
10F49 0062; # (𐽉) SOGDIAN COMBINING TWO DOTS ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
10F4A 0062; # (𐽊) SOGDIAN COMBINING CURVE ABOVE [239A | 0033 0020 | 0002 0002 | FFFF FFFF |]
@@ -68269,7 +68276,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00
089F 0062; # (࢟) ARABIC HALF MADDA OVER MADDA [239A | 0082 0020 | 0002 0002 | FFFF FFFF |]
0AFC 0062; # (ૼ) GUJARATI SIGN MADDAH [239A | 0082 0020 | 0002 0002 | FFFF FFFF |]
10EAC 0062; # (𐺬) YEZIDI COMBINING MADDA MARK [239A | 0082 0020 | 0002 0002 | FFFF FFFF |]
-0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 | FFFF FFFF |]
+0897 0062; # () ARABIC PEPET [239A | 0082 0020 | 0004 0002 | FFFF FFFF |]
0654 0062; # (ٔ) ARABIC HAMZA ABOVE [239A | 0083 0020 | 0002 0002 | FFFF FFFF |]
10EAB 0062; # (𐺫) YEZIDI COMBINING HAMZA MARK [239A | 0083 0020 | 0002 0002 | FFFF FFFF |]
0655 0062; # (ٕ) ARABIC HAMZA BELOW [239A | 0084 0020 | 0002 0002 | FFFF FFFF |]
@@ -68293,7 +68300,7 @@ FE7E 0062; # (ﹾ) ARABIC SUKUN ISOLATED FORM [239A | 0081 0020 | 00
08F9 0062; # (ࣹ) ARABIC LEFT ARROWHEAD BELOW [239A | 0096 0020 | 0002 0002 | FFFF FFFF |]
08FA 0062; # (ࣺ) ARABIC RIGHT ARROWHEAD BELOW [239A | 0097 0020 | 0002 0002 | FFFF FFFF |]
0670 0062; # (ٰ) ARABIC LETTER SUPERSCRIPT ALEF [239A | 0098 0020 | 0002 0002 | FFFF FFFF |]
-10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 | FFFF FFFF |]
+10EFC 0062; # () ARABIC COMBINING ALEF OVERLAY [239A | 0098 0020 | 0002 0002 | FFFF FFFF |]
0711 0062; # (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH [239A | 0099 0020 | 0002 0002 | FFFF FFFF |]
0730 0062; # (ܰ) SYRIAC PTHAHA ABOVE [239A | 009A 0020 | 0002 0002 | FFFF FFFF |]
0731 0062; # (ܱ) SYRIAC PTHAHA BELOW [239A | 009B 0020 | 0002 0002 | FFFF FFFF |]
@@ -68351,7 +68358,7 @@ A6F1 0062; # (꛱) BAMUM COMBINING MARK TUKWENTIS [239A | 00B7 0020 | 0002 0002
1BE6 0062; # (᯦) BATAK SIGN TOMPI [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |]
1C37 0062; # (᰷) LEPCHA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |]
A9B3 0062; # (꦳) JAVANESE SIGN CECAK TELU [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |]
-10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |]
+10D6C 0062; # () GARAY COMBINING DOUBLE DOT ABOVE [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |]
110BA 0062; # (𑂺) KAITHI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |]
11173 0062; # (𑅳) MAHAJANI SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |]
111CA 0062; # (𑇊) SHARADA SIGN NUKTA [239A | 00C2 0020 | 0002 0002 | FFFF FFFF |]
@@ -68506,7 +68513,7 @@ ABEC 0062; # (꯬) MEETEI MAYEK LUM IYEK [239A | 00CC 0020 | 0002 0002 | FFFF FF
111CB 0062; # (𑇋) SHARADA VOWEL MODIFIER MARK [239A | 00D0 0020 | 0002 0002 | FFFF FFFF |]
111CC 0062; # (𑇌) SHARADA EXTRA SHORT VOWEL MARK [239A | 00D1 0020 | 0002 0002 | FFFF FFFF |]
11A98 0062; # (𑪘) SOYOMBO GEMINATION MARK [239A | 00D2 0020 | 0002 0002 | FFFF FFFF |]
-10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 | FFFF FFFF |]
+10D6A 0062; # () GARAY CONSONANT GEMINATION MARK [239A | 00D3 0020 | 0002 0002 | FFFF FFFF |]
113D2 0062; # () TULU-TIGALARI GEMINATION MARK [239A | 00D4 0020 | 0002 0002 | FFFF FFFF |]
0E4E 0062; # (๎) THAI CHARACTER YAMAKKAN [239A | 00D5 0020 | 0002 0002 | FFFF FFFF |]
0E47 0062; # (็) THAI CHARACTER MAITAIKHU [239A | 00D6 0020 | 0002 0002 | FFFF FFFF |]
@@ -110915,7 +110922,9 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 | F
113CE 003F; # () TULU-TIGALARI SIGN VIRAMA [332A | 0020 | 0002 | FFFF 0270 |]
113CF 0021; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 0020 | 0004 | FFFF 0269 |]
113CF 003F; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 0020 | 0004 | FFFF 0270 |]
+0334 113CE; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 | FFFF FFFF |]
113CE 0334; # () TULU-TIGALARI SIGN VIRAMA [332A | 004A 0020 | 0002 0002 | FFFF FFFF |]
+0334 113CF; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 | FFFF FFFF |]
113CF 0334; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A | 004A 0020 | 0002 0004 | FFFF FFFF |]
113CE 0061; # () TULU-TIGALARI SIGN VIRAMA [332A 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
113CE 0041; # () TULU-TIGALARI SIGN VIRAMA [332A 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |]
@@ -110925,6 +110934,7 @@ A8C4 0062; # (꣄) SAURASHTRA SIGN VIRAMA [3171 239A | 0020 0020 | 0002 0002 | F
113CF 0062; # () TULU-TIGALARI SIGN LOOPED VIRAMA [332A 239A | 0020 0020 | 0004 0002 | FFFF FFFF |]
113D0 0021; # () TULU-TIGALARI CONJOINER [332B | 0020 | 0002 | FFFF 0269 |]
113D0 003F; # () TULU-TIGALARI CONJOINER [332B | 0020 | 0002 | FFFF 0270 |]
+0334 113D0; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 | FFFF FFFF |]
113D0 0334; # () TULU-TIGALARI CONJOINER [332B | 004A 0020 | 0002 0002 | FFFF FFFF |]
113D0 0061; # () TULU-TIGALARI CONJOINER [332B 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
113D0 0041; # () TULU-TIGALARI CONJOINER [332B 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |]
@@ -147061,12 +147071,13 @@ A6EF 0062; # (ꛯ) BAMUM LETTER KOGHOM [4304 239A | 0020 0020 | 0002 0002 | FFFF
10D4F 0061; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
10D4F 0041; # () GARAY SUKUN [4646 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |]
10D4F 0062; # () GARAY SUKUN [4646 239A | 0020 0020 | 0002 0002 | FFFF FFFF |]
-10D69 0021; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0269 |]
-10D69 003F; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0270 |]
-10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |]
-10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
-10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |]
-10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 | FFFF FFFF |]
+10D69 0021; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0269 |]
+10D69 003F; # () GARAY VOWEL SIGN E [4647 | 0020 | 0002 | FFFF 0270 |]
+0334 10D69; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |]
+10D69 0334; # () GARAY VOWEL SIGN E [4647 | 004A 0020 | 0002 0002 | FFFF FFFF |]
+10D69 0061; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
+10D69 0041; # () GARAY VOWEL SIGN E [4647 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |]
+10D69 0062; # () GARAY VOWEL SIGN E [4647 239A | 0020 0020 | 0002 0002 | FFFF FFFF |]
10D70 0021; # () GARAY SMALL LETTER A [4648 | 0020 | 0002 | FFFF 0269 |]
10D70 003F; # () GARAY SMALL LETTER A [4648 | 0020 | 0002 | FFFF 0270 |]
10D50 0021; # () GARAY CAPITAL LETTER A [4648 | 0020 | 0008 | FFFF 0269 |]
@@ -167946,6 +167957,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF
105D2 0307 003F; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 | 0002 | FFFF 0270 |]
105C9 0334; # () TODHRI LETTER EI [5237 | 0020 004A | 0002 0002 | FFFF FFFF |]
105D2 0307 0334; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 | 0020 004A | 0002 0002 | FFFF FFFF |]
+105D2 0334 0307; # (̴) TODHRI LETTER I, COMBINING TILDE OVERLAY [5237 | 0020 004A | 0002 0002 | FFFF FFFF |]
105C9 0061; # () TODHRI LETTER EI [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
105D2 0307 0061; # () TODHRI LETTER I, COMBINING DOT ABOVE [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
105D2 0591 0307 0061; # (֑) TODHRI LETTER I, HEBREW ACCENT ETNAHTA, COMBINING DOT ABOVE [5237 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
@@ -168117,6 +168129,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF
105DA 0307 003F; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 | 0002 | FFFF 0270 |]
105E4 003F; # () TODHRI LETTER U [5252 | 0020 | 0002 | FFFF 0270 |]
105DA 0307 0334; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 | 0020 004A | 0002 0002 | FFFF FFFF |]
+105DA 0334 0307; # (̴) TODHRI LETTER O, COMBINING TILDE OVERLAY [5252 | 0020 004A | 0002 0002 | FFFF FFFF |]
105E4 0334; # () TODHRI LETTER U [5252 | 0020 004A | 0002 0002 | FFFF FFFF |]
105DA 0307 0061; # () TODHRI LETTER O, COMBINING DOT ABOVE [5252 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
105E4 0061; # () TODHRI LETTER U [5252 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
@@ -169610,6 +169623,7 @@ A4F7 0062; # (ꓷ) LISU LETTER OE [4E29 239A | 0020 0020 | 0002 0002 | FFFF FFFF
1612E 0062; # () GURUNG KHEMA CONSONANT SIGN MEDIAL RA [5337 239A | 0020 0020 | 0002 0002 | FFFF FFFF |]
1612F 0021; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 0020 | 0002 | FFFF 0269 |]
1612F 003F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 0020 | 0002 | FFFF 0270 |]
+0334 1612F; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 | FFFF FFFF |]
1612F 0334; # () GURUNG KHEMA SIGN THOLHOMA [5338 | 004A 0020 | 0002 0002 | FFFF FFFF |]
1612F 0061; # () GURUNG KHEMA SIGN THOLHOMA [5338 2380 | 0020 0020 | 0002 0002 | FFFF FFFF |]
1612F 0041; # () GURUNG KHEMA SIGN THOLHOMA [5338 2380 | 0020 0020 | 0002 0008 | FFFF FFFF |]
diff --git a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt
index be9ccae62..4d1117edc 100644
--- a/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt
+++ b/unicodetools/data/uca/dev/CollationTest/CollationTest_SHIFTED_SHORT.txt
@@ -1,5 +1,5 @@
# CollationTest_SHIFTED_SHORT.txt
-# Date: 2024-05-02, 01:46:29 GMT
+# Date: 2024-06-05, 18:49:41 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -24941,7 +24941,9 @@ A67D 0334
10D26 0334
0334 10D27
10D27 0334
+0334 10D6B
10D6B 0334
+0334 10D6D
10D6D 0334
0334 10F48
10F48 0334
@@ -24957,6 +24959,7 @@ A67D 0334
10F84 0334
0334 1E2AE
1E2AE 0334
+0334 1E5EE
1E5EE 0334
0316 0334
0334 0316
@@ -25104,6 +25107,7 @@ FE27 0334
10F83 0334
0334 10F85
10F85 0334
+0334 1E5EF
1E5EF 0334
0334 3099
3099 0334
@@ -25286,6 +25290,7 @@ FB1E 0334
089F 0334
0334 10EAC
10EAC 0334
+0334 0897
0897 0334
0334 0654
0654 0334
@@ -25431,6 +25436,7 @@ A6F1 0334
1C37 0334
0334 A9B3
A9B3 0334
+0334 10D6C
10D6C 0334
0334 110BA
110BA 0334
@@ -25482,6 +25488,7 @@ A9B3 0334
10A38 0334
0334 10A3A
10A3A 0334
+0334 10D6A
10D6A 0334
0334 0E48
0E48 0334
@@ -110915,7 +110922,9 @@ A8C4 0062
113CE 003F
113CF 0021
113CF 003F
+0334 113CE
113CE 0334
+0334 113CF
113CF 0334
113CE 0061
113CE 0041
@@ -110925,6 +110934,7 @@ A8C4 0062
113CF 0062
113D0 0021
113D0 003F
+0334 113D0
113D0 0334
113D0 0061
113D0 0041
@@ -147063,6 +147073,7 @@ A6EF 0062
10D4F 0062
10D69 0021
10D69 003F
+0334 10D69
10D69 0334
10D69 0061
10D69 0041
@@ -167946,6 +167957,7 @@ A4F7 0062
105D2 0307 003F
105C9 0334
105D2 0307 0334
+105D2 0334 0307
105C9 0061
105D2 0307 0061
105D2 0591 0307 0061
@@ -168117,6 +168129,7 @@ A4F7 0062
105DA 0307 003F
105E4 003F
105DA 0307 0334
+105DA 0334 0307
105E4 0334
105DA 0307 0061
105E4 0061
@@ -169610,6 +169623,7 @@ A4F7 0062
1612E 0062
1612F 0021
1612F 003F
+0334 1612F
1612F 0334
1612F 0061
1612F 0041
diff --git a/unicodetools/data/ucd/dev/DoNotEmit.txt b/unicodetools/data/ucd/dev/DoNotEmit.txt
index a57077a0b..f38bc09d9 100644
--- a/unicodetools/data/ucd/dev/DoNotEmit.txt
+++ b/unicodetools/data/ucd/dev/DoNotEmit.txt
@@ -82,8 +82,6 @@
# combining dot above.
# Hamza_Form:
# Sequences containing Arabic hamza above, which should be avoided.
-# Precomposed_Hieroglyph:
-# Precomposed sequences for Egyptian Hieroglyphs which should be avoided.
# Precomposed_Form:
# Sequences for which a precomposed form exists, but without canonical
# equivalence.
@@ -101,12 +99,6 @@
# "Do Not Use" tables from the Core Specification
# ================================================
-# Egyptian Hieroglyphs, from Table 11-2
-# Note: This list may be incomplete.
-13217; 13216 13430 13216 13430 13216; Precomposed_Hieroglyph # EGYPTIAN HIEROGLYPH N035A; EGYPTIAN HIEROGLYPH N035, EGYPTIAN HIEROGLYPH VERTICAL JOINER, EGYPTIAN HIEROGLYPH N035, EGYPTIAN HIEROGLYPH VERTICAL JOINER, EGYPTIAN HIEROGLYPH N035
-130C1; 130C0 13436 1309D; Precomposed_Hieroglyph # EGYPTIAN HIEROGLYPH D059; EGYPTIAN HIEROGLYPH D058, EGYPTIAN HIEROGLYPH OVERLAY MIDDLE, EGYPTIAN HIEROGLYPH D036
-13196; 13193 13433 13437 133CF 13430 131FF 13438; Precomposed_Hieroglyph # EGYPTIAN HIEROGLYPH I011A; EGYPTIAN HIEROGLYPH I010, EGYPTIAN HIEROGLYPH INSERT AT BOTTOM START, EGYPTIAN HIEROGLYPH BEGIN SEGMENT, EGYPTIAN HIEROGLYPH X001, EGYPTIAN HIEROGLYPH VERTICAL JOINER, EGYPTIAN HIEROGLYPH N017, EGYPTIAN HIEROGLYPH END SEGMENT
-
# Devanagari, from Table 12-1
0905 0946; 0904; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E; DEVANAGARI LETTER SHORT A
0905 093E; 0906; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER AA
diff --git a/unicodetools/data/ucd/dev/NamesList.txt b/unicodetools/data/ucd/dev/NamesList.txt
index ea2436c02..4598934ff 100644
--- a/unicodetools/data/ucd/dev/NamesList.txt
+++ b/unicodetools/data/ucd/dev/NamesList.txt
@@ -1,23 +1,11 @@
; charset=UTF-8
@@@ The Unicode Standard 16.0.0
@@@+ NamesList-16.0.0.txt
-@+ Generation Date: 2024-05-17, 19:24:02 GMT
+@+ Generation Date: 2024-06-03, 12:53:56 GMT
Unicode 16.0.0 names list.
Repertoire synched with UnicodeData-16.0.0d16.txt.
- Pre-beta rollup of various fixes.
- Add xref between 131A6 and 13DEE.
- Add xrefs between 01C3, A71D, 107B9.
- Added xrefs from 1DF0A to A71D and 107B9.
- Added formal aliases and annotation for 1E899, 1E89A
- Removed unneeded subheads for two postponed archaic SHRII characters.
- Added formal alias for 12327.
- Added alias and annotation for 12326.
- Added xrefs between 050F and 1C8A.
- Added an annotation about Amerindian orthographic use for 00B7.
- Add notices about use of colon in Egyptian hieroglyph annotations.
- Add annotation for 0B35; update annotation for 0B55.
- Add annotation for 1DF8.
- Add formal name alias for 1680B.
+ Post-beta rollup of various fixes.
+ Add subheads and annotations for 1FB81, 1FB98, 1FB99.
This file is semi-automatically derived from UnicodeData.txt and
a set of manually created annotations using a script to select
or suppress information from the data file. The rules used
@@ -64096,7 +64084,10 @@ FFFF ");
+ for (String errorMessageLine : errorMessageLines) {
+ out.println("
");
+ printErrorLine("Test Failure", Side.END, testFailureCount);
+ }
+ }
+
private static void equivalencesLine(String line, ParsePosition pp, String file, int lineNumber)
throws ParseException {
pp.setIndex("OnPairsOf".length());
diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt
index 465c613c6..98613a31c 100644
--- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt
+++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt
@@ -101,9 +101,15 @@
# Overrides for bugs
# TODO(egg): These are specified in their respective files, we should not need them here.
+
# @missing: 0000..10FFFF; Bidi_Mirroring_Glyph; ");
+ }
+ out.println("");
+ out.println(toHTML.transform(errorMessageLine).replace("\t", " "));
+ out.println("
+# @missing: 0000..10FFFF; NFKC_SCF;
+
# Extras
# @missing: 0000..10FFFF; Idn_Status ; disallowed
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt
new file mode 100644
index 000000000..57b2f5e96
--- /dev/null
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt
@@ -0,0 +1,19 @@
+# This file uses the invariant test language, but contains comparisons between
+# new and pre-existing characters to aid in PAG review of encoding proposals.
+
+## Unicode 16.0 additions.
+
+# These comparisons were not in place when properties were initially assigned for the 16.0
+# repertoire.
+# We note here the feedback about errors that would have been caught by them.
+
+# U+18CFF is a blank character for the Khitan Small Script; aside from looking blank,
+# it is indistinguishable from other Khitan Small Script characters. See L2/23-065.
+# In particular, it is ideographic: https://www.unicode.org/review/pri497/feedback.html#ID20240216140104.
+Propertywise [\N{KHITAN SMALL SCRIPT CHARACTER-18CFF} \N{KHITAN SMALL SCRIPT CHARACTER-18B00}] AreAlike, Except: Age
+
+# HXG (briefly known as HZXG) and SZP are just like all the other CJK strokes.
+# In particular, they are scx=Hani: https://www.unicode.org/review/pri502/feedback.html#ID20240523095709.
+Propertywise [\N{CJK STROKE T} \N{CJK STROKE HXG}\N{CJK STROKE SZP}] AreAlike, Except: Age
+
+## Provisionally assigned. [placeholder for draft PRs]
\ No newline at end of file
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
index d9cbb53a4..3b3f3c35a 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
@@ -112,6 +112,20 @@
# OnPairsOf $strings, EqualityOf Case_Folding ⇏ EqualityOf Simple_Case_Folding
# OnPairsOf $strings, EqualityOf Case_Folding ⇐ EqualityOf Simple_Case_Folding
##########################
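+# Propertywise <set> AreAlike [, Except: <properties>]
+# For the characters in <set>, apart from the listed exceptions, these assignments are the same.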
+#
+# Example: Propertywise [𐛪 𐛫] AreAlike
+# These two Linear A signs (A751 and A752) behave identically.
+# Example: Propertywise [ي ۑ] AreAlike, Except: Confusable_MA Unicode_1_Name
+# This checks that yeh (with two dots) and yeh with three dots behave the same,
+# except for confusability and their Unicode 1 names (each has one, and the two names differ).
+##########################
# There is new syntax for testing UnicodeMaps
#
# Map