diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index c818d6948..a3a12dc65 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -275,6 +275,22 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + set +e + diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -316,6 +332,10 @@ jobs: - name: Run command - UCA - collation validity log run: | cd unicodetools/mine/src + echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then set +e + fi # invoke main() in class ...UCA.Main mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION # check for output file @@ -333,6 +353,22 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + set +e + diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -372,6 +408,16 @@ jobs: - name: Run invariant tests run: | cd unicodetools/mine/src - MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS + echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then ERROR="::notice" + else ERROR="::error" + fi + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS 2>&1 | sed "s/^::error/$ERROR/" + STATUS=${PIPESTATUS[0]} + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then exit 0 + else exit $STATUS + fi env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/pipeline.md b/docs/pipeline.md index 695f659ec..69f2f4b88 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -49,7 +49,7 @@ Indic scripts only: - [ ] Commit --- -- [ ] PropsList.txt — Add Other_Alphabetic, Diacritic, and Extender to satisfy invariants, or to taste +- [ ] PropsList.txt — Add Other_Alphabetic, Other_Lowercase, Diacritic, and Extender to satisfy invariants, or to taste - [ ] Commit --- @@ -67,8 +67,13 @@ PR preparation: - [ ] PR button — Set to DRAFT pull request - unless approved for the upcoming version - [ ] PR button — Press - - The **Check UCA data** CI check might fail; many character additions need separate handling there, - but that is out of scope for the PAG work of preparing `data-for-new`. This will get resolved later. + - The **Check UCA data** and **Check security data invariants** CI checks are + suppressed; many character additions need separate handling there, + but that is out of scope for the PAG work of preparing `data-for-new`, + so reporting those failures could distract from real issues + in the UCD invariants. + UCA and security data issues are addressed later in the process, + before the start of β review. ## Scripts diff --git a/docs/security.md b/docs/security.md index 3229c35a9..01bb8955c 100644 --- a/docs/security.md +++ b/docs/security.md @@ -9,8 +9,8 @@ machine-generated, then tweaked. They have names like source/confusables-winFonts.txt. The main file is confusables-source.txt. ***There is fairly complex processing for the confusables, so carefully diff the -results. Sometimes you may get an unexpected union of two equivalence sets. Look -at Testing below for help.*** +results. Sometimes you may get an unexpected union of two equivalence sets. +Look at Testing below for help.*** Look at the following spreadsheets / bugs to see if there are any additional suggestions. @@ -19,17 +19,38 @@ suggestions. Suggestions](https://docs.google.com/spreadsheet/ccc?key=0ArRWBHdd5mx-dHRXelRVbXRYSVp2QTNDdTBlV1I5X1E&usp=drive_web#gid=0)** * **[Identifier Restriction Suggestions](https://docs.google.com/spreadsheet/ccc?key=0ArRWBHdd5mx-dEJJWkdzZzk4cDRYbEVLTmhraGN0Q3c&usp=drive_web#gid=0)** -* *[Unicode - Bugs](http://www.unicode.org/edcom/bugtrack/query?status=accepted&status=assigned&status=new&status=reopened&group=component&order=priority&col=id&col=summary&col=status&col=type&col=priority&col=milestone&col=component&owner=mark&report=10) - (under TR #36/39)*\ - :construction: **TODO**: That Trac instance is gone. - Markus thinks we decided that there was nothing useful in it, - and deleted it without saving data. Check with Mark. +* *[Sample PRs](https://github.com/unicode-org/unicodetools/pull/841) If so, assess and add to unicodetools/data/security/{version}/data/source/confusables-source.txt — *if needed.* - Then in the spreadsheets, move the "new stuff" line to the end. +### File Format +There is a brief description of the file format at the top. +Each line represents a mapping from a code point or set of code points to a sequence of one or more code points. + +For example: +``` +0021 ; 01C3 # ( ! → ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK +``` + +The ordering of characters doesn't matter. +So it doesn't matter whether you have the above line, or +``` +01C3 ; 0021 # ( ǃ → !) LATIN LETTER RETROFLEX CLICK → EXCLAMATION MARK +``` +It also doesn't matter if you have identical lines; the second one will be a NOOP. + +The mappings are used to generate equivalence classes. +From each equivalence class, one representative member will be chosen, +and in the resulting data file, all the other characters will map to that representative. +Because of transitivity, the equivalence class will tend to be somewhat looser than expected. + +We've discussed possible future enhancements: +- Have a second, narrower mapping that is more exact. +- Allow for mappings from sequences to sequences (instead of just code points to sequences). +- Provide for context, perhaps like the Transform rules. + Eg [x { a } y → A](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3Aarabic_type%3A%5D&g=&i=) + ## Before generating First, in CLDR, update the script metadata: @@ -51,13 +72,10 @@ Run GenerateConfusables -c -b to generate the files. They will appear in two pla * reformatted source, log * $UNICODETOOLS_DIR/data/security/11.0.0/* *including log.txt* -**Run TestSecurity to verify that the confusable mappings are idempotent!** +The TestSecurity.java test is part of the unit test suite, run by a github CI. +It verifies that the confusable mappings are idempotent. -With the same VM arguments as the generator. -Starting in 2021q3, TestSecurity needs to be run as a JUnit test. -It is also now part of the unit test suite and run on GitHub CI. - -Copy the following from the output directory to the top level of the revision directory: +Copy the following from the output directory to the top level of the revision directory, and check in. * confusables.txt * confusablesSummary.txt @@ -66,6 +84,12 @@ Copy the following from the output directory to the top level of the revision di * ReadMe.txt * xidmodifications.txt +### Review + +Review the mappings to make sure that there are no surprises. +The biggest issue is if two equivalence classes are mistakenly joined. +For example, if you map b to d, then that will join the equivalence class for b with that of d. + ### IdentifierStatus.txt & IdentifierType.txt Markus 2020-feb-07 for Unicode 13.0: diff --git a/unicodetools/data/emoji/dev/emoji-test.txt b/unicodetools/data/emoji/dev/emoji-test.txt index e81fe0b19..95c5d5431 100644 --- a/unicodetools/data/emoji/dev/emoji-test.txt +++ b/unicodetools/data/emoji/dev/emoji-test.txt @@ -1,5 +1,5 @@ # emoji-test.txt -# Date: 2024-05-01, 21:25:24 GMT +# Date: 2024-06-04, 16:46:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1751,12 +1751,12 @@ 1F936 1F3FD ; fully-qualified # 🤶🏽 E3.0 Mrs. Claus: medium skin tone 1F936 1F3FE ; fully-qualified # 🤶🏾 E3.0 Mrs. Claus: medium-dark skin tone 1F936 1F3FF ; fully-qualified # 🤶🏿 E3.0 Mrs. Claus: dark skin tone -1F9D1 200D 1F384 ; fully-qualified # 🧑‍🎄 E13.0 mx claus -1F9D1 1F3FB 200D 1F384 ; fully-qualified # 🧑🏻‍🎄 E13.0 mx claus: light skin tone -1F9D1 1F3FC 200D 1F384 ; fully-qualified # 🧑🏼‍🎄 E13.0 mx claus: medium-light skin tone -1F9D1 1F3FD 200D 1F384 ; fully-qualified # 🧑🏽‍🎄 E13.0 mx claus: medium skin tone -1F9D1 1F3FE 200D 1F384 ; fully-qualified # 🧑🏾‍🎄 E13.0 mx claus: medium-dark skin tone -1F9D1 1F3FF 200D 1F384 ; fully-qualified # 🧑🏿‍🎄 E13.0 mx claus: dark skin tone +1F9D1 200D 1F384 ; fully-qualified # 🧑‍🎄 E13.0 Mx claus +1F9D1 1F3FB 200D 1F384 ; fully-qualified # 🧑🏻‍🎄 E13.0 Mx claus: light skin tone +1F9D1 1F3FC 200D 1F384 ; fully-qualified # 🧑🏼‍🎄 E13.0 Mx claus: medium-light skin tone +1F9D1 1F3FD 200D 1F384 ; fully-qualified # 🧑🏽‍🎄 E13.0 Mx claus: medium skin tone +1F9D1 1F3FE 200D 1F384 ; fully-qualified # 🧑🏾‍🎄 E13.0 Mx claus: medium-dark skin tone +1F9D1 1F3FF 200D 1F384 ; fully-qualified # 🧑🏿‍🎄 E13.0 Mx claus: dark skin tone 1F9B8 ; fully-qualified # 🦸 E11.0 superhero 1F9B8 1F3FB ; fully-qualified # 🦸🏻 E11.0 superhero: light skin tone 1F9B8 1F3FC ; fully-qualified # 🦸🏼 E11.0 superhero: medium-light skin tone @@ -3721,6 +3721,11 @@ 1F41A ; fully-qualified # 🐚 E0.6 spiral shell 1FAB8 ; fully-qualified # 🪸 E14.0 coral 1FABC ; fully-qualified # 🪼 E15.0 jellyfish +1F980 ; fully-qualified # 🦀 E1.0 crab +1F99E ; fully-qualified # 🦞 E11.0 lobster +1F990 ; fully-qualified # 🦐 E3.0 shrimp +1F991 ; fully-qualified # 🦑 E3.0 squid +1F9AA ; fully-qualified # 🦪 E12.0 oyster # subgroup: animal-bug 1F40C ; fully-qualified # 🐌 E0.6 snail @@ -3777,8 +3782,8 @@ 1F344 ; fully-qualified # 🍄 E0.6 mushroom 1FABE ; fully-qualified # 🪾 E16.0 leafless tree -# Animals & Nature subtotal: 161 -# Animals & Nature subtotal: 161 w/o modifiers +# Animals & Nature subtotal: 166 +# Animals & Nature subtotal: 166 w/o modifiers # group: Food & Drink @@ -3881,13 +3886,6 @@ 1F960 ; fully-qualified # 🥠 E5.0 fortune cookie 1F961 ; fully-qualified # 🥡 E5.0 takeout box -# subgroup: food-marine -1F980 ; fully-qualified # 🦀 E1.0 crab -1F99E ; fully-qualified # 🦞 E11.0 lobster -1F990 ; fully-qualified # 🦐 E3.0 shrimp -1F991 ; fully-qualified # 🦑 E3.0 squid -1F9AA ; fully-qualified # 🦪 E12.0 oyster - # subgroup: food-sweet 1F366 ; fully-qualified # 🍦 E0.6 soft ice cream 1F367 ; fully-qualified # 🍧 E0.6 shaved ice @@ -3936,8 +3934,8 @@ 1FAD9 ; fully-qualified # 🫙 E14.0 jar 1F3FA ; fully-qualified # 🏺 E1.0 amphora -# Food & Drink subtotal: 138 -# Food & Drink subtotal: 138 w/o modifiers +# Food & Drink subtotal: 133 +# Food & Drink subtotal: 133 w/o modifiers # group: Travel & Places diff --git a/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt b/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt index e471645bf..ce31f22ce 100644 --- a/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt +++ b/unicodetools/data/emoji/dev/emoji-zwj-sequences.txt @@ -1,5 +1,5 @@ # emoji-zwj-sequences.txt -# Date: 2024-05-01, 21:25:24 GMT +# Date: 2024-06-04, 16:46:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -665,7 +665,7 @@ 1F9D1 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer # E12.1 [1] (🧑‍🌾) 1F9D1 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook # E12.1 [1] (🧑‍🍳) 1F9D1 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby # E13.0 [1] (🧑‍🍼) -1F9D1 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus # E13.0 [1] (🧑‍🎄) +1F9D1 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus # E13.0 [1] (🧑‍🎄) 1F9D1 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student # E12.1 [1] (🧑‍🎓) 1F9D1 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer # E12.1 [1] (🧑‍🎤) 1F9D1 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist # E12.1 [1] (🧑‍🎨) @@ -689,7 +689,7 @@ 1F9D1 1F3FB 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: light skin tone # E12.1 [1] (🧑🏻‍🌾) 1F9D1 1F3FB 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: light skin tone # E12.1 [1] (🧑🏻‍🍳) 1F9D1 1F3FB 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: light skin tone # E13.0 [1] (🧑🏻‍🍼) -1F9D1 1F3FB 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: light skin tone # E13.0 [1] (🧑🏻‍🎄) +1F9D1 1F3FB 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: light skin tone # E13.0 [1] (🧑🏻‍🎄) 1F9D1 1F3FB 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: light skin tone # E12.1 [1] (🧑🏻‍🎓) 1F9D1 1F3FB 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: light skin tone # E12.1 [1] (🧑🏻‍🎤) 1F9D1 1F3FB 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: light skin tone # E12.1 [1] (🧑🏻‍🎨) @@ -713,7 +713,7 @@ 1F9D1 1F3FC 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium-light skin tone # E12.1 [1] (🧑🏼‍🌾) 1F9D1 1F3FC 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium-light skin tone # E12.1 [1] (🧑🏼‍🍳) 1F9D1 1F3FC 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium-light skin tone # E13.0 [1] (🧑🏼‍🍼) -1F9D1 1F3FC 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium-light skin tone # E13.0 [1] (🧑🏼‍🎄) +1F9D1 1F3FC 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium-light skin tone # E13.0 [1] (🧑🏼‍🎄) 1F9D1 1F3FC 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium-light skin tone # E12.1 [1] (🧑🏼‍🎓) 1F9D1 1F3FC 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium-light skin tone # E12.1 [1] (🧑🏼‍🎤) 1F9D1 1F3FC 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium-light skin tone # E12.1 [1] (🧑🏼‍🎨) @@ -737,7 +737,7 @@ 1F9D1 1F3FD 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium skin tone # E12.1 [1] (🧑🏽‍🌾) 1F9D1 1F3FD 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium skin tone # E12.1 [1] (🧑🏽‍🍳) 1F9D1 1F3FD 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium skin tone # E13.0 [1] (🧑🏽‍🍼) -1F9D1 1F3FD 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium skin tone # E13.0 [1] (🧑🏽‍🎄) +1F9D1 1F3FD 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium skin tone # E13.0 [1] (🧑🏽‍🎄) 1F9D1 1F3FD 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium skin tone # E12.1 [1] (🧑🏽‍🎓) 1F9D1 1F3FD 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium skin tone # E12.1 [1] (🧑🏽‍🎤) 1F9D1 1F3FD 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium skin tone # E12.1 [1] (🧑🏽‍🎨) @@ -761,7 +761,7 @@ 1F9D1 1F3FE 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium-dark skin tone # E12.1 [1] (🧑🏾‍🌾) 1F9D1 1F3FE 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium-dark skin tone # E12.1 [1] (🧑🏾‍🍳) 1F9D1 1F3FE 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium-dark skin tone # E13.0 [1] (🧑🏾‍🍼) -1F9D1 1F3FE 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium-dark skin tone # E13.0 [1] (🧑🏾‍🎄) +1F9D1 1F3FE 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: medium-dark skin tone # E13.0 [1] (🧑🏾‍🎄) 1F9D1 1F3FE 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium-dark skin tone # E12.1 [1] (🧑🏾‍🎓) 1F9D1 1F3FE 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium-dark skin tone # E12.1 [1] (🧑🏾‍🎤) 1F9D1 1F3FE 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium-dark skin tone # E12.1 [1] (🧑🏾‍🎨) @@ -785,7 +785,7 @@ 1F9D1 1F3FF 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: dark skin tone # E12.1 [1] (🧑🏿‍🌾) 1F9D1 1F3FF 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: dark skin tone # E12.1 [1] (🧑🏿‍🍳) 1F9D1 1F3FF 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: dark skin tone # E13.0 [1] (🧑🏿‍🍼) -1F9D1 1F3FF 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: dark skin tone # E13.0 [1] (🧑🏿‍🎄) +1F9D1 1F3FF 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; Mx claus: dark skin tone # E13.0 [1] (🧑🏿‍🎄) 1F9D1 1F3FF 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: dark skin tone # E12.1 [1] (🧑🏿‍🎓) 1F9D1 1F3FF 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: dark skin tone # E12.1 [1] (🧑🏿‍🎤) 1F9D1 1F3FF 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: dark skin tone # E12.1 [1] (🧑🏿‍🎨) diff --git a/unicodetools/data/emoji/dev/internal/emoji-proposals.txt b/unicodetools/data/emoji/dev/internal/emoji-proposals.txt index edc931dbb..8769e7477 100644 --- a/unicodetools/data/emoji/dev/internal/emoji-proposals.txt +++ b/unicodetools/data/emoji/dev/internal/emoji-proposals.txt @@ -1925,7 +1925,7 @@ 1F9CF 1F3FF; L2/18-229, L2/14-173 # 2019 (🧏🏿) deaf person: dark skin tone # L2/19-231 -1F9D1 200D 1F384; L2/19-231 # 2020 (🧑‍🎄) mx claus +1F9D1 200D 1F384; L2/19-231 # 2020 (🧑‍🎄) Mx claus # L2/19-275, L2/18-223, L2/18-228, L2/19-021, L2/18-340 1F9D1 200D 1F91D 200D 1F9D1; L2/19-275, L2/18-223, L2/18-228, L2/19-021, L2/18-340 # 2019 (🧑‍🤝‍🧑) people holding hands diff --git a/unicodetools/data/security/dev/confusables.txt b/unicodetools/data/security/dev/confusables.txt index 531fd2a7f..ecbd58c23 100644 --- a/unicodetools/data/security/dev/confusables.txt +++ b/unicodetools/data/security/dev/confusables.txt @@ -1,5 +1,5 @@ # confusables.txt -# Date: 2024-05-03, 03:26:41 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -577,10 +577,10 @@ FF07 ; 0027 ; MA #* ( ' → ' ) FULLWIDTH APOSTROPHE → APOSTROPHE # →’ 2018 ; 0027 ; MA #* ( ‘ → ' ) LEFT SINGLE QUOTATION MARK → APOSTROPHE # 2019 ; 0027 ; MA #* ( ’ → ' ) RIGHT SINGLE QUOTATION MARK → APOSTROPHE # 201B ; 0027 ; MA #* ( ‛ → ' ) SINGLE HIGH-REVERSED-9 QUOTATION MARK → APOSTROPHE # →′→ +05F3 ; 0027 ; MA #* ( ‎׳‎ → ' ) HEBREW PUNCTUATION GERESH → APOSTROPHE # 2032 ; 0027 ; MA #* ( ′ → ' ) PRIME → APOSTROPHE # 2035 ; 0027 ; MA #* ( ‵ → ' ) REVERSED PRIME → APOSTROPHE # →ʽ→→‘→ 055A ; 0027 ; MA #* ( ՚ → ' ) ARMENIAN APOSTROPHE → APOSTROPHE # →’→ -05F3 ; 0027 ; MA #* ( ‎׳‎ → ' ) HEBREW PUNCTUATION GERESH → APOSTROPHE # 0060 ; 0027 ; MA #* ( ` → ' ) GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→ 1FEF ; 0027 ; MA #* ( ` → ' ) GREEK VARIA → APOSTROPHE # →ˋ→→`→→‘→ FF40 ; 0027 ; MA #* ( ` → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘→ @@ -593,7 +593,7 @@ FF40 ; 0027 ; MA #* ( ` → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘ 02B9 ; 0027 ; MA # ( ʹ → ' ) MODIFIER LETTER PRIME → APOSTROPHE # 0374 ; 0027 ; MA # ( ʹ → ' ) GREEK NUMERAL SIGN → APOSTROPHE # →′→ 02C8 ; 0027 ; MA # ( ˈ → ' ) MODIFIER LETTER VERTICAL LINE → APOSTROPHE # -02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →ʹ→→′→ +02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →΄→→ʹ→ 02CB ; 0027 ; MA # ( ˋ → ' ) MODIFIER LETTER GRAVE ACCENT → APOSTROPHE # →`→→‘→ 02F4 ; 0027 ; MA #* ( ˴ → ' ) MODIFIER LETTER MIDDLE GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→ 02BB ; 0027 ; MA # ( ʻ → ' ) MODIFIER LETTER TURNED COMMA → APOSTROPHE # →‘→ @@ -615,10 +615,10 @@ FF02 ; 0027 0027 ; MA #* ( " → '' ) FULLWIDTH QUOTATION MARK → APOSTROPHE, 201C ; 0027 0027 ; MA #* ( “ → '' ) LEFT DOUBLE QUOTATION MARK → APOSTROPHE, APOSTROPHE # →"→ 201D ; 0027 0027 ; MA #* ( ” → '' ) RIGHT DOUBLE QUOTATION MARK → APOSTROPHE, APOSTROPHE # →"→ 201F ; 0027 0027 ; MA #* ( ‟ → '' ) DOUBLE HIGH-REVERSED-9 QUOTATION MARK → APOSTROPHE, APOSTROPHE # →“→→"→ +05F4 ; 0027 0027 ; MA #* ( ‎״‎ → '' ) HEBREW PUNCTUATION GERSHAYIM → APOSTROPHE, APOSTROPHE # →"→ 2033 ; 0027 0027 ; MA #* ( ″ → '' ) DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →"→ 2036 ; 0027 0027 ; MA #* ( ‶ → '' ) REVERSED DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →‵‵→ 3003 ; 0027 0027 ; MA #* ( 〃 → '' ) DITTO MARK → APOSTROPHE, APOSTROPHE # →″→→"→ -05F4 ; 0027 0027 ; MA #* ( ‎״‎ → '' ) HEBREW PUNCTUATION GERSHAYIM → APOSTROPHE, APOSTROPHE # →"→ 02DD ; 0027 0027 ; MA #* ( ˝ → '' ) DOUBLE ACUTE ACCENT → APOSTROPHE, APOSTROPHE # →"→ 02BA ; 0027 0027 ; MA # ( ʺ → '' ) MODIFIER LETTER DOUBLE PRIME → APOSTROPHE, APOSTROPHE # →"→ 02F6 ; 0027 0027 ; MA #* ( ˶ → '' ) MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT → APOSTROPHE, APOSTROPHE # →˝→→"→ @@ -1417,6 +1417,7 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO 23E8 ; 2081 2080 ; MA #* ( ⏨ → ₁₀ ) DECIMAL EXPONENT SYMBOL → SUBSCRIPT ONE, SUBSCRIPT ZERO # +1CCF2 ; 0032 ; MA # ( 𜳲 → 2 ) OUTLINED DIGIT TWO → DIGIT TWO # 1D7D0 ; 0032 ; MA # ( 𝟐 → 2 ) MATHEMATICAL BOLD DIGIT TWO → DIGIT TWO # 1D7DA ; 0032 ; MA # ( 𝟚 → 2 ) MATHEMATICAL DOUBLE-STRUCK DIGIT TWO → DIGIT TWO # 1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO # @@ -1429,7 +1430,6 @@ A75A ; 0032 ; MA # ( Ꝛ → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO # A644 ; 0032 ; MA # ( Ꙅ → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→ 14BF ; 0032 ; MA # ( ᒿ → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO # A6EF ; 0032 ; MA # ( ꛯ → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→ -1CCF2 ; 0032 ; MA # ( 𜳲 → 2 ) OUTLINED DIGIT TWO → DIGIT TWO # A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO # 06F2 ; 0662 ; MA # ( ۲ → ‎٢‎ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO # @@ -1491,6 +1491,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI 335A ; 0032 70B9 ; MA #* ( ㍚ → 2点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWO → DIGIT TWO, CJK UNIFIED IDEOGRAPH-70B9 # 1D206 ; 0033 ; MA #* ( 𝈆 → 3 ) GREEK VOCAL NOTATION SYMBOL-7 → DIGIT THREE # +1CCF3 ; 0033 ; MA # ( 𜳳 → 3 ) OUTLINED DIGIT THREE → DIGIT THREE # 1D7D1 ; 0033 ; MA # ( 𝟑 → 3 ) MATHEMATICAL BOLD DIGIT THREE → DIGIT THREE # 1D7DB ; 0033 ; MA # ( 𝟛 → 3 ) MATHEMATICAL DOUBLE-STRUCK DIGIT THREE → DIGIT THREE # 1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE # @@ -1506,7 +1507,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→ 16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→ 118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE # -1CCF3 ; 0033 ; MA # ( 𜳳 → 3 ) OUTLINED DIGIT THREE → DIGIT THREE # 06F3 ; 0663 ; MA # ( ۳ → ‎٣‎ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE # 1E8C9 ; 0663 ; MA #* ( ‎𞣉‎ → ‎٣‎ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE # @@ -1531,6 +1531,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335B ; 0033 70B9 ; MA #* ( ㍛ → 3点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THREE → DIGIT THREE, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF4 ; 0034 ; MA # ( 𜳴 → 4 ) OUTLINED DIGIT FOUR → DIGIT FOUR # 1D7D2 ; 0034 ; MA # ( 𝟒 → 4 ) MATHEMATICAL BOLD DIGIT FOUR → DIGIT FOUR # 1D7DC ; 0034 ; MA # ( 𝟜 → 4 ) MATHEMATICAL DOUBLE-STRUCK DIGIT FOUR → DIGIT FOUR # 1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR # @@ -1539,7 +1540,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR # 13CE ; 0034 ; MA # ( Ꮞ → 4 ) CHEROKEE LETTER SE → DIGIT FOUR # 118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR # -1CCF4 ; 0034 ; MA # ( 𜳴 → 4 ) OUTLINED DIGIT FOUR → DIGIT FOUR # 06F4 ; 0664 ; MA # ( ۴ → ‎٤‎ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR # @@ -1559,6 +1559,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335C ; 0034 70B9 ; MA #* ( ㍜ → 4点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOUR → DIGIT FOUR, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF5 ; 0035 ; MA # ( 𜳵 → 5 ) OUTLINED DIGIT FIVE → DIGIT FIVE # 1D7D3 ; 0035 ; MA # ( 𝟓 → 5 ) MATHEMATICAL BOLD DIGIT FIVE → DIGIT FIVE # 1D7DD ; 0035 ; MA # ( 𝟝 → 5 ) MATHEMATICAL DOUBLE-STRUCK DIGIT FIVE → DIGIT FIVE # 1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE # @@ -1567,7 +1568,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE # 01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE # 118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE # -1CCF5 ; 0035 ; MA # ( 𜳵 → 5 ) OUTLINED DIGIT FIVE → DIGIT FIVE # 2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE # @@ -1581,6 +1581,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335D ; 0035 70B9 ; MA #* ( ㍝ → 5点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIVE → DIGIT FIVE, CJK UNIFIED IDEOGRAPH-70B9 # +1CCF6 ; 0036 ; MA # ( 𜳶 → 6 ) OUTLINED DIGIT SIX → DIGIT SIX # 1D7D4 ; 0036 ; MA # ( 𝟔 → 6 ) MATHEMATICAL BOLD DIGIT SIX → DIGIT SIX # 1D7DE ; 0036 ; MA # ( 𝟞 → 6 ) MATHEMATICAL DOUBLE-STRUCK DIGIT SIX → DIGIT SIX # 1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX # @@ -1591,7 +1592,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX # 13EE ; 0036 ; MA # ( Ꮾ → 6 ) CHEROKEE LETTER WV → DIGIT SIX # 118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX # -1CCF6 ; 0036 ; MA # ( 𜳶 → 6 ) OUTLINED DIGIT SIX → DIGIT SIX # 06F6 ; 0666 ; MA # ( ۶ → ‎٦‎ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX # @@ -1610,6 +1610,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 335E ; 0036 70B9 ; MA #* ( ㍞ → 6点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIX → DIGIT SIX, CJK UNIFIED IDEOGRAPH-70B9 # 1D212 ; 0037 ; MA #* ( 𝈒 → 7 ) GREEK VOCAL NOTATION SYMBOL-19 → DIGIT SEVEN # +1CCF7 ; 0037 ; MA # ( 𜳷 → 7 ) OUTLINED DIGIT SEVEN → DIGIT SEVEN # 1D7D5 ; 0037 ; MA # ( 𝟕 → 7 ) MATHEMATICAL BOLD DIGIT SEVEN → DIGIT SEVEN # 1D7DF ; 0037 ; MA # ( 𝟟 → 7 ) MATHEMATICAL DOUBLE-STRUCK DIGIT SEVEN → DIGIT SEVEN # 1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN # @@ -1618,7 +1619,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN # 104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN # 118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN # -1CCF7 ; 0037 ; MA # ( 𜳷 → 7 ) OUTLINED DIGIT SEVEN → DIGIT SEVEN # 2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN # @@ -1636,6 +1636,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 09EA ; 0038 ; MA # ( ৪ → 8 ) BENGALI DIGIT FOUR → DIGIT EIGHT # 0A6A ; 0038 ; MA # ( ੪ → 8 ) GURMUKHI DIGIT FOUR → DIGIT EIGHT # 1E8CB ; 0038 ; MA #* ( ‎𞣋‎ → 8 ) MENDE KIKAKUI DIGIT FIVE → DIGIT EIGHT # +1CCF8 ; 0038 ; MA # ( 𜳸 → 8 ) OUTLINED DIGIT EIGHT → DIGIT EIGHT # 1D7D6 ; 0038 ; MA # ( 𝟖 → 8 ) MATHEMATICAL BOLD DIGIT EIGHT → DIGIT EIGHT # 1D7E0 ; 0038 ; MA # ( 𝟠 → 8 ) MATHEMATICAL DOUBLE-STRUCK DIGIT EIGHT → DIGIT EIGHT # 1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT # @@ -1645,7 +1646,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT # 0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT # 1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT # -1CCF8 ; 0038 ; MA # ( 𜳸 → 8 ) OUTLINED DIGIT EIGHT → DIGIT EIGHT # 0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT # @@ -1665,6 +1665,7 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE # 0B68 ; 0039 ; MA # ( ୨ → 9 ) ORIYA DIGIT TWO → DIGIT NINE # 09ED ; 0039 ; MA # ( ৭ → 9 ) BENGALI DIGIT SEVEN → DIGIT NINE # 0D6D ; 0039 ; MA # ( ൭ → 9 ) MALAYALAM DIGIT SEVEN → DIGIT NINE # +1CCF9 ; 0039 ; MA # ( 𜳹 → 9 ) OUTLINED DIGIT NINE → DIGIT NINE # 1D7D7 ; 0039 ; MA # ( 𝟗 → 9 ) MATHEMATICAL BOLD DIGIT NINE → DIGIT NINE # 1D7E1 ; 0039 ; MA # ( 𝟡 → 9 ) MATHEMATICAL DOUBLE-STRUCK DIGIT NINE → DIGIT NINE # 1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE # @@ -1676,7 +1677,6 @@ A76E ; 0039 ; MA # ( Ꝯ → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE # 118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE # 118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE # 118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE # -1CCF9 ; 0039 ; MA # ( 𜳹 → 9 ) OUTLINED DIGIT NINE → DIGIT NINE # 0967 ; 0669 ; MA # ( १ → ‎٩‎ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE # 118E4 ; 0669 ; MA # ( 𑣤 → ‎٩‎ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE # @@ -1723,6 +1723,7 @@ FF41 ; 0061 ; MA # ( a → a ) FULLWIDTH LATIN SMALL LETTER A → LATIN SMALL 2DF6 ; 0363 ; MA # ( ⷶ → ͣ ) COMBINING CYRILLIC LETTER A → COMBINING LATIN SMALL LETTER A # FF21 ; 0041 ; MA # ( A → A ) FULLWIDTH LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # →А→ +1CCD6 ; 0041 ; MA #* ( 𜳖 → A ) OUTLINED LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # 1D400 ; 0041 ; MA # ( 𝐀 → A ) MATHEMATICAL BOLD CAPITAL A → LATIN CAPITAL LETTER A # 1D434 ; 0041 ; MA # ( 𝐴 → A ) MATHEMATICAL ITALIC CAPITAL A → LATIN CAPITAL LETTER A # 1D468 ; 0041 ; MA # ( 𝑨 → A ) MATHEMATICAL BOLD ITALIC CAPITAL A → LATIN CAPITAL LETTER A # @@ -1748,7 +1749,6 @@ FF21 ; 0041 ; MA # ( A → A ) FULLWIDTH LATIN CAPITAL LETTER A → LATIN CAPI A4EE ; 0041 ; MA # ( ꓮ → A ) LISU LETTER A → LATIN CAPITAL LETTER A # 16F40 ; 0041 ; MA # ( 𖽀 → A ) MIAO LETTER ZZYA → LATIN CAPITAL LETTER A # 102A0 ; 0041 ; MA # ( 𐊠 → A ) CARIAN LETTER A → LATIN CAPITAL LETTER A # -1CCD6 ; 0041 ; MA #* ( 𜳖 → A ) OUTLINED LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # 2376 ; 0061 0332 ; MA #* ( ⍶ → a̲ ) APL FUNCTIONAL SYMBOL ALPHA UNDERBAR → LATIN SMALL LETTER A, COMBINING LOW LINE # →α̲→→ɑ̲→ @@ -1826,6 +1826,7 @@ A4EF ; 2C6F ; MA # ( ꓯ → Ɐ ) LISU LETTER AE → LATIN CAPITAL LETTER TURNE FF22 ; 0042 ; MA # ( B → B ) FULLWIDTH LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # →Β→ 212C ; 0042 ; MA # ( ℬ → B ) SCRIPT CAPITAL B → LATIN CAPITAL LETTER B # +1CCD7 ; 0042 ; MA #* ( 𜳗 → B ) OUTLINED LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # 1D401 ; 0042 ; MA # ( 𝐁 → B ) MATHEMATICAL BOLD CAPITAL B → LATIN CAPITAL LETTER B # 1D435 ; 0042 ; MA # ( 𝐵 → B ) MATHEMATICAL ITALIC CAPITAL B → LATIN CAPITAL LETTER B # 1D469 ; 0042 ; MA # ( 𝑩 → B ) MATHEMATICAL BOLD ITALIC CAPITAL B → LATIN CAPITAL LETTER B # @@ -1852,7 +1853,6 @@ A4D0 ; 0042 ; MA # ( ꓐ → B ) LISU LETTER BA → LATIN CAPITAL LETTER B # 10282 ; 0042 ; MA # ( 𐊂 → B ) LYCIAN LETTER B → LATIN CAPITAL LETTER B # 102A1 ; 0042 ; MA # ( 𐊡 → B ) CARIAN LETTER P2 → LATIN CAPITAL LETTER B # 10301 ; 0042 ; MA # ( 𐌁 → B ) OLD ITALIC LETTER BE → LATIN CAPITAL LETTER B # -1CCD7 ; 0042 ; MA #* ( 𜳗 → B ) OUTLINED LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # 0253 ; 0062 0314 ; MA # ( ɓ → b̔ ) LATIN SMALL LETTER B WITH HOOK → LATIN SMALL LETTER B, COMBINING REVERSED COMMA ABOVE # @@ -1910,6 +1910,7 @@ FF23 ; 0043 ; MA # ( C → C ) FULLWIDTH LATIN CAPITAL LETTER C → LATIN CAPI 216D ; 0043 ; MA # ( Ⅽ → C ) ROMAN NUMERAL ONE HUNDRED → LATIN CAPITAL LETTER C # 2102 ; 0043 ; MA # ( ℂ → C ) DOUBLE-STRUCK CAPITAL C → LATIN CAPITAL LETTER C # 212D ; 0043 ; MA # ( ℭ → C ) BLACK-LETTER CAPITAL C → LATIN CAPITAL LETTER C # +1CCD8 ; 0043 ; MA #* ( 𜳘 → C ) OUTLINED LATIN CAPITAL LETTER C → LATIN CAPITAL LETTER C # 1D402 ; 0043 ; MA # ( 𝐂 → C ) MATHEMATICAL BOLD CAPITAL C → LATIN CAPITAL LETTER C # 1D436 ; 0043 ; MA # ( 𝐶 → C ) MATHEMATICAL ITALIC CAPITAL C → LATIN CAPITAL LETTER C # 1D46A ; 0043 ; MA # ( 𝑪 → C ) MATHEMATICAL BOLD ITALIC CAPITAL C → LATIN CAPITAL LETTER C # @@ -1930,7 +1931,6 @@ A4DA ; 0043 ; MA # ( ꓚ → C ) LISU LETTER CA → LATIN CAPITAL LETTER C # 10302 ; 0043 ; MA # ( 𐌂 → C ) OLD ITALIC LETTER KE → LATIN CAPITAL LETTER C # 10415 ; 0043 ; MA # ( 𐐕 → C ) DESERET CAPITAL LETTER CHEE → LATIN CAPITAL LETTER C # 1051C ; 0043 ; MA # ( 𐔜 → C ) ELBASAN LETTER SHE → LATIN CAPITAL LETTER C # -1CCD8 ; 0043 ; MA #* ( 𜳘 → C ) OUTLINED LATIN CAPITAL LETTER C → LATIN CAPITAL LETTER C # 00A2 ; 0063 0338 ; MA #* ( ¢ → c̸ ) CENT SIGN → LATIN SMALL LETTER C, COMBINING LONG SOLIDUS OVERLAY # 023C ; 0063 0338 ; MA # ( ȼ → c̸ ) LATIN SMALL LETTER C WITH STROKE → LATIN SMALL LETTER C, COMBINING LONG SOLIDUS OVERLAY # →¢→ @@ -2006,6 +2006,7 @@ A4D2 ; 0064 ; MA # ( ꓒ → d ) LISU LETTER PHA → LATIN SMALL LETTER D # 216E ; 0044 ; MA # ( Ⅾ → D ) ROMAN NUMERAL FIVE HUNDRED → LATIN CAPITAL LETTER D # 2145 ; 0044 ; MA # ( ⅅ → D ) DOUBLE-STRUCK ITALIC CAPITAL D → LATIN CAPITAL LETTER D # +1CCD9 ; 0044 ; MA #* ( 𜳙 → D ) OUTLINED LATIN CAPITAL LETTER D → LATIN CAPITAL LETTER D # 1D403 ; 0044 ; MA # ( 𝐃 → D ) MATHEMATICAL BOLD CAPITAL D → LATIN CAPITAL LETTER D # 1D437 ; 0044 ; MA # ( 𝐷 → D ) MATHEMATICAL ITALIC CAPITAL D → LATIN CAPITAL LETTER D # 1D46B ; 0044 ; MA # ( 𝑫 → D ) MATHEMATICAL BOLD ITALIC CAPITAL D → LATIN CAPITAL LETTER D # @@ -2023,7 +2024,6 @@ A4D2 ; 0064 ; MA # ( ꓒ → d ) LISU LETTER PHA → LATIN SMALL LETTER D # 15DE ; 0044 ; MA # ( ᗞ → D ) CANADIAN SYLLABICS CARRIER THE → LATIN CAPITAL LETTER D # 15EA ; 0044 ; MA # ( ᗪ → D ) CANADIAN SYLLABICS CARRIER PE → LATIN CAPITAL LETTER D # →ᗞ→ A4D3 ; 0044 ; MA # ( ꓓ → D ) LISU LETTER DA → LATIN CAPITAL LETTER D # -1CCD9 ; 0044 ; MA #* ( 𜳙 → D ) OUTLINED LATIN CAPITAL LETTER D → LATIN CAPITAL LETTER D # 0257 ; 0064 0314 ; MA # ( ɗ → d̔ ) LATIN SMALL LETTER D WITH HOOK → LATIN SMALL LETTER D, COMBINING REVERSED COMMA ABOVE # @@ -2099,6 +2099,7 @@ AB32 ; 0065 ; MA # ( ꬲ → e ) LATIN SMALL LETTER BLACKLETTER E → LATIN SMAL 22FF ; 0045 ; MA #* ( ⋿ → E ) Z NOTATION BAG MEMBERSHIP → LATIN CAPITAL LETTER E # FF25 ; 0045 ; MA # ( E → E ) FULLWIDTH LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # →Ε→ 2130 ; 0045 ; MA # ( ℰ → E ) SCRIPT CAPITAL E → LATIN CAPITAL LETTER E # +1CCDA ; 0045 ; MA #* ( 𜳚 → E ) OUTLINED LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # 1D404 ; 0045 ; MA # ( 𝐄 → E ) MATHEMATICAL BOLD CAPITAL E → LATIN CAPITAL LETTER E # 1D438 ; 0045 ; MA # ( 𝐸 → E ) MATHEMATICAL ITALIC CAPITAL E → LATIN CAPITAL LETTER E # 1D46C ; 0045 ; MA # ( 𝑬 → E ) MATHEMATICAL BOLD ITALIC CAPITAL E → LATIN CAPITAL LETTER E # @@ -2124,7 +2125,6 @@ A4F0 ; 0045 ; MA # ( ꓰ → E ) LISU LETTER E → LATIN CAPITAL LETTER E # 118A6 ; 0045 ; MA # ( 𑢦 → E ) WARANG CITI CAPITAL LETTER II → LATIN CAPITAL LETTER E # 118AE ; 0045 ; MA # ( 𑢮 → E ) WARANG CITI CAPITAL LETTER YUJ → LATIN CAPITAL LETTER E # 10286 ; 0045 ; MA # ( 𐊆 → E ) LYCIAN LETTER I → LATIN CAPITAL LETTER E # -1CCDA ; 0045 ; MA #* ( 𜳚 → E ) OUTLINED LATIN CAPITAL LETTER E → LATIN CAPITAL LETTER E # 011B ; 0115 ; MA # ( ě → ĕ ) LATIN SMALL LETTER E WITH CARON → LATIN SMALL LETTER E WITH BREVE # @@ -2195,6 +2195,7 @@ A799 ; 0066 ; MA # ( ꞙ → f ) LATIN SMALL LETTER F WITH STROKE → LATIN SMAL 1D213 ; 0046 ; MA #* ( 𝈓 → F ) GREEK VOCAL NOTATION SYMBOL-20 → LATIN CAPITAL LETTER F # →Ϝ→ 2131 ; 0046 ; MA # ( ℱ → F ) SCRIPT CAPITAL F → LATIN CAPITAL LETTER F # +1CCDB ; 0046 ; MA #* ( 𜳛 → F ) OUTLINED LATIN CAPITAL LETTER F → LATIN CAPITAL LETTER F # 1D405 ; 0046 ; MA # ( 𝐅 → F ) MATHEMATICAL BOLD CAPITAL F → LATIN CAPITAL LETTER F # 1D439 ; 0046 ; MA # ( 𝐹 → F ) MATHEMATICAL ITALIC CAPITAL F → LATIN CAPITAL LETTER F # 1D46D ; 0046 ; MA # ( 𝑭 → F ) MATHEMATICAL BOLD ITALIC CAPITAL F → LATIN CAPITAL LETTER F # @@ -2217,7 +2218,6 @@ A4DD ; 0046 ; MA # ( ꓝ → F ) LISU LETTER TSA → LATIN CAPITAL LETTER F # 10287 ; 0046 ; MA # ( 𐊇 → F ) LYCIAN LETTER W → LATIN CAPITAL LETTER F # 102A5 ; 0046 ; MA # ( 𐊥 → F ) CARIAN LETTER R → LATIN CAPITAL LETTER F # 10525 ; 0046 ; MA # ( 𐔥 → F ) ELBASAN LETTER GHE → LATIN CAPITAL LETTER F # -1CCDB ; 0046 ; MA #* ( 𜳛 → F ) OUTLINED LATIN CAPITAL LETTER F → LATIN CAPITAL LETTER F # 0192 ; 0066 0326 ; MA # ( ƒ → f̦ ) LATIN SMALL LETTER F WITH HOOK → LATIN SMALL LETTER F, COMBINING COMMA BELOW # →f̡→ @@ -2264,6 +2264,7 @@ FF47 ; 0067 ; MA # ( g → g ) FULLWIDTH LATIN SMALL LETTER G → LATIN SMALL 018D ; 0067 ; MA # ( ƍ → g ) LATIN SMALL LETTER TURNED DELTA → LATIN SMALL LETTER G # 0581 ; 0067 ; MA # ( ց → g ) ARMENIAN SMALL LETTER CO → LATIN SMALL LETTER G # +1CCDC ; 0047 ; MA #* ( 𜳜 → G ) OUTLINED LATIN CAPITAL LETTER G → LATIN CAPITAL LETTER G # 1D406 ; 0047 ; MA # ( 𝐆 → G ) MATHEMATICAL BOLD CAPITAL G → LATIN CAPITAL LETTER G # 1D43A ; 0047 ; MA # ( 𝐺 → G ) MATHEMATICAL ITALIC CAPITAL G → LATIN CAPITAL LETTER G # 1D46E ; 0047 ; MA # ( 𝑮 → G ) MATHEMATICAL BOLD ITALIC CAPITAL G → LATIN CAPITAL LETTER G # @@ -2281,7 +2282,6 @@ FF47 ; 0067 ; MA # ( g → g ) FULLWIDTH LATIN SMALL LETTER G → LATIN SMALL 13C0 ; 0047 ; MA # ( Ꮐ → G ) CHEROKEE LETTER NAH → LATIN CAPITAL LETTER G # 13F3 ; 0047 ; MA # ( Ᏻ → G ) CHEROKEE LETTER YU → LATIN CAPITAL LETTER G # A4D6 ; 0047 ; MA # ( ꓖ → G ) LISU LETTER GA → LATIN CAPITAL LETTER G # -1CCDC ; 0047 ; MA #* ( 𜳜 → G ) OUTLINED LATIN CAPITAL LETTER G → LATIN CAPITAL LETTER G # 1DA2 ; 1D4D ; MA # ( ᶢ → ᵍ ) MODIFIER LETTER SMALL SCRIPT G → MODIFIER LETTER SMALL G # @@ -2325,6 +2325,7 @@ FF28 ; 0048 ; MA # ( H → H ) FULLWIDTH LATIN CAPITAL LETTER H → LATIN CAPI 210B ; 0048 ; MA # ( ℋ → H ) SCRIPT CAPITAL H → LATIN CAPITAL LETTER H # 210C ; 0048 ; MA # ( ℌ → H ) BLACK-LETTER CAPITAL H → LATIN CAPITAL LETTER H # 210D ; 0048 ; MA # ( ℍ → H ) DOUBLE-STRUCK CAPITAL H → LATIN CAPITAL LETTER H # +1CCDD ; 0048 ; MA #* ( 𜳝 → H ) OUTLINED LATIN CAPITAL LETTER H → LATIN CAPITAL LETTER H # 1D407 ; 0048 ; MA # ( 𝐇 → H ) MATHEMATICAL BOLD CAPITAL H → LATIN CAPITAL LETTER H # 1D43B ; 0048 ; MA # ( 𝐻 → H ) MATHEMATICAL ITALIC CAPITAL H → LATIN CAPITAL LETTER H # 1D46F ; 0048 ; MA # ( 𝑯 → H ) MATHEMATICAL BOLD ITALIC CAPITAL H → LATIN CAPITAL LETTER H # @@ -2347,7 +2348,6 @@ FF28 ; 0048 ; MA # ( H → H ) FULLWIDTH LATIN CAPITAL LETTER H → LATIN CAPI 157C ; 0048 ; MA # ( ᕼ → H ) CANADIAN SYLLABICS NUNAVUT H → LATIN CAPITAL LETTER H # A4E7 ; 0048 ; MA # ( ꓧ → H ) LISU LETTER XA → LATIN CAPITAL LETTER H # 102CF ; 0048 ; MA # ( 𐋏 → H ) CARIAN LETTER E2 → LATIN CAPITAL LETTER H # -1CCDD ; 0048 ; MA #* ( 𜳝 → H ) OUTLINED LATIN CAPITAL LETTER H → LATIN CAPITAL LETTER H # 1D78 ; 1D34 ; MA # ( ᵸ → ᴴ ) MODIFIER LETTER CYRILLIC EN → MODIFIER LETTER CAPITAL H # @@ -2465,6 +2465,7 @@ FF4A ; 006A ; MA # ( j → j ) FULLWIDTH LATIN SMALL LETTER J → LATIN SMALL 0458 ; 006A ; MA # ( ј → j ) CYRILLIC SMALL LETTER JE → LATIN SMALL LETTER J # FF2A ; 004A ; MA # ( J → J ) FULLWIDTH LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # →Ј→ +1CCDF ; 004A ; MA #* ( 𜳟 → J ) OUTLINED LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # 1D409 ; 004A ; MA # ( 𝐉 → J ) MATHEMATICAL BOLD CAPITAL J → LATIN CAPITAL LETTER J # 1D43D ; 004A ; MA # ( 𝐽 → J ) MATHEMATICAL ITALIC CAPITAL J → LATIN CAPITAL LETTER J # 1D471 ; 004A ; MA # ( 𝑱 → J ) MATHEMATICAL BOLD ITALIC CAPITAL J → LATIN CAPITAL LETTER J # @@ -2484,7 +2485,6 @@ A7B2 ; 004A ; MA # ( Ʝ → J ) LATIN CAPITAL LETTER J WITH CROSSED-TAIL → LA 13AB ; 004A ; MA # ( Ꭻ → J ) CHEROKEE LETTER GU → LATIN CAPITAL LETTER J # 148D ; 004A ; MA # ( ᒍ → J ) CANADIAN SYLLABICS CO → LATIN CAPITAL LETTER J # A4D9 ; 004A ; MA # ( ꓙ → J ) LISU LETTER JA → LATIN CAPITAL LETTER J # -1CCDF ; 004A ; MA #* ( 𜳟 → J ) OUTLINED LATIN CAPITAL LETTER J → LATIN CAPITAL LETTER J # 0249 ; 006A 0335 ; MA # ( ɉ → j̵ ) LATIN SMALL LETTER J WITH STROKE → LATIN SMALL LETTER J, COMBINING SHORT STROKE OVERLAY # @@ -2513,6 +2513,7 @@ AB7B ; 1D0A ; MA # ( ꭻ → ᴊ ) CHEROKEE SMALL LETTER GU → LATIN LETTER SMA 212A ; 004B ; MA # ( K → K ) KELVIN SIGN → LATIN CAPITAL LETTER K # FF2B ; 004B ; MA # ( K → K ) FULLWIDTH LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # →Κ→ +1CCE0 ; 004B ; MA #* ( 𜳠 → K ) OUTLINED LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # 1D40A ; 004B ; MA # ( 𝐊 → K ) MATHEMATICAL BOLD CAPITAL K → LATIN CAPITAL LETTER K # 1D43E ; 004B ; MA # ( 𝐾 → K ) MATHEMATICAL ITALIC CAPITAL K → LATIN CAPITAL LETTER K # 1D472 ; 004B ; MA # ( 𝑲 → K ) MATHEMATICAL BOLD ITALIC CAPITAL K → LATIN CAPITAL LETTER K # @@ -2538,7 +2539,6 @@ FF2B ; 004B ; MA # ( K → K ) FULLWIDTH LATIN CAPITAL LETTER K → LATIN CAPI 16D5 ; 004B ; MA # ( ᛕ → K ) RUNIC LETTER OPEN-P → LATIN CAPITAL LETTER K # A4D7 ; 004B ; MA # ( ꓗ → K ) LISU LETTER KA → LATIN CAPITAL LETTER K # 10518 ; 004B ; MA # ( 𐔘 → K ) ELBASAN LETTER QE → LATIN CAPITAL LETTER K # -1CCE0 ; 004B ; MA #* ( 𜳠 → K ) OUTLINED LATIN CAPITAL LETTER K → LATIN CAPITAL LETTER K # 0199 ; 006B 0314 ; MA # ( ƙ → k̔ ) LATIN SMALL LETTER K WITH HOOK → LATIN SMALL LETTER K, COMBINING REVERSED COMMA ABOVE # @@ -2561,6 +2561,7 @@ FFE8 ; 006C ; MA #* ( │ → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL 06F1 ; 006C ; MA # ( ۱ → l ) EXTENDED ARABIC-INDIC DIGIT ONE → LATIN SMALL LETTER L # →1→ 10320 ; 006C ; MA #* ( 𐌠 → l ) OLD ITALIC NUMERAL ONE → LATIN SMALL LETTER L # →𐌉→→I→ 1E8C7 ; 006C ; MA #* ( ‎𞣇‎ → l ) MENDE KIKAKUI DIGIT ONE → LATIN SMALL LETTER L # +1CCF1 ; 006C ; MA # ( 𜳱 → l ) OUTLINED DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7CF ; 006C ; MA # ( 𝟏 → l ) MATHEMATICAL BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7D9 ; 006C ; MA # ( 𝟙 → l ) MATHEMATICAL DOUBLE-STRUCK DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→ @@ -2572,6 +2573,7 @@ FF29 ; 006C ; MA # ( I → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMAL 2160 ; 006C ; MA # ( Ⅰ → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→ 2110 ; 006C ; MA # ( ℐ → l ) SCRIPT CAPITAL I → LATIN SMALL LETTER L # →I→ 2111 ; 006C ; MA # ( ℑ → l ) BLACK-LETTER CAPITAL I → LATIN SMALL LETTER L # →I→ +1CCDE ; 006C ; MA #* ( 𜳞 → l ) OUTLINED LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →I→ 1D408 ; 006C ; MA # ( 𝐈 → l ) MATHEMATICAL BOLD CAPITAL I → LATIN SMALL LETTER L # →I→ 1D43C ; 006C ; MA # ( 𝐼 → l ) MATHEMATICAL ITALIC CAPITAL I → LATIN SMALL LETTER L # →I→ 1D470 ; 006C ; MA # ( 𝑰 → l ) MATHEMATICAL BOLD ITALIC CAPITAL I → LATIN SMALL LETTER L # →I→ @@ -2624,12 +2626,11 @@ A4F2 ; 006C ; MA # ( ꓲ → l ) LISU LETTER I → LATIN SMALL LETTER L # →I 16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→ 1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→ 10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→ -1CCDE ; 006C ; MA #* ( 𜳞 → l ) OUTLINED LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →I→ -1CCF1 ; 006C ; MA # ( 𜳱 → l ) OUTLINED DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L # 216C ; 004C ; MA # ( Ⅼ → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L # 2112 ; 004C ; MA # ( ℒ → L ) SCRIPT CAPITAL L → LATIN CAPITAL LETTER L # +1CCE1 ; 004C ; MA #* ( 𜳡 → L ) OUTLINED LATIN CAPITAL LETTER L → LATIN CAPITAL LETTER L # 1D40B ; 004C ; MA # ( 𝐋 → L ) MATHEMATICAL BOLD CAPITAL L → LATIN CAPITAL LETTER L # 1D43F ; 004C ; MA # ( 𝐿 → L ) MATHEMATICAL ITALIC CAPITAL L → LATIN CAPITAL LETTER L # 1D473 ; 004C ; MA # ( 𝑳 → L ) MATHEMATICAL BOLD ITALIC CAPITAL L → LATIN CAPITAL LETTER L # @@ -2651,7 +2652,6 @@ A4E1 ; 004C ; MA # ( ꓡ → L ) LISU LETTER LA → LATIN CAPITAL LETTER L # 118B2 ; 004C ; MA # ( 𑢲 → L ) WARANG CITI CAPITAL LETTER TTE → LATIN CAPITAL LETTER L # 1041B ; 004C ; MA # ( 𐐛 → L ) DESERET CAPITAL LETTER ETH → LATIN CAPITAL LETTER L # 10526 ; 004C ; MA # ( 𐔦 → L ) ELBASAN LETTER GHAMMA → LATIN CAPITAL LETTER L # -1CCE1 ; 004C ; MA #* ( 𜳡 → L ) OUTLINED LATIN CAPITAL LETTER L → LATIN CAPITAL LETTER L # FD3C ; 006C 030B ; MA # ( ‎ﴼ‎ → l̋ ) ARABIC LIGATURE ALEF WITH FATHATAN FINAL FORM → LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →‎اً‎→ FD3D ; 006C 030B ; MA # ( ‎ﴽ‎ → l̋ ) ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM → LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →‎اً‎→ @@ -2805,6 +2805,7 @@ ABAE ; 029F ; MA # ( ꮮ → ʟ ) CHEROKEE SMALL LETTER TLE → LATIN LETTER SMA FF2D ; 004D ; MA # ( M → M ) FULLWIDTH LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # →Μ→ 216F ; 004D ; MA # ( Ⅿ → M ) ROMAN NUMERAL ONE THOUSAND → LATIN CAPITAL LETTER M # 2133 ; 004D ; MA # ( ℳ → M ) SCRIPT CAPITAL M → LATIN CAPITAL LETTER M # +1CCE2 ; 004D ; MA #* ( 𜳢 → M ) OUTLINED LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # 1D40C ; 004D ; MA # ( 𝐌 → M ) MATHEMATICAL BOLD CAPITAL M → LATIN CAPITAL LETTER M # 1D440 ; 004D ; MA # ( 𝑀 → M ) MATHEMATICAL ITALIC CAPITAL M → LATIN CAPITAL LETTER M # 1D474 ; 004D ; MA # ( 𝑴 → M ) MATHEMATICAL BOLD ITALIC CAPITAL M → LATIN CAPITAL LETTER M # @@ -2832,7 +2833,6 @@ FF2D ; 004D ; MA # ( M → M ) FULLWIDTH LATIN CAPITAL LETTER M → LATIN CAPI A4DF ; 004D ; MA # ( ꓟ → M ) LISU LETTER MA → LATIN CAPITAL LETTER M # 102B0 ; 004D ; MA # ( 𐊰 → M ) CARIAN LETTER S → LATIN CAPITAL LETTER M # 10311 ; 004D ; MA # ( 𐌑 → M ) OLD ITALIC LETTER SHE → LATIN CAPITAL LETTER M # -1CCE2 ; 004D ; MA #* ( 𜳢 → M ) OUTLINED LATIN CAPITAL LETTER M → LATIN CAPITAL LETTER M # 04CD ; 004D 0326 ; MA # ( Ӎ → M̦ ) CYRILLIC CAPITAL LETTER EM WITH TAIL → LATIN CAPITAL LETTER M, COMBINING COMMA BELOW # →М̡→ @@ -2858,6 +2858,7 @@ A4DF ; 004D ; MA # ( ꓟ → M ) LISU LETTER MA → LATIN CAPITAL LETTER M # FF2E ; 004E ; MA # ( N → N ) FULLWIDTH LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # →Ν→ 2115 ; 004E ; MA # ( ℕ → N ) DOUBLE-STRUCK CAPITAL N → LATIN CAPITAL LETTER N # +1CCE3 ; 004E ; MA #* ( 𜳣 → N ) OUTLINED LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # 1D40D ; 004E ; MA # ( 𝐍 → N ) MATHEMATICAL BOLD CAPITAL N → LATIN CAPITAL LETTER N # 1D441 ; 004E ; MA # ( 𝑁 → N ) MATHEMATICAL ITALIC CAPITAL N → LATIN CAPITAL LETTER N # 1D475 ; 004E ; MA # ( 𝑵 → N ) MATHEMATICAL BOLD ITALIC CAPITAL N → LATIN CAPITAL LETTER N # @@ -2879,7 +2880,6 @@ FF2E ; 004E ; MA # ( N → N ) FULLWIDTH LATIN CAPITAL LETTER N → LATIN CAPI 2C9A ; 004E ; MA # ( Ⲛ → N ) COPTIC CAPITAL LETTER NI → LATIN CAPITAL LETTER N # A4E0 ; 004E ; MA # ( ꓠ → N ) LISU LETTER NA → LATIN CAPITAL LETTER N # 10513 ; 004E ; MA # ( 𐔓 → N ) ELBASAN LETTER NE → LATIN CAPITAL LETTER N # -1CCE3 ; 004E ; MA #* ( 𜳣 → N ) OUTLINED LATIN CAPITAL LETTER N → LATIN CAPITAL LETTER N # 1018E ; 004E 030A ; MA #* ( 𐆎 → N̊ ) NOMISMA SIGN → LATIN CAPITAL LETTER N, COMBINING RING ABOVE # →Νͦ→ @@ -2994,6 +2994,7 @@ FBA6 ; 006F ; MA # ( ‎ﮦ‎ → o ) ARABIC LETTER HEH GOAL ISOLATED FORM → 3007 ; 004F ; MA # ( 〇 → O ) IDEOGRAPHIC NUMBER ZERO → LATIN CAPITAL LETTER O # 114D0 ; 004F ; MA # ( 𑓐 → O ) TIRHUTA DIGIT ZERO → LATIN CAPITAL LETTER O # →০→→0→ 118E0 ; 004F ; MA # ( 𑣠 → O ) WARANG CITI DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ +1CCF0 ; 004F ; MA # ( 𜳰 → O ) OUTLINED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7CE ; 004F ; MA # ( 𝟎 → O ) MATHEMATICAL BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7D8 ; 004F ; MA # ( 𝟘 → O ) MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ @@ -3001,6 +3002,7 @@ FBA6 ; 006F ; MA # ( ‎ﮦ‎ → o ) ARABIC LETTER HEH GOAL ISOLATED FORM → 1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ FF2F ; 004F ; MA # ( O → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→ +1CCE4 ; 004F ; MA #* ( 𜳤 → O ) OUTLINED LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # 1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O # 1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O # 1D476 ; 004F ; MA # ( 𝑶 → O ) MATHEMATICAL BOLD ITALIC CAPITAL O → LATIN CAPITAL LETTER O # @@ -3033,8 +3035,6 @@ A4F3 ; 004F ; MA # ( ꓳ → O ) LISU LETTER O → LATIN CAPITAL LETTER O # 102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN LETTER O → LATIN CAPITAL LETTER O # 10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O # 10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O # -1CCE4 ; 004F ; MA #* ( 𜳤 → O ) OUTLINED LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # -1CCF0 ; 004F ; MA # ( 𜳰 → O ) OUTLINED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→ 2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR # 1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→ @@ -3202,6 +3202,7 @@ FF50 ; 0070 ; MA # ( p → p ) FULLWIDTH LATIN SMALL LETTER P → LATIN SMALL FF30 ; 0050 ; MA # ( P → P ) FULLWIDTH LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # →Р→ 2119 ; 0050 ; MA # ( ℙ → P ) DOUBLE-STRUCK CAPITAL P → LATIN CAPITAL LETTER P # +1CCE5 ; 0050 ; MA #* ( 𜳥 → P ) OUTLINED LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # 1D40F ; 0050 ; MA # ( 𝐏 → P ) MATHEMATICAL BOLD CAPITAL P → LATIN CAPITAL LETTER P # 1D443 ; 0050 ; MA # ( 𝑃 → P ) MATHEMATICAL ITALIC CAPITAL P → LATIN CAPITAL LETTER P # 1D477 ; 0050 ; MA # ( 𝑷 → P ) MATHEMATICAL BOLD ITALIC CAPITAL P → LATIN CAPITAL LETTER P # @@ -3226,7 +3227,6 @@ FF30 ; 0050 ; MA # ( P → P ) FULLWIDTH LATIN CAPITAL LETTER P → LATIN CAPI 146D ; 0050 ; MA # ( ᑭ → P ) CANADIAN SYLLABICS KI → LATIN CAPITAL LETTER P # A4D1 ; 0050 ; MA # ( ꓑ → P ) LISU LETTER PA → LATIN CAPITAL LETTER P # 10295 ; 0050 ; MA # ( 𐊕 → P ) LYCIAN LETTER R → LATIN CAPITAL LETTER P # -1CCE5 ; 0050 ; MA #* ( 𜳥 → P ) OUTLINED LATIN CAPITAL LETTER P → LATIN CAPITAL LETTER P # 01A5 ; 0070 0314 ; MA # ( ƥ → p̔ ) LATIN SMALL LETTER P WITH HOOK → LATIN SMALL LETTER P, COMBINING REVERSED COMMA ABOVE # @@ -3272,6 +3272,7 @@ ABB2 ; 1D18 ; MA # ( ꮲ → ᴘ ) CHEROKEE SMALL LETTER TLV → LATIN LETTER SM 0566 ; 0071 ; MA # ( զ → q ) ARMENIAN SMALL LETTER ZA → LATIN SMALL LETTER Q # 211A ; 0051 ; MA # ( ℚ → Q ) DOUBLE-STRUCK CAPITAL Q → LATIN CAPITAL LETTER Q # +1CCE6 ; 0051 ; MA #* ( 𜳦 → Q ) OUTLINED LATIN CAPITAL LETTER Q → LATIN CAPITAL LETTER Q # 1D410 ; 0051 ; MA # ( 𝐐 → Q ) MATHEMATICAL BOLD CAPITAL Q → LATIN CAPITAL LETTER Q # 1D444 ; 0051 ; MA # ( 𝑄 → Q ) MATHEMATICAL ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # 1D478 ; 0051 ; MA # ( 𝑸 → Q ) MATHEMATICAL BOLD ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # @@ -3285,7 +3286,6 @@ ABB2 ; 1D18 ; MA # ( ꮲ → ᴘ ) CHEROKEE SMALL LETTER TLV → LATIN LETTER SM 1D64C ; 0051 ; MA # ( 𝙌 → Q ) MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Q → LATIN CAPITAL LETTER Q # 1D680 ; 0051 ; MA # ( 𝚀 → Q ) MATHEMATICAL MONOSPACE CAPITAL Q → LATIN CAPITAL LETTER Q # 2D55 ; 0051 ; MA # ( ⵕ → Q ) TIFINAGH LETTER YARR → LATIN CAPITAL LETTER Q # -1CCE6 ; 0051 ; MA #* ( 𜳦 → Q ) OUTLINED LATIN CAPITAL LETTER Q → LATIN CAPITAL LETTER Q # 02A0 ; 0071 0314 ; MA # ( ʠ → q̔ ) LATIN SMALL LETTER Q WITH HOOK → LATIN SMALL LETTER Q, COMBINING REVERSED COMMA ABOVE # @@ -3338,6 +3338,7 @@ AB81 ; 0072 ; MA # ( ꮁ → r ) CHEROKEE SMALL LETTER HU → LATIN SMALL LETTER 211B ; 0052 ; MA # ( ℛ → R ) SCRIPT CAPITAL R → LATIN CAPITAL LETTER R # 211C ; 0052 ; MA # ( ℜ → R ) BLACK-LETTER CAPITAL R → LATIN CAPITAL LETTER R # 211D ; 0052 ; MA # ( ℝ → R ) DOUBLE-STRUCK CAPITAL R → LATIN CAPITAL LETTER R # +1CCE7 ; 0052 ; MA #* ( 𜳧 → R ) OUTLINED LATIN CAPITAL LETTER R → LATIN CAPITAL LETTER R # 1D411 ; 0052 ; MA # ( 𝐑 → R ) MATHEMATICAL BOLD CAPITAL R → LATIN CAPITAL LETTER R # 1D445 ; 0052 ; MA # ( 𝑅 → R ) MATHEMATICAL ITALIC CAPITAL R → LATIN CAPITAL LETTER R # 1D479 ; 0052 ; MA # ( 𝑹 → R ) MATHEMATICAL BOLD ITALIC CAPITAL R → LATIN CAPITAL LETTER R # @@ -3355,7 +3356,6 @@ AB81 ; 0072 ; MA # ( ꮁ → r ) CHEROKEE SMALL LETTER HU → LATIN SMALL LETTER 1587 ; 0052 ; MA # ( ᖇ → R ) CANADIAN SYLLABICS TLHI → LATIN CAPITAL LETTER R # A4E3 ; 0052 ; MA # ( ꓣ → R ) LISU LETTER ZHA → LATIN CAPITAL LETTER R # 16F35 ; 0052 ; MA # ( 𖼵 → R ) MIAO LETTER ZHA → LATIN CAPITAL LETTER R # -1CCE7 ; 0052 ; MA #* ( 𜳧 → R ) OUTLINED LATIN CAPITAL LETTER R → LATIN CAPITAL LETTER R # 027D ; 0072 0328 ; MA # ( ɽ → r̨ ) LATIN SMALL LETTER R WITH TAIL → LATIN SMALL LETTER R, COMBINING OGONEK # @@ -3425,6 +3425,7 @@ ABAA ; 0073 ; MA # ( ꮪ → s ) CHEROKEE SMALL LETTER DU → LATIN SMALL LETTER 10448 ; 0073 ; MA # ( 𐑈 → s ) DESERET SMALL LETTER ZHEE → LATIN SMALL LETTER S # FF33 ; 0053 ; MA # ( S → S ) FULLWIDTH LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # →Ѕ→ +1CCE8 ; 0053 ; MA #* ( 𜳨 → S ) OUTLINED LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # 1D412 ; 0053 ; MA # ( 𝐒 → S ) MATHEMATICAL BOLD CAPITAL S → LATIN CAPITAL LETTER S # 1D446 ; 0053 ; MA # ( 𝑆 → S ) MATHEMATICAL ITALIC CAPITAL S → LATIN CAPITAL LETTER S # 1D47A ; 0053 ; MA # ( 𝑺 → S ) MATHEMATICAL BOLD ITALIC CAPITAL S → LATIN CAPITAL LETTER S # @@ -3446,13 +3447,14 @@ A4E2 ; 0053 ; MA # ( ꓢ → S ) LISU LETTER SA → LATIN CAPITAL LETTER S # 16F3A ; 0053 ; MA # ( 𖼺 → S ) MIAO LETTER SA → LATIN CAPITAL LETTER S # 10296 ; 0053 ; MA # ( 𐊖 → S ) LYCIAN LETTER S → LATIN CAPITAL LETTER S # 10420 ; 0053 ; MA # ( 𐐠 → S ) DESERET CAPITAL LETTER ZHEE → LATIN CAPITAL LETTER S # -1CCE8 ; 0053 ; MA #* ( 𜳨 → S ) OUTLINED LATIN CAPITAL LETTER S → LATIN CAPITAL LETTER S # 0282 ; 0073 0328 ; MA # ( ʂ → s̨ ) LATIN SMALL LETTER S WITH HOOK → LATIN SMALL LETTER S, COMBINING OGONEK # 1D74 ; 0073 0334 ; MA # ( ᵴ → s̴ ) LATIN SMALL LETTER S WITH MIDDLE TILDE → LATIN SMALL LETTER S, COMBINING TILDE OVERLAY # A7B5 ; 00DF ; MA # ( ꞵ → ß ) LATIN SMALL LETTER BETA → LATIN SMALL LETTER SHARP S # →β→ +1E9E ; 00DF ; MA # ( ẞ → ß ) LATIN CAPITAL LETTER SHARP S → LATIN SMALL LETTER SHARP S # +A7D6 ; 00DF ; MA # ( Ꟗ → ß ) LATIN CAPITAL LETTER MIDDLE SCOTS S → LATIN SMALL LETTER SHARP S # →β→ 03B2 ; 00DF ; MA # ( β → ß ) GREEK SMALL LETTER BETA → LATIN SMALL LETTER SHARP S # 03D0 ; 00DF ; MA # ( ϐ → ß ) GREEK BETA SYMBOL → LATIN SMALL LETTER SHARP S # →β→ 1D6C3 ; 00DF ; MA # ( 𝛃 → ß ) MATHEMATICAL BOLD SMALL BETA → LATIN SMALL LETTER SHARP S # →β→ @@ -3503,6 +3505,7 @@ AB4D ; 0283 ; MA # ( ꭍ → ʃ ) LATIN SMALL LETTER BASELINE ESH → LATIN SMAL 27D9 ; 0054 ; MA #* ( ⟙ → T ) LARGE DOWN TACK → LATIN CAPITAL LETTER T # 1F768 ; 0054 ; MA #* ( 🝨 → T ) ALCHEMICAL SYMBOL FOR CRUCIBLE-4 → LATIN CAPITAL LETTER T # FF34 ; 0054 ; MA # ( T → T ) FULLWIDTH LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # →Т→ +1CCE9 ; 0054 ; MA #* ( 𜳩 → T ) OUTLINED LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # 1D413 ; 0054 ; MA # ( 𝐓 → T ) MATHEMATICAL BOLD CAPITAL T → LATIN CAPITAL LETTER T # 1D447 ; 0054 ; MA # ( 𝑇 → T ) MATHEMATICAL ITALIC CAPITAL T → LATIN CAPITAL LETTER T # 1D47B ; 0054 ; MA # ( 𝑻 → T ) MATHEMATICAL BOLD ITALIC CAPITAL T → LATIN CAPITAL LETTER T # @@ -3531,7 +3534,6 @@ A4D4 ; 0054 ; MA # ( ꓔ → T ) LISU LETTER TA → LATIN CAPITAL LETTER T # 10297 ; 0054 ; MA # ( 𐊗 → T ) LYCIAN LETTER T → LATIN CAPITAL LETTER T # 102B1 ; 0054 ; MA # ( 𐊱 → T ) CARIAN LETTER C-18 → LATIN CAPITAL LETTER T # 10315 ; 0054 ; MA # ( 𐌕 → T ) OLD ITALIC LETTER TE → LATIN CAPITAL LETTER T # -1CCE9 ; 0054 ; MA #* ( 𜳩 → T ) OUTLINED LATIN CAPITAL LETTER T → LATIN CAPITAL LETTER T # 01AD ; 0074 0314 ; MA # ( ƭ → t̔ ) LATIN SMALL LETTER T WITH HOOK → LATIN SMALL LETTER T, COMBINING REVERSED COMMA ABOVE # @@ -3614,6 +3616,7 @@ AB52 ; 0075 ; MA # ( ꭒ → u ) LATIN SMALL LETTER U WITH LEFT HOOK → LATIN S 222A ; 0055 ; MA #* ( ∪ → U ) UNION → LATIN CAPITAL LETTER U # →ᑌ→ 22C3 ; 0055 ; MA #* ( ⋃ → U ) N-ARY UNION → LATIN CAPITAL LETTER U # →∪→→ᑌ→ +1CCEA ; 0055 ; MA #* ( 𜳪 → U ) OUTLINED LATIN CAPITAL LETTER U → LATIN CAPITAL LETTER U # 1D414 ; 0055 ; MA # ( 𝐔 → U ) MATHEMATICAL BOLD CAPITAL U → LATIN CAPITAL LETTER U # 1D448 ; 0055 ; MA # ( 𝑈 → U ) MATHEMATICAL ITALIC CAPITAL U → LATIN CAPITAL LETTER U # 1D47C ; 0055 ; MA # ( 𝑼 → U ) MATHEMATICAL BOLD ITALIC CAPITAL U → LATIN CAPITAL LETTER U # @@ -3634,7 +3637,6 @@ AB52 ; 0075 ; MA # ( ꭒ → u ) LATIN SMALL LETTER U WITH LEFT HOOK → LATIN S A4F4 ; 0055 ; MA # ( ꓴ → U ) LISU LETTER U → LATIN CAPITAL LETTER U # 16F42 ; 0055 ; MA # ( 𖽂 → U ) MIAO LETTER WA → LATIN CAPITAL LETTER U # 118B8 ; 0055 ; MA # ( 𑢸 → U ) WARANG CITI CAPITAL LETTER PU → LATIN CAPITAL LETTER U # -1CCEA ; 0055 ; MA #* ( 𜳪 → U ) OUTLINED LATIN CAPITAL LETTER U → LATIN CAPITAL LETTER U # 01D4 ; 016D ; MA # ( ǔ → ŭ ) LATIN SMALL LETTER U WITH CARON → LATIN SMALL LETTER U WITH BREVE # @@ -3699,6 +3701,7 @@ ABA9 ; 0076 ; MA # ( ꮩ → v ) CHEROKEE SMALL LETTER DO → LATIN SMALL LETTER 0667 ; 0056 ; MA # ( ‎٧‎ → V ) ARABIC-INDIC DIGIT SEVEN → LATIN CAPITAL LETTER V # 06F7 ; 0056 ; MA # ( ۷ → V ) EXTENDED ARABIC-INDIC DIGIT SEVEN → LATIN CAPITAL LETTER V # →‎٧‎→ 2164 ; 0056 ; MA # ( Ⅴ → V ) ROMAN NUMERAL FIVE → LATIN CAPITAL LETTER V # +1CCEB ; 0056 ; MA #* ( 𜳫 → V ) OUTLINED LATIN CAPITAL LETTER V → LATIN CAPITAL LETTER V # 1D415 ; 0056 ; MA # ( 𝐕 → V ) MATHEMATICAL BOLD CAPITAL V → LATIN CAPITAL LETTER V # 1D449 ; 0056 ; MA # ( 𝑉 → V ) MATHEMATICAL ITALIC CAPITAL V → LATIN CAPITAL LETTER V # 1D47D ; 0056 ; MA # ( 𝑽 → V ) MATHEMATICAL BOLD ITALIC CAPITAL V → LATIN CAPITAL LETTER V # @@ -3721,7 +3724,6 @@ A4E6 ; 0056 ; MA # ( ꓦ → V ) LISU LETTER HA → LATIN CAPITAL LETTER V # 16F08 ; 0056 ; MA # ( 𖼈 → V ) MIAO LETTER VA → LATIN CAPITAL LETTER V # 118A0 ; 0056 ; MA # ( 𑢠 → V ) WARANG CITI CAPITAL LETTER NGAA → LATIN CAPITAL LETTER V # 1051D ; 0056 ; MA # ( 𐔝 → V ) ELBASAN LETTER TE → LATIN CAPITAL LETTER V # -1CCEB ; 0056 ; MA #* ( 𜳫 → V ) OUTLINED LATIN CAPITAL LETTER V → LATIN CAPITAL LETTER V # 10197 ; 0056 0335 ; MA #* ( 𐆗 → V̵ ) ROMAN QUINARIUS SIGN → LATIN CAPITAL LETTER V, COMBINING SHORT STROKE OVERLAY # →V̶→ @@ -3748,6 +3750,7 @@ A4E6 ; 0056 ; MA # ( ꓦ → V ) LISU LETTER HA → LATIN CAPITAL LETTER V # 0668 ; 0245 ; MA # ( ‎٨‎ → Ʌ ) ARABIC-INDIC DIGIT EIGHT → LATIN CAPITAL LETTER TURNED V # →Λ→ 06F8 ; 0245 ; MA # ( ۸ → Ʌ ) EXTENDED ARABIC-INDIC DIGIT EIGHT → LATIN CAPITAL LETTER TURNED V # →‎٨‎→→Λ→ +A7DA ; 0245 ; MA # ( Ꟛ → Ʌ ) LATIN CAPITAL LETTER LAMBDA → LATIN CAPITAL LETTER TURNED V # →Λ→ 039B ; 0245 ; MA # ( Λ → Ʌ ) GREEK CAPITAL LETTER LAMDA → LATIN CAPITAL LETTER TURNED V # 1D6B2 ; 0245 ; MA # ( 𝚲 → Ʌ ) MATHEMATICAL BOLD CAPITAL LAMDA → LATIN CAPITAL LETTER TURNED V # →Λ→ 1D6EC ; 0245 ; MA # ( 𝛬 → Ʌ ) MATHEMATICAL ITALIC CAPITAL LAMDA → LATIN CAPITAL LETTER TURNED V # →Λ→ @@ -3763,6 +3766,8 @@ A4E5 ; 0245 ; MA # ( ꓥ → Ʌ ) LISU LETTER NGA → LATIN CAPITAL LETTER TURNE 16F3D ; 0245 ; MA # ( 𖼽 → Ʌ ) MIAO LETTER ZZA → LATIN CAPITAL LETTER TURNED V # 1028D ; 0245 ; MA # ( 𐊍 → Ʌ ) LYCIAN LETTER L → LATIN CAPITAL LETTER TURNED V # →Λ→ +A7DC ; 0245 0338 ; MA # ( Ƛ → Ʌ̸ ) LATIN CAPITAL LETTER LAMBDA WITH STROKE → LATIN CAPITAL LETTER TURNED V, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ + 04C5 ; 0245 0326 ; MA # ( Ӆ → Ʌ̦ ) CYRILLIC CAPITAL LETTER EL WITH TAIL → LATIN CAPITAL LETTER TURNED V, COMBINING COMMA BELOW # →Л̡→ 143D ; 0245 00B7 ; MA # ( ᐽ → Ʌ· ) CANADIAN SYLLABICS WEST-CREE PWI → LATIN CAPITAL LETTER TURNED V, MIDDLE DOT # →ᐱᐧ→→ᐱ·→ @@ -3792,6 +3797,7 @@ AB83 ; 0077 ; MA # ( ꮃ → w ) CHEROKEE SMALL LETTER LA → LATIN SMALL LETTER 118EF ; 0057 ; MA #* ( 𑣯 → W ) WARANG CITI NUMBER SIXTY → LATIN CAPITAL LETTER W # 118E6 ; 0057 ; MA # ( 𑣦 → W ) WARANG CITI DIGIT SIX → LATIN CAPITAL LETTER W # +1CCEC ; 0057 ; MA #* ( 𜳬 → W ) OUTLINED LATIN CAPITAL LETTER W → LATIN CAPITAL LETTER W # 1D416 ; 0057 ; MA # ( 𝐖 → W ) MATHEMATICAL BOLD CAPITAL W → LATIN CAPITAL LETTER W # 1D44A ; 0057 ; MA # ( 𝑊 → W ) MATHEMATICAL ITALIC CAPITAL W → LATIN CAPITAL LETTER W # 1D47E ; 0057 ; MA # ( 𝑾 → W ) MATHEMATICAL BOLD ITALIC CAPITAL W → LATIN CAPITAL LETTER W # @@ -3809,7 +3815,6 @@ AB83 ; 0077 ; MA # ( ꮃ → w ) CHEROKEE SMALL LETTER LA → LATIN SMALL LETTER 13B3 ; 0057 ; MA # ( Ꮃ → W ) CHEROKEE LETTER LA → LATIN CAPITAL LETTER W # 13D4 ; 0057 ; MA # ( Ꮤ → W ) CHEROKEE LETTER TA → LATIN CAPITAL LETTER W # A4EA ; 0057 ; MA # ( ꓪ → W ) LISU LETTER WA → LATIN CAPITAL LETTER W # -1CCEC ; 0057 ; MA #* ( 𜳬 → W ) OUTLINED LATIN CAPITAL LETTER W → LATIN CAPITAL LETTER W # 047D ; 0077 0486 0487 ; MA # ( ѽ → w҆҇ ) CYRILLIC SMALL LETTER OMEGA WITH TITLO → LATIN SMALL LETTER W, COMBINING CYRILLIC PSILI PNEUMATA, COMBINING CYRILLIC POKRYTIE # →ѡ҆҇→ @@ -3857,6 +3862,7 @@ FF58 ; 0078 ; MA # ( x → x ) FULLWIDTH LATIN SMALL LETTER X → LATIN SMALL 118EC ; 0058 ; MA #* ( 𑣬 → X ) WARANG CITI NUMBER THIRTY → LATIN CAPITAL LETTER X # FF38 ; 0058 ; MA # ( X → X ) FULLWIDTH LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # →Х→ 2169 ; 0058 ; MA # ( Ⅹ → X ) ROMAN NUMERAL TEN → LATIN CAPITAL LETTER X # +1CCED ; 0058 ; MA #* ( 𜳭 → X ) OUTLINED LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # 1D417 ; 0058 ; MA # ( 𝐗 → X ) MATHEMATICAL BOLD CAPITAL X → LATIN CAPITAL LETTER X # 1D44B ; 0058 ; MA # ( 𝑋 → X ) MATHEMATICAL ITALIC CAPITAL X → LATIN CAPITAL LETTER X # 1D47F ; 0058 ; MA # ( 𝑿 → X ) MATHEMATICAL BOLD ITALIC CAPITAL X → LATIN CAPITAL LETTER X # @@ -3886,7 +3892,6 @@ A4EB ; 0058 ; MA # ( ꓫ → X ) LISU LETTER SHA → LATIN CAPITAL LETTER X # 102B4 ; 0058 ; MA # ( 𐊴 → X ) CARIAN LETTER X → LATIN CAPITAL LETTER X # 10317 ; 0058 ; MA # ( 𐌗 → X ) OLD ITALIC LETTER EKS → LATIN CAPITAL LETTER X # 10527 ; 0058 ; MA # ( 𐔧 → X ) ELBASAN LETTER KHE → LATIN CAPITAL LETTER X # -1CCED ; 0058 ; MA #* ( 𜳭 → X ) OUTLINED LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER X # 2A30 ; 0078 0307 ; MA #* ( ⨰ → ẋ ) MULTIPLICATION SIGN WITH DOT ABOVE → LATIN SMALL LETTER X, COMBINING DOT ABOVE # →×̇→ @@ -3934,6 +3939,7 @@ AB5A ; 0079 ; MA # ( ꭚ → y ) LATIN SMALL LETTER Y WITH SHORT RIGHT LEG → L 118DC ; 0079 ; MA # ( 𑣜 → y ) WARANG CITI SMALL LETTER HAR → LATIN SMALL LETTER Y # →ɣ→→γ→ FF39 ; 0059 ; MA # ( Y → Y ) FULLWIDTH LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # →Υ→ +1CCEE ; 0059 ; MA #* ( 𜳮 → Y ) OUTLINED LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # 1D418 ; 0059 ; MA # ( 𝐘 → Y ) MATHEMATICAL BOLD CAPITAL Y → LATIN CAPITAL LETTER Y # 1D44C ; 0059 ; MA # ( 𝑌 → Y ) MATHEMATICAL ITALIC CAPITAL Y → LATIN CAPITAL LETTER Y # 1D480 ; 0059 ; MA # ( 𝒀 → Y ) MATHEMATICAL BOLD ITALIC CAPITAL Y → LATIN CAPITAL LETTER Y # @@ -3963,7 +3969,6 @@ A4EC ; 0059 ; MA # ( ꓬ → Y ) LISU LETTER YA → LATIN CAPITAL LETTER Y # 16F43 ; 0059 ; MA # ( 𖽃 → Y ) MIAO LETTER AH → LATIN CAPITAL LETTER Y # 118A4 ; 0059 ; MA # ( 𑢤 → Y ) WARANG CITI CAPITAL LETTER YA → LATIN CAPITAL LETTER Y # 102B2 ; 0059 ; MA # ( 𐊲 → Y ) CARIAN LETTER U → LATIN CAPITAL LETTER Y # -1CCEE ; 0059 ; MA #* ( 𜳮 → Y ) OUTLINED LATIN CAPITAL LETTER Y → LATIN CAPITAL LETTER Y # 01B4 ; 0079 0314 ; MA # ( ƴ → y̔ ) LATIN SMALL LETTER Y WITH HOOK → LATIN SMALL LETTER Y, COMBINING REVERSED COMMA ABOVE # @@ -4002,6 +4007,7 @@ AB93 ; 007A ; MA # ( ꮓ → z ) CHEROKEE SMALL LETTER NO → LATIN SMALL LETTER FF3A ; 005A ; MA # ( Z → Z ) FULLWIDTH LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # →Ζ→ 2124 ; 005A ; MA # ( ℤ → Z ) DOUBLE-STRUCK CAPITAL Z → LATIN CAPITAL LETTER Z # 2128 ; 005A ; MA # ( ℨ → Z ) BLACK-LETTER CAPITAL Z → LATIN CAPITAL LETTER Z # +1CCEF ; 005A ; MA #* ( 𜳯 → Z ) OUTLINED LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # 1D419 ; 005A ; MA # ( 𝐙 → Z ) MATHEMATICAL BOLD CAPITAL Z → LATIN CAPITAL LETTER Z # 1D44D ; 005A ; MA # ( 𝑍 → Z ) MATHEMATICAL ITALIC CAPITAL Z → LATIN CAPITAL LETTER Z # 1D481 ; 005A ; MA # ( 𝒁 → Z ) MATHEMATICAL BOLD ITALIC CAPITAL Z → LATIN CAPITAL LETTER Z # @@ -4022,7 +4028,6 @@ FF3A ; 005A ; MA # ( Z → Z ) FULLWIDTH LATIN CAPITAL LETTER Z → LATIN CAPI 13C3 ; 005A ; MA # ( Ꮓ → Z ) CHEROKEE LETTER NO → LATIN CAPITAL LETTER Z # A4DC ; 005A ; MA # ( ꓜ → Z ) LISU LETTER DZA → LATIN CAPITAL LETTER Z # 118A9 ; 005A ; MA # ( 𑢩 → Z ) WARANG CITI CAPITAL LETTER O → LATIN CAPITAL LETTER Z # -1CCEF ; 005A ; MA #* ( 𜳯 → Z ) OUTLINED LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # 0290 ; 007A 0328 ; MA # ( ʐ → z̨ ) LATIN SMALL LETTER Z WITH RETROFLEX HOOK → LATIN SMALL LETTER Z, COMBINING OGONEK # →z̢→ @@ -4115,6 +4120,7 @@ A668 ; 0298 ; MA # ( Ꙩ → ʘ ) CYRILLIC CAPITAL LETTER MONOCULAR O → LATIN 2CE4 ; 03D7 ; MA # ( ⳤ → ϗ ) COPTIC SYMBOL KAI → GREEK KAI SYMBOL # +A7DB ; 03BB ; MA # ( ꟛ → λ ) LATIN SMALL LETTER LAMBDA → GREEK SMALL LETTER LAMDA # 1D6CC ; 03BB ; MA # ( 𝛌 → λ ) MATHEMATICAL BOLD SMALL LAMDA → GREEK SMALL LETTER LAMDA # 1D706 ; 03BB ; MA # ( 𝜆 → λ ) MATHEMATICAL ITALIC SMALL LAMDA → GREEK SMALL LETTER LAMDA # 1D740 ; 03BB ; MA # ( 𝝀 → λ ) MATHEMATICAL BOLD ITALIC SMALL LAMDA → GREEK SMALL LETTER LAMDA # @@ -4123,6 +4129,8 @@ A668 ; 0298 ; MA # ( Ꙩ → ʘ ) CYRILLIC CAPITAL LETTER MONOCULAR O → LATIN 2C96 ; 03BB ; MA # ( Ⲗ → λ ) COPTIC CAPITAL LETTER LAULA → GREEK SMALL LETTER LAMDA # 104DB ; 03BB ; MA # ( 𐓛 → λ ) OSAGE SMALL LETTER AH → GREEK SMALL LETTER LAMDA # +019B ; 03BB 0338 ; MA # ( ƛ → λ̸ ) LATIN SMALL LETTER LAMBDA WITH STROKE → GREEK SMALL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →λ̷→ + 00B5 ; 03BC ; MA # ( µ → μ ) MICRO SIGN → GREEK SMALL LETTER MU # 1D6CD ; 03BC ; MA # ( 𝛍 → μ ) MATHEMATICAL BOLD SMALL MU → GREEK SMALL LETTER MU # 1D707 ; 03BC ; MA # ( 𝜇 → μ ) MATHEMATICAL ITALIC SMALL MU → GREEK SMALL LETTER MU # @@ -5982,6 +5990,8 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL 11CB2 ; 11CAA ; MA # ( 𑲲 → 𑲪 ) MARCHEN VOWEL SIGN U → MARCHEN SUBJOINED LETTER RA # +1734 ; 1715 ; MA # ( ᜴ → ᜕ ) HANUNOO SIGN PAMUDPOD → TAGALOG SIGN PAMUDPOD # + 1081 ; 1002 103E ; MA # ( ႁ → ဂှ ) MYANMAR LETTER SHAN HA → MYANMAR LETTER GA, MYANMAR CONSONANT SIGN MEDIAL HA # 1000 ; 1002 102C ; MA # ( က → ဂာ ) MYANMAR LETTER KA → MYANMAR LETTER GA, MYANMAR VOWEL SIGN AA # @@ -7221,6 +7231,7 @@ FA31 ; 50E7 ; MA # ( 僧 → 僧 ) CJK COMPATIBILITY IDEOGRAPH-FA31 → CJK UNIF 2F80C ; 349E ; MA # ( 㒞 → 㒞 ) CJK COMPATIBILITY IDEOGRAPH-2F80C → CJK UNIFIED IDEOGRAPH-349E # +3126 ; 513F ; MA # ( ㄦ → 儿 ) BOPOMOFO LETTER ER → CJK UNIFIED IDEOGRAPH-513F # 2F09 ; 513F ; MA #* ( ⼉ → 儿 ) KANGXI RADICAL LEGS → CJK UNIFIED IDEOGRAPH-513F # FA0C ; 5140 ; MA # ( 兀 → 兀 ) CJK COMPATIBILITY IDEOGRAPH-FA0C → CJK UNIFIED IDEOGRAPH-5140 # @@ -7936,16 +7947,16 @@ FA8D ; 63C4 ; MA # ( 揄 → 揄 ) CJK COMPATIBILITY IDEOGRAPH-FA8D → CJK UNIF 2F8BD ; 63E4 ; MA # ( 揤 → 揤 ) CJK COMPATIBILITY IDEOGRAPH-2F8BD → CJK UNIFIED IDEOGRAPH-63E4 # -FA8F ; 6452 ; MA # ( 摒 → 摒 ) CJK COMPATIBILITY IDEOGRAPH-FA8F → CJK UNIFIED IDEOGRAPH-6452 # +FA8E ; 641C ; MA # ( 搜 → 搜 ) CJK COMPATIBILITY IDEOGRAPH-FA8E → CJK UNIFIED IDEOGRAPH-641C # 2F8BE ; 22BF1 ; MA # ( 𢯱 → 𢯱 ) CJK COMPATIBILITY IDEOGRAPH-2F8BE → CJK UNIFIED IDEOGRAPH-22BF1 # -FA8E ; 641C ; MA # ( 搜 → 搜 ) CJK COMPATIBILITY IDEOGRAPH-FA8E → CJK UNIFIED IDEOGRAPH-641C # - 2F8BF ; 6422 ; MA # ( 搢 → 搢 ) CJK COMPATIBILITY IDEOGRAPH-2F8BF → CJK UNIFIED IDEOGRAPH-6422 # 2F8C0 ; 63C5 ; MA # ( 揅 → 揅 ) CJK COMPATIBILITY IDEOGRAPH-2F8C0 → CJK UNIFIED IDEOGRAPH-63C5 # +FA8F ; 6452 ; MA # ( 摒 → 摒 ) CJK COMPATIBILITY IDEOGRAPH-FA8F → CJK UNIFIED IDEOGRAPH-6452 # + 2F8C3 ; 6469 ; MA # ( 摩 → 摩 ) CJK COMPATIBILITY IDEOGRAPH-2F8C3 → CJK UNIFIED IDEOGRAPH-6469 # 2F8C6 ; 6477 ; MA # ( 摷 → 摷 ) CJK COMPATIBILITY IDEOGRAPH-2F8C6 → CJK UNIFIED IDEOGRAPH-6477 # @@ -9670,5 +9681,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF 2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 # -# total: 6347 +# total: 6355 diff --git a/unicodetools/data/security/dev/confusablesSummary.txt b/unicodetools/data/security/dev/confusablesSummary.txt index a35691149..093100c4b 100644 --- a/unicodetools/data/security/dev/confusablesSummary.txt +++ b/unicodetools/data/security/dev/confusablesSummary.txt @@ -1,5 +1,5 @@ # confusablesSummary.txt -# Date: 2024-05-03, 03:26:41 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -91,7 +91,7 @@ ← (‎ ʽ ‎) 02BD MODIFIER LETTER REVERSED COMMA # →‘→ ← (‎ ʾ ‎) 02BE MODIFIER LETTER RIGHT HALF RING # →ʼ→→′→ ← (‎ ˈ ‎) 02C8 MODIFIER LETTER VERTICAL LINE -← (‎ ˊ ‎) 02CA MODIFIER LETTER ACUTE ACCENT # →ʹ→→′→ +← (‎ ˊ ‎) 02CA MODIFIER LETTER ACUTE ACCENT # →΄→→ʹ→ ← (‎ ˋ ‎) 02CB MODIFIER LETTER GRAVE ACCENT # →`→→‘→ ← (‎ ߴ ‎) 07F4 NKO HIGH TONE APOSTROPHE # →’→ ← (‎ ߵ ‎) 07F5 NKO LOW TONE APOSTROPHE # →‘→ @@ -4925,8 +4925,10 @@ ← (‎ Ϸ ‎) 03F7 GREEK CAPITAL LETTER SHO ← (‎ 𐓄 ‎) 104C4 OSAGE CAPITAL LETTER PA -# ß β Ᏸ ꞵ ϐ 𝛃 𝛽 𝜷 𝝱 𝞫 +# ß Ꟗ ẞ β Ᏸ ꞵ ϐ 𝛃 𝛽 𝜷 𝝱 𝞫 (‎ ß ‎) 00DF LATIN SMALL LETTER SHARP S +← (‎ Ꟗ ‎) A7D6 LATIN CAPITAL LETTER MIDDLE SCOTS S # →β→ +← (‎ ẞ ‎) 1E9E LATIN CAPITAL LETTER SHARP S ← (‎ β ‎) 03B2 GREEK SMALL LETTER BETA ← (‎ Ᏸ ‎) 13F0 CHEROKEE LETTER YE # →β→ ← (‎ ꞵ ‎) A7B5 LATIN SMALL LETTER BETA # →β→ @@ -5095,6 +5097,11 @@ ← (‎ 𝈡 ‎) 1D221 GREEK INSTRUMENTAL NOTATION SYMBOL-7 ← (‎ ℇ ‎) 2107 EULER CONSTANT +# λ̸ λ̷ ƛ + (‎ ƛ ‎) 019B LATIN SMALL LETTER LAMBDA WITH STROKE +← (‎ λ̸ ‎) 03BB 0338 GREEK SMALL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →λ̷→ +← (‎ λ̷ ‎) 03BB 0337 GREEK SMALL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY + # ƨ ᴤ ϩ ꙅ (‎ ƨ ‎) 01A8 LATIN SMALL LETTER TONE TWO ← (‎ ᴤ ‎) 1D24 LATIN LETTER VOICED LARYNGEAL SPIRANT @@ -5165,8 +5172,9 @@ (‎ ɂ ‎) 0242 LATIN SMALL LETTER GLOTTAL STOP ← (‎ ꭾ ‎) AB7E CHEROKEE SMALL LETTER HE -# Ʌ ٨ ۸ Λ Л ᐱ ⴷ ꓥ ꛎ 𐊍 𖼽 𐒰 𝚲 𝛬 𝜦 𝝠 𝞚 +# Ʌ Ꟛ ٨ ۸ Λ Л ᐱ ⴷ ꓥ ꛎ 𐊍 𖼽 𐒰 𝚲 𝛬 𝜦 𝝠 𝞚 (‎ Ʌ ‎) 0245 LATIN CAPITAL LETTER TURNED V +← (‎ Ꟛ ‎) A7DA LATIN CAPITAL LETTER LAMBDA # →Λ→ ← (‎ ٨ ‎) 0668 ARABIC-INDIC DIGIT EIGHT # →Λ→ ← (‎ ۸ ‎) 06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT # →‎٨‎→→Λ→ ← (‎ Λ ‎) 039B GREEK CAPITAL LETTER LAMDA @@ -5197,6 +5205,13 @@ ← (‎ Л̡ ‎) 041B 0321 CYRILLIC CAPITAL LETTER EL, COMBINING PALATALIZED HOOK BELOW ← (‎ Ӆ ‎) 04C5 CYRILLIC CAPITAL LETTER EL WITH TAIL # →Л̡→ +# Ꟛ̸ Ʌ̸ Λ̸ Λ̷ Ƛ + (‎ Ʌ̸ ‎) 0245 0338 LATIN CAPITAL LETTER TURNED V, COMBINING LONG SOLIDUS OVERLAY +← (‎ Ꟛ̸ ‎) A7DA 0338 LATIN CAPITAL LETTER LAMBDA, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ +← (‎ Λ̸ ‎) 039B 0338 GREEK CAPITAL LETTER LAMDA, COMBINING LONG SOLIDUS OVERLAY # →Λ̷→ +← (‎ Λ̷ ‎) 039B 0337 GREEK CAPITAL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY +← (‎ Ƛ ‎) A7DC LATIN CAPITAL LETTER LAMBDA WITH STROKE # →Λ̷→ + # ɋ ᶐ (‎ ɋ ‎) 024B LATIN SMALL LETTER Q WITH HOOK TAIL ← (‎ ᶐ ‎) 1D90 LATIN SMALL LETTER ALPHA WITH RETROFLEX HOOK @@ -5868,8 +5883,9 @@ ← (‎ 𝝵 ‎) 1D775 MATHEMATICAL SANS-SERIF BOLD SMALL ZETA ← (‎ 𝞯 ‎) 1D7AF MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ZETA -# λ Ⲗ 𐓛 𝛌 𝜆 𝝀 𝝺 𝞴 +# λ ꟛ Ⲗ 𐓛 𝛌 𝜆 𝝀 𝝺 𝞴 (‎ λ ‎) 03BB GREEK SMALL LETTER LAMDA +← (‎ ꟛ ‎) A7DB LATIN SMALL LETTER LAMBDA ← (‎ Ⲗ ‎) 2C96 COPTIC CAPITAL LETTER LAULA ← (‎ 𐓛 ‎) 104DB OSAGE SMALL LETTER AH ← (‎ 𝛌 ‎) 1D6CC MATHEMATICAL BOLD SMALL LAMDA @@ -11373,6 +11389,10 @@ (‎ ᛯ ‎) 16EF RUNIC TVIMADUR SYMBOL ← (‎ ⵣ ‎) 2D63 TIFINAGH LETTER YAZ +# ᜕ ᜴ + (‎ ᜕ ‎) 1715 TAGALOG SIGN PAMUDPOD +← (‎ ᜴ ‎) 1734 HANUNOO SIGN PAMUDPOD + # អ ឣ (‎ អ ‎) 17A2 KHMER LETTER QA ← (‎ ឣ ‎) 17A3 KHMER INDEPENDENT VOWEL QAQ @@ -12322,9 +12342,10 @@ (‎ ⼈ ‎) 2F08 KANGXI RADICAL MAN ← (‎ 人 ‎) 4EBA CJK UNIFIED IDEOGRAPH-4EBA -# 儿 ⼉ +# 儿 ㄦ ⼉ (‎ ⼉ ‎) 2F09 KANGXI RADICAL LEGS ← (‎ 儿 ‎) 513F CJK UNIFIED IDEOGRAPH-513F +← (‎ ㄦ ‎) 3126 BOPOMOFO LETTER ER # →儿→ # 入 ⼊ (‎ ⼊ ‎) 2F0A KANGXI RADICAL ENTER @@ -17245,5 +17266,5 @@ (‎ 𪘀 ‎) 2A600 CJK UNIFIED IDEOGRAPH-2A600 ← (‎ 𪘀 ‎) 2FA1D CJK COMPATIBILITY IDEOGRAPH-2FA1D -# total : 7290 +# total : 7302 diff --git a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt index 5435b8bff..6bd3611df 100644 --- a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt +++ b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt @@ -1,5 +1,5 @@ # confusablesSummaryIdentifier.txt -# Date: 2024-05-04, 21:31:06 GMT +# Date: 2024-05-31, 21:12:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -542,8 +542,10 @@ (‎ Ö ‎) 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS ← (‎ Ő ‎) 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -# ß β +# ß Ꟗ ẞ β (‎ ß ‎) 00DF LATIN SMALL LETTER SHARP S +← (‎ Ꟗ ‎) A7D6 LATIN CAPITAL LETTER MIDDLE SCOTS S # →β→ +← (‎ ẞ ‎) 1E9E LATIN CAPITAL LETTER SHARP S ← (‎ β ‎) 03B2 GREEK SMALL LETTER BETA # å ȧ @@ -1618,6 +1620,10 @@ (‎ 二 ‎) 4E8C CJK UNIFIED IDEOGRAPH-4E8C ← (‎ ニ ‎) 30CB KATAKANA LETTER NI +# 儿 ㄦ + (‎ 儿 ‎) 513F CJK UNIFIED IDEOGRAPH-513F +← (‎ ㄦ ‎) 3126 BOPOMOFO LETTER ER + # 八 ハ (‎ 八 ‎) 516B CJK UNIFIED IDEOGRAPH-516B ← (‎ ハ ‎) 30CF KATAKANA LETTER HA @@ -1839,5 +1845,5 @@ (‎ 鹂 ‎) 9E42 CJK UNIFIED IDEOGRAPH-9E42 ← (‎ 鹃 ‎) 9E43 CJK UNIFIED IDEOGRAPH-9E43 -# total : 635 +# total : 638 diff --git a/unicodetools/data/security/dev/data/source/confusables-source.txt b/unicodetools/data/security/dev/data/source/confusables-source.txt index 65804747e..e4c47a779 100644 --- a/unicodetools/data/security/dev/data/source/confusables-source.txt +++ b/unicodetools/data/security/dev/data/source/confusables-source.txt @@ -1,4 +1,17 @@ -0021 ; 01C3 # ( ! → ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK +# See https://github.com/unicode-org/unicodetools/blob/main/docs/security.md for how to use this file. +# The format is +# Source ; Target ; comments # comments +# Source is: +# - a hex code point +# - a literal character +# - a range of the above with .. (need to check this) +# - a UnicodeSet +# Target is: +# - a hex code point +# - a literal character +# - a sequence of hex code points and or literal characters (they can be mixed) +####### +0021 ; 01C3 # ( ! → ǃ) EXCLAMATION MARK → LATIN LETTER RETROFLEX CLICK 0022 ; 02BA # ( " → ʺ) QUOTATION MARK → MODIFIER LETTER DOUBLE PRIME 0022 ; 0027 0027 0022 ; 05F4 # ( " → ״) QUOTATION MARK → HEBREW PUNCTUATION GERSHAYIM @@ -5437,4 +5450,26 @@ ABBB; 0473; V8_0; ꮻ => ѳ; CHEROKEE SMALL LETTER WI => CYRILLIC SMALL LETTER F 1F16E ; C 20E0 ; V11_0 ; CIRCLED C WITH OVERLAID BACKSLASH # 1F16F ; 🚹 ; V11_0 ; CIRCLED HUMAN FIGURE +# 178-A76 — Section 21 of document L2/24-012 +513F ; 儿 # V16.0 ; U+513F ︎➡︎ U+16FF2 +16FF3 ; 兒 # V16.0 ; U+5152 ➡ U+16FF3 +ㄦ ; 儿 # V16.0 ; U+3126 ㄦ BOPOMOFO LETTER ER ➡ 儿 + +# 176-A116 — Section 2a of L2/23-164 + +A7DA ; Λ # V16.0 ; U+A7DA LATIN CAPITAL LETTER LAMBDA ➡ greek equiv +A7DB ; λ # V16.0 ; U+A7DB LATIN SMALL LETTER LAMBDA ➡ greek equiv +A7DC ; Λ 0337 # V16.0 ; U+A7DC LATIN CAPITAL LETTER LAMBDA WITH STROKE ➡ greek equiv +ƛ ; λ 0337 # V16.0 ; existing Latin variant + +# 165-A37 — L2/20-272 + +1715 ; 1734 # V16.0 ; U+1715 TAGALOG SIGN PAMUDPOD ➡ 1734, Hanunoo Sign Pamudpod + +# 166-A55 — Section 3n of L2/21-016R + +ß ; β # sharp S with beta +ẞ ; ß # sharp S upper/lower +A7D6 ; β # Middle Scots S, uppercase +A7D6 ; β # Middle Scots S, lowercase diff --git a/unicodetools/data/security/dev/data/source/formatted-source.txt b/unicodetools/data/security/dev/data/source/formatted-source.txt index b7475ec78..216c1689e 100644 --- a/unicodetools/data/security/dev/data/source/formatted-source.txt +++ b/unicodetools/data/security/dev/data/source/formatted-source.txt @@ -1,5 +1,5 @@ # formatted-source.txt -# Date: 2024-05-03, 03:26:38 GMT +# Date: 2024-05-31, 21:12:51 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -899,6 +899,9 @@ 00DE ; 104C4 # ( Þ ~ 𐓄 ) LATIN CAPITAL LETTER THORN ~ OSAGE CAPITAL LETTER PA +00DF ; 1E9E # ( ß ~ ẞ ) LATIN SMALL LETTER SHARP S ~ LATIN CAPITAL LETTER SHARP S +00DF ; 03B2 # ( ß ~ β ) LATIN SMALL LETTER SHARP S ~ GREEK SMALL LETTER BETA + 00E5 ; 0227 # ( å ~ ȧ ) LATIN SMALL LETTER A WITH RING ABOVE ~ LATIN SMALL LETTER A WITH DOT ABOVE 00F0 ; 1E8CD # ( ð ~ ‎𞣍‎ ) LATIN SMALL LETTER ETH ~ MENDE KIKAKUI DIGIT SEVEN @@ -1295,6 +1298,8 @@ 039B ; A6CE # ( Λ ~ ꛎ ) GREEK CAPITAL LETTER LAMDA ~ BAMUM LETTER MI 039B ; 1028D # ( Λ ~ 𐊍 ) GREEK CAPITAL LETTER LAMDA ~ LYCIAN LETTER L +039B 0337 ; A7DC # ( Λ̷ ~ Ƛ ) GREEK CAPITAL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY ~ LATIN CAPITAL LETTER LAMBDA WITH STROKE + 039C ; 041C # ( Μ ~ М ) GREEK CAPITAL LETTER MU ~ CYRILLIC CAPITAL LETTER EM 039C ; 216F # ( Μ ~ Ⅿ ) GREEK CAPITAL LETTER MU ~ ROMAN NUMERAL ONE THOUSAND @@ -1354,8 +1359,11 @@ 03BA ; 043A # ( κ ~ к ) GREEK SMALL LETTER KAPPA ~ CYRILLIC SMALL LETTER KA +03BB ; A7DB # ( λ ~ ꟛ ) GREEK SMALL LETTER LAMDA ~ LATIN SMALL LETTER LAMBDA 03BB ; 104DB # ( λ ~ 𐓛 ) GREEK SMALL LETTER LAMDA ~ OSAGE SMALL LETTER AH +03BB 0337 ; 019B # ( λ̷ ~ ƛ ) GREEK SMALL LETTER LAMDA, COMBINING SHORT SOLIDUS OVERLAY ~ LATIN SMALL LETTER LAMBDA WITH STROKE + 03BD ; 2174 # ( ν ~ ⅴ ) GREEK SMALL LETTER NU ~ SMALL ROMAN NUMERAL FIVE 03BF ; 043E # ( ο ~ о ) GREEK SMALL LETTER OMICRON ~ CYRILLIC SMALL LETTER O @@ -3599,6 +3607,8 @@ 16EF ; 2D63 # ( ᛯ ~ ⵣ ) RUNIC TVIMADUR SYMBOL ~ TIFINAGH LETTER YAZ +1715 ; 1734 # ( ᜕ ~ ᜴ ) TAGALOG SIGN PAMUDPOD ~ HANUNOO SIGN PAMUDPOD + 17A2 ; 17A3 # ( អ ~ ឣ ) KHMER LETTER QA ~ KHMER INDEPENDENT VOWEL QAQ 185C ; 1896 # ( ᡜ ~ ᢖ ) MONGOLIAN LETTER TODO DZA ~ MONGOLIAN LETTER ALI GALI ZA @@ -4030,6 +4040,8 @@ 5024 ; 503C # ( 値 ~ 值 ) CJK UNIFIED IDEOGRAPH-5024 ~ CJK UNIFIED IDEOGRAPH-503C +513F ; 3126 # ( 儿 ~ ㄦ ) CJK UNIFIED IDEOGRAPH-513F ~ BOPOMOFO LETTER ER + 5553 ; 555F # ( 啓 ~ 啟 ) CJK UNIFIED IDEOGRAPH-5553 ~ CJK UNIFIED IDEOGRAPH-555F 5861 ; 586B # ( 塡 ~ 填 ) CJK UNIFIED IDEOGRAPH-5861 ~ CJK UNIFIED IDEOGRAPH-586B @@ -4158,6 +4170,10 @@ A792 ; 0404 # ( Ꞓ ~ Є ) LATIN CAPITAL LETTER C WITH BAR ~ CYRILLIC CAPITAL LE A793 ; 0454 # ( ꞓ ~ є ) LATIN SMALL LETTER C WITH BAR ~ CYRILLIC SMALL LETTER UKRAINIAN IE +A7D6 ; 03B2 # ( Ꟗ ~ β ) LATIN CAPITAL LETTER MIDDLE SCOTS S ~ GREEK SMALL LETTER BETA + +A7DA ; 039B # ( Ꟛ ~ Λ ) LATIN CAPITAL LETTER LAMBDA ~ GREEK CAPITAL LETTER LAMDA + A7FB ; 15B7 # ( ꟻ ~ ᖷ ) LATIN EPIGRAPHIC LETTER REVERSED F ~ CANADIAN SYLLABICS BLACKFOOT WA A7FB ; 1D230 # ( ꟻ ~ 𝈰 ) LATIN EPIGRAPHIC LETTER REVERSED F ~ GREEK INSTRUMENTAL NOTATION SYMBOL-30 diff --git a/unicodetools/data/uca/dev/CollationTest.html b/unicodetools/data/uca/dev/CollationTest.html index f204d01c4..82e72214a 100644 --- a/unicodetools/data/uca/dev/CollationTest.html +++ b/unicodetools/data/uca/dev/CollationTest.html @@ -91,7 +91,7 @@

Testing

If there are any errors, then the UCA implementation is not compliant.

These files contain test cases that include ill-formed strings, with surrogate code points. Implementations that do not weight surrogate code points the same way as reserved code points - may filter out such lines lines in the test cases, before testing for conformance.

+ may filter out such lines in the test cases, before testing for conformance.